Stabilize source health monitoring
这个提交包含在:
@@ -1,30 +1,135 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from hashlib import sha1
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.config import STATE_DIR
|
||||
from intel.http_client import build_session, request
|
||||
from intel.models import Candidate
|
||||
from intel.utils import parse_dt, unique
|
||||
from intel.utils import isoformat, now_utc, parse_dt, read_json, unique, write_json
|
||||
|
||||
|
||||
# OSV endpoint that receives the batched package queries (sent via POST).
QUERY_BATCH_URL = "https://api.osv.dev/v1/querybatch"
|
||||
# URL template for a single vulnerability record; format with ``vuln_id``.
DETAIL_URL = "https://api.osv.dev/v1/vulns/{vuln_id}"
|
||||
# Matches a CVSS 3.0/3.1 base vector (AV/AC/PR/UI/S/C/I/A metrics).
# NOTE(review): the pattern requires a literal "/" before "CVSS:" -- confirm
# that the leading slash is intended.
CVSS_SCORE_RE = re.compile(r"/CVSS:3\.[01]/AV:[A-Z]/AC:[A-Z]/PR:[A-Z]/UI:[A-Z]/S:[A-Z]/C:[A-Z]/I:[A-Z]/A:[A-Z]")
|
||||
# Captures an unsigned integer or decimal number such as "7" or "7.5".
NUMERIC_SCORE_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)")
|
||||
# Cache lifetime used when WEBSAFE_OSV_CACHE_TTL_SECONDS is unset or invalid
# (6 hours, expressed in seconds).
DEFAULT_CACHE_TTL_SECONDS = 6 * 60 * 60
|
||||
# Directory under STATE_DIR that holds the cached OSV response files.
_CACHE_DIR = STATE_DIR / "cache" / "osv"
|
||||
|
||||
|
||||
def _fetch_detail(session: requests.Session, vuln_id: str) -> Dict[str, Any]:
|
||||
def _cache_ttl_seconds() -> int:
|
||||
configured = os.environ.get("WEBSAFE_OSV_CACHE_TTL_SECONDS")
|
||||
if configured:
|
||||
try:
|
||||
return max(0, int(configured))
|
||||
except ValueError:
|
||||
return DEFAULT_CACHE_TTL_SECONDS
|
||||
return DEFAULT_CACHE_TTL_SECONDS
|
||||
|
||||
|
||||
def _cache_key(value: str) -> str:
|
||||
return sha1(value.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _cache_path(namespace: str, value: str) -> Path:
    """Return the cache file path for ``value`` within ``namespace``.

    The file lives in ``_CACHE_DIR`` and is named
    ``<namespace>-<sha1 of value>.json``.
    """
    digest = _cache_key(value)
    return _CACHE_DIR / f"{namespace}-{digest}.json"
|
||||
|
||||
|
||||
def _load_cached_payload(namespace: str, value: str) -> Dict[str, Any] | None:
    """Return the cached payload for ``value`` if it is still fresh.

    Returns ``None`` when caching is disabled (TTL of zero), when the cache
    entry or its ``fetched_at`` timestamp cannot be read, when the entry is
    older than the TTL, or when the stored payload is not a dict.
    """
    ttl = _cache_ttl_seconds()
    if ttl <= 0:
        return None

    entry = read_json(_cache_path(namespace, value), default=None)
    if not isinstance(entry, dict):
        return None

    stamp = parse_dt(entry.get("fetched_at"))
    if stamp is None:
        return None

    # Entries strictly older than the TTL are considered expired.
    if (now_utc() - stamp).total_seconds() > ttl:
        return None

    body = entry.get("payload")
    if isinstance(body, dict):
        return body
    return None
|
||||
|
||||
|
||||
def _write_cached_payload(namespace: str, value: str, payload: Dict[str, Any]) -> None:
    """Store ``payload`` in the cache file for ``value`` within ``namespace``.

    The written record holds the payload together with a ``fetched_at``
    timestamp taken from ``now_utc()`` at write time.
    """
    target = _cache_path(namespace, value)
    record = {
        "fetched_at": isoformat(now_utc()),
        "payload": payload,
    }
    write_json(target, record)
|
||||
|
||||
|
||||
def _request_json(
    method: str,
    url: str,
    *,
    source: Dict[str, Any],
    cache_namespace: str,
    cache_key: str,
    session: requests.Session | None = None,
    json_body: Dict[str, Any] | None = None,
) -> Dict[str, Any]:
    """Return a JSON object for ``url``, served from cache when possible.

    Args:
        method: HTTP method passed to ``request``.
        url: Target URL passed to ``request``.
        source: Source configuration forwarded to ``request``.
        cache_namespace: Prefix used for the cache file name.
        cache_key: Value that identifies this request within the namespace.
        session: Optional session forwarded to ``request``.
        json_body: Optional JSON body forwarded to ``request``.

    Returns:
        The cached payload if a fresh one exists, otherwise the decoded
        response body, which is written to the cache before returning.

    Raises:
        ValueError: If the decoded response body is not a dict.
        Exception: Whatever ``response.raise_for_status()`` raises.
    """
    cached = _load_cached_payload(cache_namespace, cache_key)
    if cached is not None:
        return cached

    # Use the caller's method and URL; this helper serves both the batch
    # query and the detail endpoints.
    response = request(
        method,
        url,
        source=source,
        session=session,
        json=json_body,
        headers={"User-Agent": "websafe-intel"},
    )
    response.raise_for_status()

    # Validate before caching so a non-object body is never stored.
    payload = response.json()
    if not isinstance(payload, dict):
        raise ValueError(f"OSV response payload was not an object for {url}")
    _write_cached_payload(cache_namespace, cache_key, payload)
    return payload
|
||||
|
||||
|
||||
def request_querybatch_json(
    source: Dict[str, Any],
    queries: List[Dict[str, Any]],
    *,
    session: requests.Session | None = None,
) -> Dict[str, Any]:
    """POST ``queries`` to the OSV batch endpoint and return the JSON object.

    The cache key is the request body serialized as compact JSON with sorted
    keys, under the ``"querybatch"`` namespace.
    """
    request_body = {"queries": queries}
    serialized = json.dumps(request_body, sort_keys=True, separators=(",", ":"))
    return _request_json(
        "POST",
        QUERY_BATCH_URL,
        source=source,
        cache_namespace="querybatch",
        cache_key=serialized,
        session=session,
        json_body=request_body,
    )
|
||||
|
||||
|
||||
def request_detail_json(
    source: Dict[str, Any],
    vuln_id: str,
    *,
    session: requests.Session | None = None,
) -> Dict[str, Any]:
    """GET the OSV record for ``vuln_id`` and return it as a JSON object.

    Results are cached under the ``"detail"`` namespace, keyed by ``vuln_id``.
    """
    detail_url = DETAIL_URL.format(vuln_id=vuln_id)
    return _request_json(
        "GET",
        detail_url,
        source=source,
        cache_namespace="detail",
        cache_key=vuln_id,
        session=session,
    )
|
||||
|
||||
|
||||
def _fixed_versions(vuln: Dict[str, Any]) -> List[str]:
|
||||
@@ -96,16 +201,7 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
|
||||
queries = [{"package": {"name": pkg["name"], "ecosystem": pkg["ecosystem"]}} for pkg in packages]
|
||||
session = build_session(source)
|
||||
response = request(
|
||||
"POST",
|
||||
QUERY_BATCH_URL,
|
||||
source=source,
|
||||
session=session,
|
||||
json={"queries": queries},
|
||||
headers={"User-Agent": "websafe-intel"},
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
payload = request_querybatch_json(source, queries, session=session)
|
||||
|
||||
detail_cache: Dict[str, Dict[str, Any]] = {}
|
||||
candidates: List[Candidate] = []
|
||||
@@ -118,7 +214,7 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
if since_dt is not None and modified is not None and modified < since_dt:
|
||||
continue
|
||||
if vuln_id not in detail_cache:
|
||||
detail_cache[vuln_id] = _fetch_detail(session, vuln_id)
|
||||
detail_cache[vuln_id] = request_detail_json(source, vuln_id, session=session)
|
||||
vuln = detail_cache[vuln_id]
|
||||
|
||||
aliases = unique(vuln.get("aliases", []) + [vuln.get("id")])
|
||||
|
||||
@@ -177,15 +177,10 @@ def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, An
|
||||
packages = system.get("package_names", [])
|
||||
if not packages:
|
||||
return {"kind": kind, "items_seen": 0}
|
||||
response = request(
|
||||
"POST",
|
||||
osv_api.QUERY_BATCH_URL,
|
||||
source=source,
|
||||
json={"queries": [{"package": {"name": packages[0]["name"], "ecosystem": packages[0]["ecosystem"]}}]},
|
||||
headers={"User-Agent": "websafe-intel"},
|
||||
payload = osv_api.request_querybatch_json(
|
||||
source,
|
||||
[{"package": {"name": packages[0]["name"], "ecosystem": packages[0]["ecosystem"]}}],
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
if not isinstance(payload, dict):
|
||||
raise ValueError("OSV probe returned non-object payload")
|
||||
return {"kind": kind, "items_seen": len(payload.get("results", []))}
|
||||
|
||||
在新工单中引用
屏蔽一个用户