更新: 13 个文件 - 2026-03-18 09:44:57

这个提交包含在:
hao
2026-03-18 09:44:57 -07:00
父节点 91d6f4d04e
当前提交 dc31e6e80f
修改 13 个文件,包含 904 行新增52 行删除

查看文件

@@ -8,11 +8,12 @@ from typing import Any, Dict, List, Optional, Tuple
import requests
from intel.config import iter_all_sources
from intel.http_client import request
from intel.models import Candidate
from intel.utils import parse_dt
from . import cisa_kev, github_global, html_links, nvd_api, osv_api, rss_feed
from . import atom_feed, cisa_kev, github_global, html_links, json_feed, nvd_api, osv_api, rss_feed, vendor_index
HANDLERS = {
@@ -21,11 +22,59 @@ HANDLERS = {
"kev-json": cisa_kev.fetch,
"nvd-search": nvd_api.fetch,
"rss-feed": rss_feed.fetch,
"atom-feed": atom_feed.fetch,
"json-feed": json_feed.fetch,
"html-links": html_links.fetch,
"vendor-index": vendor_index.fetch,
}
def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]:
def _failure_category(exc: Exception) -> str:
if isinstance(exc, requests.exceptions.SSLError):
return "tls"
if isinstance(exc, requests.exceptions.HTTPError):
response = getattr(exc, "response", None)
status = getattr(response, "status_code", None)
if status == 429:
return "rate_limit"
return "http_status"
if isinstance(exc, requests.exceptions.RequestException):
return "network"
if isinstance(exc, ET.ParseError):
return "parse"
if isinstance(exc, ValueError):
return "schema"
return "parse"
def failure_summary(failure: Dict[str, Any]) -> str:
if isinstance(failure, str):
return failure
return failure.get("summary") or f"{failure.get('system_id')}::{failure.get('source_name')}::{failure.get('category')}::{failure.get('exception')}"
def _build_failure(system: Dict[str, Any], source: Dict[str, Any], exc: Exception) -> Dict[str, Any]:
response = getattr(exc, "response", None)
status_code = getattr(response, "status_code", None)
category = _failure_category(exc)
message = str(exc).strip() or exc.__class__.__name__
summary = f"{system['system_id']}::{source['name']}::{category}::{message}"
return {
"system_id": system["system_id"],
"display_name": system["display_name"],
"source_name": source["name"],
"source_kind": source["kind"],
"source_bucket": source.get("bucket_name"),
"category": category,
"exception": exc.__class__.__name__,
"message": message,
"status_code": status_code,
"url": source.get("url") or "",
"summary": summary,
}
def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]:
kind = source["kind"]
if kind == "ghsa-global":
headers = {"Accept": "application/vnd.github+json", "User-Agent": "websafe-intel"}
@@ -35,6 +84,7 @@ def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, A
response = request(
"GET",
github_global.API_URL,
source=source,
headers=headers,
params={"per_page": 1, "page": 1, "ecosystem": source.get("ecosystem")},
)
@@ -52,6 +102,7 @@ def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, A
response = request(
"POST",
osv_api.QUERY_BATCH_URL,
source=source,
json={"queries": [{"package": {"name": packages[0]["name"], "ecosystem": packages[0]["ecosystem"]}}]},
headers={"User-Agent": "websafe-intel"},
)
@@ -61,7 +112,7 @@ def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, A
raise ValueError("OSV probe returned non-object payload")
return {"kind": kind, "items_seen": len(payload.get("results", []))}
if kind == "kev-json":
response = request("GET", source["url"])
response = request("GET", source["url"], source=source)
response.raise_for_status()
payload = response.json()
if not isinstance(payload, dict):
@@ -76,19 +127,37 @@ def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, A
api_key = os.environ.get("NVD_API_KEY")
if api_key:
headers["apiKey"] = api_key
response = request("GET", nvd_api.API_URL, headers=headers, params=params)
response = request("GET", nvd_api.API_URL, source=source, headers=headers, params=params)
response.raise_for_status()
payload = response.json()
if not isinstance(payload, dict):
raise ValueError("NVD probe returned non-object payload")
return {"kind": kind, "items_seen": len(payload.get("vulnerabilities", []))}
if kind == "rss-feed":
response = request("GET", source["url"])
response = request("GET", source["url"], source=source)
response.raise_for_status()
root = ET.fromstring(response.content)
return {"kind": kind, "items_seen": len(root.findall(".//item"))}
if kind == "atom-feed":
response = request("GET", source["url"], source=source)
response.raise_for_status()
root = ET.fromstring(response.content)
return {"kind": kind, "items_seen": len(root.findall(".//{http://www.w3.org/2005/Atom}entry"))}
if kind == "json-feed":
response = request("GET", source["url"], source=source)
response.raise_for_status()
payload = response.json()
items = payload.get("items") or payload.get("entries") or payload.get("advisories") or []
if not isinstance(items, list):
raise ValueError("JSON feed probe returned non-list items")
return {"kind": kind, "items_seen": len(items)}
if kind == "html-links":
response = request("GET", source["url"])
response = request("GET", source["url"], source=source)
response.raise_for_status()
html = response.text
return {"kind": kind, "items_seen": len(html_links.ANCHOR_RE.findall(html))}
if kind == "vendor-index":
response = request("GET", source["url"], source=source)
response.raise_for_status()
html = response.text
return {"kind": kind, "items_seen": len(html_links.ANCHOR_RE.findall(html))}
@@ -110,47 +179,59 @@ def collect_candidates(
since_dt: Optional[datetime] = None,
tier: Optional[str] = None,
include_undated: bool = False,
) -> Tuple[List[Candidate], List[str]]:
) -> Tuple[List[Candidate], List[Dict[str, Any]]]:
all_candidates: List[Candidate] = []
failures: List[str] = []
failures: List[Dict[str, Any]] = []
for system in source_map["systems"]:
if tier and system.get("tier") != tier:
continue
for bucket_name in ("official_sources", "ecosystem_sources", "research_sources"):
for source in system.get(bucket_name, []):
handler = HANDLERS.get(source["kind"])
if handler is None:
failures.append(f"Unsupported source kind {source['kind']} for {system['system_id']}")
continue
try:
items = handler(system, source)
for item in items:
if _passes_since(item, since_dt, include_undated):
all_candidates.append(item)
except Exception as exc:
failures.append(f"{system['system_id']}::{source['name']}::{exc.__class__.__name__}")
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=False):
handler = HANDLERS.get(source["kind"])
if handler is None:
failures.append(
{
"system_id": system["system_id"],
"display_name": system["display_name"],
"source_name": source["name"],
"source_kind": source["kind"],
"source_bucket": source.get("bucket_name"),
"category": "schema",
"exception": "UnsupportedSourceKind",
"message": f"Unsupported source kind {source['kind']}",
"status_code": None,
"url": source.get("url") or "",
"summary": f"{system['system_id']}::{source['name']}::schema::Unsupported source kind {source['kind']}",
}
)
continue
try:
items = handler(system, source)
for item in items:
if _passes_since(item, since_dt, include_undated):
all_candidates.append(item)
except Exception as exc:
failures.append(_build_failure(system, source, exc))
return all_candidates, failures
def probe_sources(
source_map: Dict[str, Any],
tier: Optional[str] = None,
) -> Tuple[List[Dict[str, Any]], List[str]]:
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
jobs: List[Tuple[Dict[str, Any], Dict[str, Any]]] = []
probes: List[Dict[str, Any]] = []
failures: List[str] = []
failures: List[Dict[str, Any]] = []
for system in source_map["systems"]:
if tier and system.get("tier") != tier:
continue
for bucket_name in ("official_sources", "ecosystem_sources", "research_sources"):
for source in system.get(bucket_name, []):
jobs.append((system, source))
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=False):
jobs.append((system, source))
max_workers = min(16, max(4, len(jobs) or 1))
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_map = {executor.submit(_probe_source, system, source): (system, source) for system, source in jobs}
future_map = {executor.submit(probe_source, system, source): (system, source) for system, source in jobs}
for future in as_completed(future_map):
system, source = future_map[future]
try:
@@ -164,5 +245,15 @@ def probe_sources(
}
)
except Exception as exc:
failures.append(f"{system['system_id']}::{source['name']}::{exc.__class__.__name__}")
failures.append(_build_failure(system, source, exc))
return probes, failures
def find_source(source_map: Dict[str, Any], system_id: str, source_name: str) -> Tuple[Dict[str, Any], Dict[str, Any]] | None:
for system in source_map.get("systems", []) or []:
if system.get("system_id") != system_id:
continue
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=True):
if source.get("name") == source_name:
return system, source
return None