更新: 1 个文件 - 2026-03-18 16:13:21

这个提交包含在:
hao
2026-03-18 16:13:21 -07:00
父节点 b9c67410c8
当前提交 df455d7fb5

查看文件

@@ -28,6 +28,9 @@ HANDLERS = {
"vendor-index": vendor_index.fetch, "vendor-index": vendor_index.fetch,
} }
# Worker count that _max_workers() falls back to when the
# WEBSAFE_INTEL_MAX_WORKERS environment variable is unset, empty, or not a
# valid integer.
DEFAULT_MAX_WORKERS = 20
# Upper bound that _max_workers() applies to the requested worker count.
MAX_WORKER_CAP = 32
def _failure_category(exc: Exception) -> str: def _failure_category(exc: Exception) -> str:
if isinstance(exc, requests.exceptions.SSLError): if isinstance(exc, requests.exceptions.SSLError):
@@ -74,6 +77,68 @@ def build_failure(system: Dict[str, Any], source: Dict[str, Any], exc: Exception
} }
def _collect_jobs(
    source_map: Dict[str, Any],
    *,
    tier: Optional[str] = None,
) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
    """Flatten ``source_map`` into a list of ``(system, source)`` pairs.

    Args:
        source_map: Mapping with a ``"systems"`` entry that is iterated in
            order.
        tier: When truthy, only systems whose ``"tier"`` value equals it
            contribute pairs; otherwise every system is included.

    Returns:
        One ``(system, source)`` tuple per source yielded by
        ``iter_all_sources`` (called with ``include_retired=False``), in
        system order.
    """
    # Each system is wrapped in its own single-system map so the tier filter
    # can be applied before its sources are enumerated.
    return [
        (system, source)
        for system in source_map["systems"]
        if not tier or system.get("tier") == tier
        for _, _, source in iter_all_sources({"systems": [system]}, include_retired=False)
    ]
def _max_workers(job_count: int) -> int:
    """Pick the thread-pool size for ``job_count`` jobs.

    The requested size is read from the ``WEBSAFE_INTEL_MAX_WORKERS``
    environment variable; ``DEFAULT_MAX_WORKERS`` is used when the variable is
    unset, empty, or not parseable as an integer. The request is clamped to at
    least 4 and at most ``MAX_WORKER_CAP``, then limited to ``job_count`` so
    the pool is never larger than the number of jobs.

    Args:
        job_count: Number of jobs that will be submitted.

    Returns:
        The worker count to use; 4 when ``job_count`` is zero or negative.
    """
    floor = 4
    if job_count <= 0:
        # No jobs: still return a positive pool size.
        return floor

    raw = os.environ.get("WEBSAFE_INTEL_MAX_WORKERS")
    try:
        requested = int(raw) if raw else DEFAULT_MAX_WORKERS
    except ValueError:
        # A non-numeric override is ignored rather than treated as an error.
        requested = DEFAULT_MAX_WORKERS

    bounded = max(floor, min(MAX_WORKER_CAP, requested))
    return min(bounded, job_count)
def _collect_source_candidates(
    system: Dict[str, Any],
    source: Dict[str, Any],
    *,
    since_dt: Optional[datetime],
    include_undated: bool,
) -> Tuple[List[Candidate], Optional[Dict[str, Any]]]:
    """Fetch and filter the candidates of a single source.

    Args:
        system: System record; ``"system_id"`` and ``"display_name"`` are read
            when reporting an unsupported source kind.
        source: Source record; ``"kind"`` selects the handler from
            ``HANDLERS``.
        since_dt: Forwarded to ``_passes_since`` for every fetched item.
        include_undated: Forwarded to ``_passes_since`` for every fetched item.

    Returns:
        ``(candidates, None)`` on success, or ``([], failure)`` where
        ``failure`` is a dict describing why the source produced nothing:
        either a ``"schema"`` record for an unknown kind or whatever
        ``build_failure`` returns for an exception raised while fetching or
        filtering.
    """
    fetch = HANDLERS.get(source["kind"])
    if fetch is None:
        unsupported = {
            "system_id": system["system_id"],
            "display_name": system["display_name"],
            "source_name": source["name"],
            "source_kind": source["kind"],
            "source_bucket": source.get("bucket_name"),
            "category": "schema",
            "exception": "UnsupportedSourceKind",
            "message": f"Unsupported source kind {source['kind']}",
            "status_code": None,
            "url": source.get("url") or "",
            "summary": f"{system['system_id']}::{source['name']}::schema::Unsupported source kind {source['kind']}",
        }
        return [], unsupported

    try:
        # Filtering stays inside the try so an error raised while iterating or
        # checking an item is reported as a failure for this source.
        kept = []
        for candidate in fetch(system, source):
            if _passes_since(candidate, since_dt, include_undated):
                kept.append(candidate)
    except Exception as exc:
        return [], build_failure(system, source, exc)
    return kept, None
def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]: def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]:
kind = source["kind"] kind = source["kind"]
if kind == "ghsa-global": if kind == "ghsa-global":
@@ -187,36 +252,26 @@ def collect_candidates(
) -> Tuple[List[Candidate], List[Dict[str, Any]]]: ) -> Tuple[List[Candidate], List[Dict[str, Any]]]:
all_candidates: List[Candidate] = [] all_candidates: List[Candidate] = []
failures: List[Dict[str, Any]] = [] failures: List[Dict[str, Any]] = []
jobs = _collect_jobs(source_map, tier=tier)
for system in source_map["systems"]: with ThreadPoolExecutor(max_workers=_max_workers(len(jobs))) as executor:
if tier and system.get("tier") != tier: future_map = {
continue executor.submit(
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=False): _collect_source_candidates,
handler = HANDLERS.get(source["kind"]) system,
if handler is None: source,
failures.append( since_dt=since_dt,
{ include_undated=include_undated,
"system_id": system["system_id"], ): (system, source)
"display_name": system["display_name"], for system, source in jobs
"source_name": source["name"], }
"source_kind": source["kind"], for future in as_completed(future_map):
"source_bucket": source.get("bucket_name"), items, failure = future.result()
"category": "schema", if items:
"exception": "UnsupportedSourceKind", all_candidates.extend(items)
"message": f"Unsupported source kind {source['kind']}", if failure:
"status_code": None, failures.append(failure)
"url": source.get("url") or "", all_candidates.sort(key=lambda item: (item.system_id, item.published_at or "", item.title, item.source_name))
"summary": f"{system['system_id']}::{source['name']}::schema::Unsupported source kind {source['kind']}", failures.sort(key=lambda item: (item.get("system_id", ""), item.get("source_name", ""), item.get("category", "")))
}
)
continue
try:
items = handler(system, source)
for item in items:
if _passes_since(item, since_dt, include_undated):
all_candidates.append(item)
except Exception as exc:
failures.append(build_failure(system, source, exc))
return all_candidates, failures return all_candidates, failures
@@ -224,18 +279,10 @@ def probe_sources(
source_map: Dict[str, Any], source_map: Dict[str, Any],
tier: Optional[str] = None, tier: Optional[str] = None,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
jobs: List[Tuple[Dict[str, Any], Dict[str, Any]]] = [] jobs = _collect_jobs(source_map, tier=tier)
probes: List[Dict[str, Any]] = [] probes: List[Dict[str, Any]] = []
failures: List[Dict[str, Any]] = [] failures: List[Dict[str, Any]] = []
with ThreadPoolExecutor(max_workers=_max_workers(len(jobs))) as executor:
for system in source_map["systems"]:
if tier and system.get("tier") != tier:
continue
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=False):
jobs.append((system, source))
max_workers = min(16, max(4, len(jobs) or 1))
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_map = {executor.submit(probe_source, system, source): (system, source) for system, source in jobs} future_map = {executor.submit(probe_source, system, source): (system, source) for system, source in jobs}
for future in as_completed(future_map): for future in as_completed(future_map):
system, source = future_map[future] system, source = future_map[future]
@@ -251,6 +298,8 @@ def probe_sources(
) )
except Exception as exc: except Exception as exc:
failures.append(build_failure(system, source, exc)) failures.append(build_failure(system, source, exc))
probes.sort(key=lambda item: (item["system_id"], item["source_name"]))
failures.sort(key=lambda item: (item.get("system_id", ""), item.get("source_name", ""), item.get("category", "")))
return probes, failures return probes, failures