更新: 1 个文件 - 2026-03-18 16:13:21
这个提交包含在:
@@ -28,6 +28,9 @@ HANDLERS = {
|
|||||||
"vendor-index": vendor_index.fetch,
|
"vendor-index": vendor_index.fetch,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Worker count _max_workers() falls back to when WEBSAFE_INTEL_MAX_WORKERS is
# unset, empty, or not parseable as an integer.
DEFAULT_MAX_WORKERS = 20
|
||||||
|
# Upper bound _max_workers() applies to the requested worker count, whether it
# came from the environment or from DEFAULT_MAX_WORKERS.
MAX_WORKER_CAP = 32
|
||||||
|
|
||||||
|
|
||||||
def _failure_category(exc: Exception) -> str:
|
def _failure_category(exc: Exception) -> str:
|
||||||
if isinstance(exc, requests.exceptions.SSLError):
|
if isinstance(exc, requests.exceptions.SSLError):
|
||||||
@@ -74,6 +77,68 @@ def build_failure(system: Dict[str, Any], source: Dict[str, Any], exc: Exception
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_jobs(
    source_map: Dict[str, Any],
    *,
    tier: Optional[str] = None,
) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
    """Flatten ``source_map`` into a list of ``(system, source)`` work items.

    Args:
        source_map: Mapping whose ``"systems"`` entry is iterated in order.
        tier: When truthy, only systems whose ``"tier"`` value equals it are
            kept; when falsy, every system is kept.

    Returns:
        One ``(system, source)`` tuple per source yielded by
        ``iter_all_sources`` (called with ``include_retired=False``), in
        system order and then in the order the sources are yielded.
    """
    # Each system is wrapped in its own single-entry map so iter_all_sources
    # only walks that system; the tuple keeps the original system object
    # rather than whatever iter_all_sources yields in its first slot.
    return [
        (system, source)
        for system in source_map["systems"]
        if not tier or system.get("tier") == tier
        for _, _, source in iter_all_sources({"systems": [system]}, include_retired=False)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _max_workers(job_count: int) -> int:
    """Pick the thread-pool size for ``job_count`` pending jobs.

    The requested size comes from the ``WEBSAFE_INTEL_MAX_WORKERS``
    environment variable when it is set to a non-empty integer string, and
    from ``DEFAULT_MAX_WORKERS`` otherwise. It is clamped to the range
    ``[4, MAX_WORKER_CAP]`` and then limited to ``job_count``.

    Args:
        job_count: Number of jobs that will be submitted.

    Returns:
        ``4`` when ``job_count`` is zero or negative; otherwise the clamped
        size, never larger than ``job_count``.
    """
    if job_count <= 0:
        # Still return a positive size so an executor can be constructed
        # even when there is nothing to submit.
        return 4

    raw = os.environ.get("WEBSAFE_INTEL_MAX_WORKERS")
    try:
        requested = int(raw) if raw else DEFAULT_MAX_WORKERS
    except ValueError:
        # A non-numeric override is ignored rather than treated as fatal.
        requested = DEFAULT_MAX_WORKERS

    bounded = min(MAX_WORKER_CAP, requested)
    if bounded < 4:
        bounded = 4
    return min(bounded, job_count)
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_source_candidates(
    system: Dict[str, Any],
    source: Dict[str, Any],
    *,
    since_dt: Optional[datetime],
    include_undated: bool,
) -> Tuple[List[Candidate], Optional[Dict[str, Any]]]:
    """Fetch and filter the candidates of one source, never raising from the handler.

    Args:
        system: System record; ``"system_id"`` and ``"display_name"`` are read
            when a failure record has to be built here.
        source: Source record; ``"kind"`` selects the handler from
            ``HANDLERS``.
        since_dt: Forwarded to ``_passes_since`` for every fetched item.
        include_undated: Forwarded to ``_passes_since`` for every fetched item.

    Returns:
        ``(candidates, None)`` on success, or ``([], failure)`` where
        ``failure`` is a dict describing the problem: a ``"schema"`` record
        when no handler is registered for the source kind, or the result of
        ``build_failure`` when the handler or the filter raised.
    """
    kind = source["kind"]
    fetch = HANDLERS.get(kind)

    if fetch is not None:
        try:
            kept = [
                candidate
                for candidate in fetch(system, source)
                if _passes_since(candidate, since_dt, include_undated)
            ]
        except Exception as exc:
            # Per-source boundary: any handler/filter error becomes a failure
            # record instead of propagating to the caller.
            return [], build_failure(system, source, exc)
        return kept, None

    # No handler registered for this kind: report it as a schema failure.
    system_id = system["system_id"]
    display_name = system["display_name"]
    source_name = source["name"]
    message = f"Unsupported source kind {kind}"
    failure = {
        "system_id": system_id,
        "display_name": display_name,
        "source_name": source_name,
        "source_kind": kind,
        "source_bucket": source.get("bucket_name"),
        "category": "schema",
        "exception": "UnsupportedSourceKind",
        "message": message,
        "status_code": None,
        "url": source.get("url") or "",
        "summary": f"{system_id}::{source_name}::schema::{message}",
    }
    return [], failure
|
||||||
|
|
||||||
|
|
||||||
def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]:
|
def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
kind = source["kind"]
|
kind = source["kind"]
|
||||||
if kind == "ghsa-global":
|
if kind == "ghsa-global":
|
||||||
@@ -187,36 +252,26 @@ def collect_candidates(
|
|||||||
) -> Tuple[List[Candidate], List[Dict[str, Any]]]:
|
) -> Tuple[List[Candidate], List[Dict[str, Any]]]:
|
||||||
all_candidates: List[Candidate] = []
|
all_candidates: List[Candidate] = []
|
||||||
failures: List[Dict[str, Any]] = []
|
failures: List[Dict[str, Any]] = []
|
||||||
|
jobs = _collect_jobs(source_map, tier=tier)
|
||||||
for system in source_map["systems"]:
|
with ThreadPoolExecutor(max_workers=_max_workers(len(jobs))) as executor:
|
||||||
if tier and system.get("tier") != tier:
|
future_map = {
|
||||||
continue
|
executor.submit(
|
||||||
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=False):
|
_collect_source_candidates,
|
||||||
handler = HANDLERS.get(source["kind"])
|
system,
|
||||||
if handler is None:
|
source,
|
||||||
failures.append(
|
since_dt=since_dt,
|
||||||
{
|
include_undated=include_undated,
|
||||||
"system_id": system["system_id"],
|
): (system, source)
|
||||||
"display_name": system["display_name"],
|
for system, source in jobs
|
||||||
"source_name": source["name"],
|
}
|
||||||
"source_kind": source["kind"],
|
for future in as_completed(future_map):
|
||||||
"source_bucket": source.get("bucket_name"),
|
items, failure = future.result()
|
||||||
"category": "schema",
|
if items:
|
||||||
"exception": "UnsupportedSourceKind",
|
all_candidates.extend(items)
|
||||||
"message": f"Unsupported source kind {source['kind']}",
|
if failure:
|
||||||
"status_code": None,
|
failures.append(failure)
|
||||||
"url": source.get("url") or "",
|
all_candidates.sort(key=lambda item: (item.system_id, item.published_at or "", item.title, item.source_name))
|
||||||
"summary": f"{system['system_id']}::{source['name']}::schema::Unsupported source kind {source['kind']}",
|
failures.sort(key=lambda item: (item.get("system_id", ""), item.get("source_name", ""), item.get("category", "")))
|
||||||
}
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
items = handler(system, source)
|
|
||||||
for item in items:
|
|
||||||
if _passes_since(item, since_dt, include_undated):
|
|
||||||
all_candidates.append(item)
|
|
||||||
except Exception as exc:
|
|
||||||
failures.append(build_failure(system, source, exc))
|
|
||||||
return all_candidates, failures
|
return all_candidates, failures
|
||||||
|
|
||||||
|
|
||||||
@@ -224,18 +279,10 @@ def probe_sources(
|
|||||||
source_map: Dict[str, Any],
|
source_map: Dict[str, Any],
|
||||||
tier: Optional[str] = None,
|
tier: Optional[str] = None,
|
||||||
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
||||||
jobs: List[Tuple[Dict[str, Any], Dict[str, Any]]] = []
|
jobs = _collect_jobs(source_map, tier=tier)
|
||||||
probes: List[Dict[str, Any]] = []
|
probes: List[Dict[str, Any]] = []
|
||||||
failures: List[Dict[str, Any]] = []
|
failures: List[Dict[str, Any]] = []
|
||||||
|
with ThreadPoolExecutor(max_workers=_max_workers(len(jobs))) as executor:
|
||||||
for system in source_map["systems"]:
|
|
||||||
if tier and system.get("tier") != tier:
|
|
||||||
continue
|
|
||||||
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=False):
|
|
||||||
jobs.append((system, source))
|
|
||||||
|
|
||||||
max_workers = min(16, max(4, len(jobs) or 1))
|
|
||||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
||||||
future_map = {executor.submit(probe_source, system, source): (system, source) for system, source in jobs}
|
future_map = {executor.submit(probe_source, system, source): (system, source) for system, source in jobs}
|
||||||
for future in as_completed(future_map):
|
for future in as_completed(future_map):
|
||||||
system, source = future_map[future]
|
system, source = future_map[future]
|
||||||
@@ -251,6 +298,8 @@ def probe_sources(
|
|||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
failures.append(build_failure(system, source, exc))
|
failures.append(build_failure(system, source, exc))
|
||||||
|
probes.sort(key=lambda item: (item["system_id"], item["source_name"]))
|
||||||
|
failures.sort(key=lambda item: (item.get("system_id", ""), item.get("source_name", ""), item.get("category", "")))
|
||||||
return probes, failures
|
return probes, failures
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
在新工单中引用
屏蔽一个用户