kb: expand authorized lab coverage and intel automation

2026-03-16 22:04:51 -07:00
--- a/scripts/intel/sources/github_global.py
+++ b/scripts/intel/sources/github_global.py
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+import os
+from typing import Any, Dict, List
+
+import requests
+
+from intel.models import Candidate
+from intel.utils import unique
+
+
+API_URL = "https://api.github.com/advisories"  # GitHub REST endpoint queried by fetch() for advisories
+
+
+def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
+    """Collect GitHub global security advisories that are relevant to ``system``.
+
+    An advisory is kept when one of its vulnerabilities names a package from
+    ``system["package_names"]`` (name and ecosystem, case-insensitive) or when a
+    ``system["ghsa_keywords"]`` entry occurs in its summary, description, GHSA id
+    or CVE id.  ``source`` supplies the optional ``ecosystem`` query filter and
+    the ``kind``/``name``/``confidence`` values copied onto each candidate.
+
+    Raises:
+        requests.HTTPError: on any error status; rate-limit responses carry a
+            hint to set ``GITHUB_TOKEN``.
+    """
+    headers = {"Accept": "application/vnd.github+json", "User-Agent": "websafe-intel"}
+    token = os.environ.get("GITHUB_TOKEN")
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+    packages = {
+        item["name"].lower(): item["ecosystem"].lower()
+        for item in system.get("package_names", [])
+        if item.get("name") and item.get("ecosystem")
+    }
+    # Empty keywords are dropped: "" is a substring of every haystack.
+    keyword_set = {value.lower() for value in system.get("ghsa_keywords", []) if value}
+    candidates: List[Candidate] = []
+
+    # The endpoint is cursor-paginated and has no ``page`` parameter, so follow
+    # the rel="next" Link header; that URL already carries the query string.
+    url = API_URL
+    params = {"per_page": 100, "ecosystem": source.get("ecosystem")}
+    while url:
+        response = requests.get(url, headers=headers, params=params, timeout=30)
+        if response.status_code in (403, 429) and "rate limit" in response.text.lower():
+            raise requests.HTTPError("GitHub advisory rate limit exceeded; set GITHUB_TOKEN for higher quota", response=response)
+        response.raise_for_status()
+        advisories = response.json()
+        if not advisories:
+            break
+        url = response.links.get("next", {}).get("url")
+        params = None
+
+        for advisory in advisories:
+            matched_vulns = []
+            for vuln in advisory.get("vulnerabilities") or []:  # the field may be null
+                package = vuln.get("package") or {}
+                name = (package.get("name") or "").lower()
+                ecosystem = (package.get("ecosystem") or "").lower()
+                if name in packages and packages[name] == ecosystem:
+                    matched_vulns.append(vuln)
+
+            text_fields = ("summary", "description", "ghsa_id", "cve_id")
+            haystack = " ".join(filter(None, map(advisory.get, text_fields))).lower()
+            # any() of an empty keyword set is False, so keyword-less systems need a package match.
+            if not matched_vulns and not any(keyword in haystack for keyword in keyword_set):
+                continue
+
+            affected_versions = []
+            fixed_versions = []
+            package_name = None
+            for vuln in matched_vulns:
+                if vuln.get("vulnerable_version_range"):
+                    affected_versions.append(vuln["vulnerable_version_range"])
+                # REST sends a plain version string; also accept {"identifier": ...} objects.
+                patched = vuln.get("first_patched_version")
+                if isinstance(patched, dict):
+                    patched = patched.get("identifier")
+                if patched:
+                    fixed_versions.append(patched)
+                if not package_name and vuln.get("package"):
+                    package_name = vuln["package"].get("name")
+
+            # ``identifiers`` entries are {"type", "value"} objects; keep only the value.
+            identifiers = [
+                entry.get("value") if isinstance(entry, dict) else entry
+                for entry in advisory.get("identifiers") or []
+            ]
+            ghsa_id, cve_id = advisory.get("ghsa_id"), advisory.get("cve_id")
+            aliases = unique([alias for alias in (ghsa_id, cve_id, *identifiers) if alias])
+
+            candidates.append(
+                Candidate(
+                    system_id=system["system_id"],
+                    display_name=system["display_name"],
+                    category=system["category"],
+                    advisory_mode=source.get("advisory_mode", "core"),
+                    source_kind=source["kind"],
+                    source_name=source["name"],
+                    source_confidence=source["confidence"],
+                    source_url=advisory.get("html_url") or API_URL,
+                    title=advisory.get("summary") or ghsa_id or "GitHub advisory",
+                    published_at=advisory.get("published_at"),
+                    updated_at=advisory.get("updated_at"),
+                    summary=advisory.get("description") or "",
+                    severity=(advisory.get("severity") or "unknown").lower(),
+                    aliases=aliases,
+                    cve_ids=[cve_id] if cve_id else [],
+                    ghsa_ids=[ghsa_id] if ghsa_id else [],
+                    affected_versions=unique(affected_versions),
+                    fixed_versions=unique(fixed_versions),
+                    package_name=package_name,
+                    references=[advisory.get("html_url")] if advisory.get("html_url") else [],
+                    raw=advisory,
+                )
+            )
+
+    return candidates