from __future__ import annotations import os from typing import Any, Dict, List import requests from intel.http_client import request from intel.models import Candidate from intel.utils import unique API_URL = "https://api.github.com/advisories" def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]: headers = {"Accept": "application/vnd.github+json", "User-Agent": "websafe-intel"} token = os.environ.get("GITHUB_TOKEN") if token: headers["Authorization"] = f"Bearer {token}" page = 1 packages = { item["name"].lower(): item["ecosystem"].lower() for item in system.get("package_names", []) if item.get("name") and item.get("ecosystem") } keyword_set = {value.lower() for value in system.get("ghsa_keywords", [])} candidates: List[Candidate] = [] while True: response = request( "GET", API_URL, source=source, headers=headers, params={"per_page": 100, "page": page, "ecosystem": source.get("ecosystem")}, ) if response.status_code == 403 and "rate limit" in response.text.lower(): raise requests.HTTPError("GitHub advisory rate limit exceeded; set GITHUB_TOKEN for higher quota", response=response) response.raise_for_status() advisories = response.json() if not advisories: break for advisory in advisories: matched_vulns = [] for vuln in advisory.get("vulnerabilities", []): package = (vuln.get("package") or {}) package_name = (package.get("name") or "").lower() ecosystem = (package.get("ecosystem") or "").lower() if package_name in packages and packages[package_name] == ecosystem: matched_vulns.append(vuln) haystack = " ".join( filter( None, [ advisory.get("summary"), advisory.get("description"), advisory.get("ghsa_id"), advisory.get("cve_id"), ], ) ).lower() if not matched_vulns and keyword_set and not any(keyword in haystack for keyword in keyword_set): continue if not matched_vulns and not keyword_set: continue affected_versions = [] fixed_versions = [] package_name = None for vuln in matched_vulns: if vuln.get("vulnerable_version_range"): affected_versions.append(vuln["vulnerable_version_range"]) patched = vuln.get("first_patched_version") or {} if patched.get("identifier"): fixed_versions.append(patched["identifier"]) if not package_name and vuln.get("package"): package_name = vuln["package"].get("name") aliases = unique( [ advisory.get("ghsa_id"), advisory.get("cve_id"), *(advisory.get("identifiers") or []), ] ) cve_ids = [advisory["cve_id"]] if advisory.get("cve_id") else [] ghsa_ids = [advisory["ghsa_id"]] if advisory.get("ghsa_id") else [] candidates.append( Candidate( system_id=system["system_id"], display_name=system["display_name"], category=system["category"], advisory_mode=source.get("advisory_mode", "core"), source_kind=source["kind"], source_name=source["name"], source_confidence=source["confidence"], source_url=advisory.get("html_url") or API_URL, title=advisory.get("summary") or advisory.get("ghsa_id") or "GitHub advisory", published_at=advisory.get("published_at"), updated_at=advisory.get("updated_at"), summary=advisory.get("description") or "", severity=(advisory.get("severity") or "unknown").lower(), aliases=aliases, cve_ids=cve_ids, ghsa_ids=ghsa_ids, affected_versions=unique(affected_versions), fixed_versions=unique(fixed_versions), package_name=package_name, references=[advisory.get("html_url")] if advisory.get("html_url") else [], raw=advisory, ) ) page += 1 if len(advisories) < 100: break return candidates