kb: expand authorized lab coverage and intel automation
这个提交包含在:
@@ -0,0 +1 @@
|
||||
"""Source adapters for advisory ingestion."""
|
||||
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Build candidates from a KEV-style JSON catalog served at ``source["url"]``.

    The catalog's ``vulnerabilities`` entries are kept when any search term
    occurs (case-insensitively) in the entry's vendor, product, name or short
    description. Search terms come from ``source["keywords"]``, else
    ``system["kev_keywords"]``, else the system's display name.

    Args:
        system: System descriptor; reads ``system_id``, ``display_name``,
            ``category`` and optionally ``kev_keywords``.
        source: Source descriptor; reads ``url``, ``kind``, ``name``,
            ``confidence`` and optionally ``keywords`` / ``advisory_mode``.

    Returns:
        One Candidate per matching catalog entry, in catalog order.

    Raises:
        requests.HTTPError: If the catalog request returns an error status.
    """
    feed_url = source["url"]
    reply = requests.get(feed_url, headers={"User-Agent": "websafe-intel"}, timeout=30)
    reply.raise_for_status()
    catalog = reply.json()

    # First non-empty keyword list wins; the display name is the last resort.
    raw_terms = source.get("keywords") or system.get("kev_keywords", []) or [system["display_name"]]
    needles = {term.lower() for term in raw_terms}

    searched_fields = ("vendorProject", "product", "vulnerabilityName", "shortDescription")
    matches: List[Candidate] = []
    for entry in catalog.get("vulnerabilities", []):
        present = [entry.get(field) for field in searched_fields]
        searchable = " ".join(value for value in present if value).lower()
        if all(needle not in searchable for needle in needles):
            continue

        cve_id = entry.get("cveID")
        # NOTE(review): dueDate is stored as updated_at below — confirm this is
        # the intended meaning for downstream date filtering.
        record = Candidate(
            system_id=system["system_id"],
            display_name=system["display_name"],
            category=system["category"],
            advisory_mode=source.get("advisory_mode", "core"),
            source_kind=source["kind"],
            source_name=source["name"],
            source_confidence=source["confidence"],
            source_url=source["url"],
            title=entry.get("vulnerabilityName") or cve_id or f"KEV advisory for {system['display_name']}",
            published_at=entry.get("dateAdded"),
            updated_at=entry.get("dueDate"),
            summary=entry.get("shortDescription") or "",
            severity="critical",
            exploit_status="known_exploited",
            aliases=unique([cve_id]),
            cve_ids=[cve_id] if cve_id else [],
            references=[source["url"]],
            raw=entry,
        )
        matches.append(record)
    return matches
|
||||
@@ -0,0 +1,120 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
API_URL = "https://api.github.com/advisories"
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Collect GitHub global security advisories relevant to ``system``.

    An advisory is kept when one of its vulnerable packages matches an entry
    of ``system["package_names"]`` (same name and ecosystem, compared
    case-insensitively) or, failing that, when any ``system["ghsa_keywords"]``
    term occurs in its summary, description, GHSA id or CVE id.

    Args:
        system: System descriptor; reads ``system_id``, ``display_name``,
            ``category``, ``package_names`` and ``ghsa_keywords``.
        source: Source descriptor; reads ``kind``, ``name``, ``confidence``
            and optionally ``ecosystem`` / ``advisory_mode``.

    Returns:
        One Candidate per matching advisory, in the order the API returned them.

    Raises:
        requests.HTTPError: On an error status. Rate-limit responses get a
            dedicated message pointing at ``GITHUB_TOKEN``.
    """
    headers = {"Accept": "application/vnd.github+json", "User-Agent": "websafe-intel"}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    # (name, ecosystem) pairs so the same package name in two ecosystems
    # cannot overwrite each other.
    wanted_packages = {
        (item["name"].lower(), item["ecosystem"].lower())
        for item in system.get("package_names", [])
        if item.get("name") and item.get("ecosystem")
    }
    keyword_set = {value.lower() for value in system.get("ghsa_keywords", [])}
    candidates: List[Candidate] = []

    # The global advisories endpoint is cursor-paginated: the next page is
    # advertised in the Link header, and a numeric "page" parameter is not part
    # of its contract. Following the "next" link avoids re-fetching page one
    # forever.
    url = API_URL
    params = {"per_page": 100, "ecosystem": source.get("ecosystem")}
    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code in (403, 429) and "rate limit" in response.text.lower():
            raise requests.HTTPError(
                "GitHub advisory rate limit exceeded; set GITHUB_TOKEN for higher quota",
                response=response,
            )
        response.raise_for_status()
        advisories = response.json()
        if not advisories:
            break

        for advisory in advisories:
            matched_vulns = []
            for vuln in advisory.get("vulnerabilities") or []:
                package = vuln.get("package") or {}
                key = (
                    (package.get("name") or "").lower(),
                    (package.get("ecosystem") or "").lower(),
                )
                if key in wanted_packages:
                    matched_vulns.append(vuln)

            if not matched_vulns:
                # No package hit: fall back to keyword search. With an empty
                # keyword set any() is False, so the advisory is skipped.
                haystack = " ".join(
                    filter(
                        None,
                        [
                            advisory.get("summary"),
                            advisory.get("description"),
                            advisory.get("ghsa_id"),
                            advisory.get("cve_id"),
                        ],
                    )
                ).lower()
                if not any(keyword in haystack for keyword in keyword_set):
                    continue

            affected_versions = []
            fixed_versions = []
            package_name = None
            for vuln in matched_vulns:
                if vuln.get("vulnerable_version_range"):
                    affected_versions.append(vuln["vulnerable_version_range"])
                patched = vuln.get("first_patched_version") or {}
                if patched.get("identifier"):
                    fixed_versions.append(patched["identifier"])
                if not package_name and vuln.get("package"):
                    package_name = vuln["package"].get("name")

            # "identifiers" entries are {"type": ..., "value": ...} objects;
            # only the value string belongs in the alias list.
            identifier_values = []
            for ident in advisory.get("identifiers") or []:
                value = ident.get("value") if isinstance(ident, dict) else ident
                if value:
                    identifier_values.append(value)

            aliases = unique(
                [
                    advisory.get("ghsa_id"),
                    advisory.get("cve_id"),
                    *identifier_values,
                ]
            )
            cve_ids = [advisory["cve_id"]] if advisory.get("cve_id") else []
            ghsa_ids = [advisory["ghsa_id"]] if advisory.get("ghsa_id") else []
            html_url = advisory.get("html_url")

            candidates.append(
                Candidate(
                    system_id=system["system_id"],
                    display_name=system["display_name"],
                    category=system["category"],
                    advisory_mode=source.get("advisory_mode", "core"),
                    source_kind=source["kind"],
                    source_name=source["name"],
                    source_confidence=source["confidence"],
                    source_url=html_url or API_URL,
                    title=advisory.get("summary") or advisory.get("ghsa_id") or "GitHub advisory",
                    published_at=advisory.get("published_at"),
                    updated_at=advisory.get("updated_at"),
                    summary=advisory.get("description") or "",
                    severity=(advisory.get("severity") or "unknown").lower(),
                    aliases=aliases,
                    cve_ids=cve_ids,
                    ghsa_ids=ghsa_ids,
                    affected_versions=unique(affected_versions),
                    fixed_versions=unique(fixed_versions),
                    package_name=package_name,
                    references=[html_url] if html_url else [],
                    raw=advisory,
                )
            )

        # The "next" URL already carries every query parameter.
        url = (response.links.get("next") or {}).get("url")
        params = None

    return candidates
|
||||
@@ -0,0 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from html import unescape
|
||||
from typing import Any, Dict, List
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
ANCHOR_RE = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGNORECASE | re.DOTALL)
|
||||
TAG_RE = re.compile(r"<[^>]+>")
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Turn the hyperlinks of the HTML page at ``source["url"]`` into candidates.

    Each ``<a href=...>`` with non-empty link text becomes one candidate whose
    title is the link text and whose URL is the href resolved against the page
    URL. When ``source["keywords"]`` is non-empty, a link is kept only if a
    keyword occurs (case-insensitively) in its title or resolved URL.

    Args:
        system: System descriptor; reads ``system_id``, ``display_name``
            and ``category``.
        source: Source descriptor; reads ``url``, ``kind``, ``name``,
            ``confidence`` and optionally ``keywords``, ``advisory_mode`` and
            ``max_items`` (default 50).

    Returns:
        At most ``max_items`` candidates, one per distinct resolved URL, in
        document order.

    Raises:
        requests.HTTPError: If the page request returns an error status.
    """
    base_url = source["url"]
    response = requests.get(base_url, headers={"User-Agent": "websafe-intel"}, timeout=30)
    response.raise_for_status()
    html = response.text
    keywords = {kw.lower() for kw in source.get("keywords", [])}
    max_items = source.get("max_items", 50)

    candidates: List[Candidate] = []
    seen = set()
    for href, text in ANCHOR_RE.findall(html):
        # Check the cap before adding so that max_items=0 really yields nothing.
        if len(candidates) >= max_items:
            break

        # Strip nested tags, decode entities and collapse runs of whitespace
        # (link text often spans several source lines).
        title = " ".join(unescape(TAG_RE.sub(" ", text)).split())
        if not title:
            continue

        # Attribute values are entity-encoded in HTML ("&amp;" for "&"); decode
        # before resolving so the resulting URL is the one a browser would use.
        absolute = urljoin(base_url, unescape(href).strip())
        if not absolute.lower().startswith(("http://", "https://")):
            # javascript:, mailto:, tel: and similar links are not advisories.
            continue

        haystack = f"{title} {absolute}".lower()
        if keywords and not any(keyword in haystack for keyword in keywords):
            continue
        if absolute in seen:
            continue
        seen.add(absolute)

        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=absolute,
                title=title,
                summary="",
                severity="unknown",
                references=unique([absolute]),
                raw={"href": absolute, "title": title},
            )
        )
    return candidates
|
||||
68
scripts/intel/sources/nvd_api.py
普通文件
68
scripts/intel/sources/nvd_api.py
普通文件
@@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Run one keyword search against the NVD CVE API and wrap the results.

    The search term is ``source["keyword"]`` or, when absent, the system's
    display name. Only the first result page (``results_per_page``, default
    50) is requested. ``NVD_API_KEY`` from the environment is sent as the
    ``apiKey`` header when set.

    Args:
        system: System descriptor; reads ``system_id``, ``display_name``
            and ``category``.
        source: Source descriptor; reads ``kind``, ``name``, ``confidence``
            and optionally ``keyword``, ``results_per_page``, ``advisory_mode``.

    Returns:
        One Candidate per CVE record in the response.

    Raises:
        requests.HTTPError: If the API request returns an error status.
    """
    params = {
        "keywordSearch": source.get("keyword") or system["display_name"],
        "resultsPerPage": source.get("results_per_page", 50),
    }
    headers = {"User-Agent": "websafe-intel"}
    api_key = os.environ.get("NVD_API_KEY")
    if api_key:
        headers["apiKey"] = api_key

    response = requests.get(API_URL, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    payload = response.json()

    # Preference order for the CVSS metric block. v4.0 is appended last so
    # records that already resolve through v3.x/v2 keep their previous score,
    # while v4-only records no longer fall through to "unknown".
    metric_keys = ("cvssMetricV31", "cvssMetricV30", "cvssMetricV2", "cvssMetricV40")

    candidates: List[Candidate] = []
    for item in payload.get("vulnerabilities") or []:
        cve = item.get("cve") or {}
        cve_id = cve.get("id")
        descriptions = cve.get("descriptions") or []
        description = next((d.get("value") for d in descriptions if d.get("lang") == "en"), "")

        metrics = cve.get("metrics") or {}
        severity = "unknown"
        cvss_score = None
        for key in metric_keys:
            entries = metrics.get(key) or []
            if entries:
                first = entries[0]
                data = first.get("cvssData") or {}
                # baseSeverity is read from the entry first, then from cvssData.
                severity = (first.get("baseSeverity") or data.get("baseSeverity") or "unknown").lower()
                cvss_score = data.get("baseScore")
                break

        refs = [ref.get("url") for ref in cve.get("references") or [] if ref.get("url")]
        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=refs[0] if refs else API_URL,
                title=cve_id or f"NVD advisory for {system['display_name']}",
                published_at=cve.get("published"),
                updated_at=cve.get("lastModified"),
                summary=description or "",
                severity=severity,
                cvss_score=cvss_score,
                aliases=unique([cve_id]),
                cve_ids=[cve_id] if cve_id else [],
                references=refs,
                raw=item,
            )
        )
    return candidates
|
||||
154
scripts/intel/sources/osv_api.py
普通文件
154
scripts/intel/sources/osv_api.py
普通文件
@@ -0,0 +1,154 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
QUERY_BATCH_URL = "https://api.osv.dev/v1/querybatch"
|
||||
DETAIL_URL = "https://api.osv.dev/v1/vulns/{vuln_id}"
|
||||
CVSS_SCORE_RE = re.compile(r"/CVSS:3\.[01]/AV:[A-Z]/AC:[A-Z]/PR:[A-Z]/UI:[A-Z]/S:[A-Z]/C:[A-Z]/I:[A-Z]/A:[A-Z]")
|
||||
NUMERIC_SCORE_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)")
|
||||
|
||||
|
||||
def _fetch_detail(session: requests.Session, vuln_id: str) -> Dict[str, Any]:
    """Fetch the full record for ``vuln_id`` from the OSV detail endpoint.

    Args:
        session: Session used to issue the GET request.
        vuln_id: Identifier substituted into ``DETAIL_URL``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the endpoint returns an error status.
    """
    detail_url = DETAIL_URL.format(vuln_id=vuln_id)
    reply = session.get(detail_url, headers={"User-Agent": "websafe-intel"}, timeout=30)
    reply.raise_for_status()
    return reply.json()
|
||||
|
||||
|
||||
def _fixed_versions(vuln: Dict[str, Any]) -> List[str]:
    """Gather every ``fixed`` event value found under ``vuln["affected"]``.

    Walks affected -> ranges -> events and passes the non-empty ``fixed``
    values, in encounter order, through ``unique``.
    """
    fixes = [
        event["fixed"]
        for entry in vuln.get("affected", [])
        for span in entry.get("ranges", [])
        for event in span.get("events", [])
        if event.get("fixed")
    ]
    return unique(fixes)
|
||||
|
||||
|
||||
def _affected_versions(vuln: Dict[str, Any]) -> List[str]:
    """Describe the affected versions of an OSV record as a list of strings.

    For every ``affected`` entry this contributes up to 20 explicit version
    strings plus one summary string per range. A range summary joins, in this
    order, the first non-empty ``introduced``, ``last_affected``, ``fixed`` and
    ``limit`` event values of that range; ranges without any of them are
    skipped. Explicit versions come before range summaries in the result, and
    the combined list is passed through ``unique``.
    """
    explicit = []
    summaries = []
    for entry in vuln.get("affected", []):
        # Cap explicit versions per entry to keep the list manageable.
        explicit += entry.get("versions", [])[:20]

        for span in entry.get("ranges", []):
            events = span.get("events", [])

            def first_value(key: str) -> Any:
                # First truthy value for ``key`` across this range's events.
                return next((ev.get(key) for ev in events if ev.get(key)), None)

            introduced = first_value("introduced")
            last_affected = first_value("last_affected")
            fixed = first_value("fixed")
            limit = first_value("limit")

            rendered = [
                f"introduced={introduced}" if introduced else None,
                f"last_affected={last_affected}" if last_affected else None,
                f"fixed<{fixed}" if fixed else None,
                f"limit<{limit}" if limit else None,
            ]
            pieces = [piece for piece in rendered if piece is not None]
            if pieces:
                summaries.append(", ".join(pieces))
    return unique(explicit + summaries)
|
||||
|
||||
|
||||
def _severity(vuln: Dict[str, Any]) -> tuple[str, float | None]:
|
||||
best_score = None
|
||||
for sev in vuln.get("severity", []):
|
||||
score = sev.get("score", "")
|
||||
match = NUMERIC_SCORE_RE.search(score)
|
||||
if match:
|
||||
try:
|
||||
best_score = float(match.group(1))
|
||||
break
|
||||
except ValueError:
|
||||
continue
|
||||
if best_score is None:
|
||||
return "unknown", None
|
||||
if best_score >= 9.0:
|
||||
return "critical", best_score
|
||||
if best_score >= 7.0:
|
||||
return "high", best_score
|
||||
if best_score >= 4.0:
|
||||
return "medium", best_score
|
||||
return "low", best_score
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Query OSV for every package of ``system`` and wrap the vulnerabilities.

    One batch query is sent with an entry per package; each vulnerability id in
    the reply is then expanded through the detail endpoint (fetched once per
    id) and turned into a Candidate attributed to the package whose query
    returned it.

    Args:
        system: System descriptor; reads ``system_id``, ``display_name``,
            ``category`` and ``package_names`` (items with ``name`` and
            ``ecosystem``; items missing either are skipped).
        source: Source descriptor; reads ``kind``, ``name``, ``confidence``
            and optionally ``advisory_mode``.

    Returns:
        One Candidate per (package, vulnerability) pair; empty when the system
        lists no usable package.

    Raises:
        requests.HTTPError: If the batch or any detail request returns an
            error status.
    """
    # Skip incomplete package entries instead of failing with KeyError. The
    # filtered list is also what the results are zipped against, so positions
    # stay aligned with the queries actually sent.
    packages = [
        pkg
        for pkg in system.get("package_names", [])
        if pkg.get("name") and pkg.get("ecosystem")
    ]
    if not packages:
        return []

    queries = [{"package": {"name": pkg["name"], "ecosystem": pkg["ecosystem"]}} for pkg in packages]
    detail_cache: Dict[str, Dict[str, Any]] = {}
    candidates: List[Candidate] = []

    # Context manager releases the session's pooled connections on every exit
    # path, including errors.
    with requests.Session() as session:
        response = session.post(
            QUERY_BATCH_URL,
            json={"queries": queries},
            headers={"User-Agent": "websafe-intel"},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()

        for package, result in zip(packages, payload.get("results") or []):
            for summary in (result or {}).get("vulns") or []:
                vuln_id = summary.get("id")
                if not vuln_id:
                    continue
                if vuln_id not in detail_cache:
                    detail_cache[vuln_id] = _fetch_detail(session, vuln_id)
                vuln = detail_cache[vuln_id]

                # "aliases" may be absent or null in a record.
                aliases = unique((vuln.get("aliases") or []) + [vuln.get("id")])
                refs = [ref.get("url") for ref in vuln.get("references") or [] if ref.get("url")]
                severity, cvss_score = _severity(vuln)

                candidates.append(
                    Candidate(
                        system_id=system["system_id"],
                        display_name=system["display_name"],
                        category=system["category"],
                        advisory_mode=source.get("advisory_mode", "core"),
                        source_kind=source["kind"],
                        source_name=source["name"],
                        source_confidence=source["confidence"],
                        source_url=refs[0] if refs else DETAIL_URL.format(vuln_id=vuln_id),
                        title=vuln.get("summary") or vuln.get("id") or f"OSV advisory for {package['name']}",
                        published_at=vuln.get("published"),
                        updated_at=vuln.get("modified"),
                        summary=vuln.get("details") or "",
                        severity=severity,
                        cvss_score=cvss_score,
                        aliases=aliases,
                        cve_ids=[item for item in aliases if item and item.startswith("CVE-")],
                        ghsa_ids=[item for item in aliases if item and item.startswith("GHSA-")],
                        osv_ids=[vuln.get("id")] if vuln.get("id") else [],
                        affected_versions=_affected_versions(vuln),
                        fixed_versions=_fixed_versions(vuln),
                        package_name=package["name"],
                        references=refs,
                        raw=vuln,
                    )
                )
    return candidates
|
||||
@@ -0,0 +1,51 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from intel.models import Candidate
|
||||
|
||||
|
||||
def _text(node: ET.Element, name: str) -> str:
    """Return the stripped text of the first subelement matching ``name``.

    Yields an empty string when no such subelement exists or it has no text.
    """
    child = node.find(name)
    if child is None:
        return ""
    content = child.text
    if not content:
        return ""
    return content.strip()
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Convert the ``<item>`` elements of the feed at ``source["url"]`` into candidates.

    Only the first ``max_items`` (default 50) items of the document are
    examined. When ``source["keywords"]`` is non-empty, an item is kept only if
    a keyword occurs (case-insensitively) in its title or description. The
    item's ``pubDate`` text is used for both the published and updated
    timestamps.

    Args:
        system: System descriptor; reads ``system_id``, ``display_name``
            and ``category``.
        source: Source descriptor; reads ``url``, ``kind``, ``name``,
            ``confidence`` and optionally ``keywords``, ``max_items``,
            ``advisory_mode``.

    Returns:
        One Candidate per retained item, in feed order.

    Raises:
        requests.HTTPError: If the feed request returns an error status.
    """
    reply = requests.get(source["url"], headers={"User-Agent": "websafe-intel"}, timeout=30)
    reply.raise_for_status()
    document = ET.fromstring(reply.content)

    needles = {kw.lower() for kw in source.get("keywords", [])}
    entries = document.findall(".//item")
    limit = source.get("max_items", 50)

    found: List[Candidate] = []
    for entry in entries[:limit]:
        title = _text(entry, "title")
        # Items without their own link point back at the feed itself.
        link = _text(entry, "link") or source["url"]
        description = _text(entry, "description")

        searchable = " ".join([title, description]).lower()
        if needles and not any(needle in searchable for needle in needles):
            continue

        stamp = _text(entry, "pubDate")
        found.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=link,
                title=title or f"RSS entry for {system['display_name']}",
                published_at=stamp,
                updated_at=stamp,
                summary=description,
                severity="unknown",
                references=[link],
                raw={"title": title, "link": link},
            )
        )
    return found
|
||||
57
scripts/intel/sources/runner.py
普通文件
57
scripts/intel/sources/runner.py
普通文件
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from intel.models import Candidate
|
||||
from intel.utils import parse_dt
|
||||
|
||||
from . import cisa_kev, github_global, html_links, nvd_api, osv_api, rss_feed
|
||||
|
||||
|
||||
# Maps a source's "kind" value to the adapter function that fetches it.
HANDLERS = dict(
    (
        ("ghsa-global", github_global.fetch),
        ("osv-batch", osv_api.fetch),
        ("kev-json", cisa_kev.fetch),
        ("nvd-search", nvd_api.fetch),
        ("rss-feed", rss_feed.fetch),
        ("html-links", html_links.fetch),
    )
)
|
||||
|
||||
|
||||
def _passes_since(candidate: Candidate, since_dt: Optional[datetime], include_undated: bool) -> bool:
    """Decide whether ``candidate`` survives the ``since_dt`` cutoff.

    With no cutoff everything passes. Otherwise the later of the candidate's
    parsed ``updated_at`` / ``published_at`` must be at or after ``since_dt``;
    a candidate with neither timestamp parseable passes only when
    ``include_undated`` is true.
    """
    if since_dt is None:
        return True

    newest = None
    for stamp in (candidate.updated_at, candidate.published_at):
        parsed = parse_dt(stamp)
        if parsed is None:
            continue
        if newest is None or parsed > newest:
            newest = parsed

    if newest is None:
        return include_undated
    return newest >= since_dt
|
||||
|
||||
|
||||
def collect_candidates(
    source_map: Dict[str, Any],
    since_dt: Optional[datetime] = None,
    tier: Optional[str] = None,
    include_undated: bool = False,
) -> Tuple[List[Candidate], List[str]]:
    """Run every configured source adapter and gather the surviving candidates.

    Iterates ``source_map["systems"]`` (optionally restricted to one ``tier``)
    and, per system, the official, ecosystem and research source buckets in
    that order. Each source is dispatched through ``HANDLERS`` by its ``kind``;
    results are filtered with ``_passes_since``.

    Args:
        source_map: Configuration holding the ``systems`` list.
        since_dt: Optional cutoff forwarded to ``_passes_since``.
        tier: When truthy, only systems whose ``tier`` equals it are processed.
        include_undated: Forwarded to ``_passes_since``.

    Returns:
        ``(candidates, failures)``. A source with an unknown kind, or whose
        adapter raises, adds a message to ``failures`` and processing continues
        with the next source.
    """
    collected: List[Candidate] = []
    problems: List[str] = []
    buckets = ("official_sources", "ecosystem_sources", "research_sources")

    for system in source_map["systems"]:
        if tier and system.get("tier") != tier:
            continue
        for bucket in buckets:
            for source in system.get(bucket, []):
                fetcher = HANDLERS.get(source["kind"])
                if fetcher is None:
                    problems.append(f"Unsupported source kind {source['kind']} for {system['system_id']}")
                    continue
                try:
                    for item in fetcher(system, source):
                        if not _passes_since(item, since_dt, include_undated):
                            continue
                        collected.append(item)
                except Exception as exc:
                    # Best effort: one failing source must not abort the run.
                    problems.append(f"{system['system_id']}::{source['name']}::{exc.__class__.__name__}")
    return collected, problems
|
||||
在新工单中引用
屏蔽一个用户