更新: 109 个文件 - 2026-03-18 10:55:52

2026-03-18 10:55:52 -07:00
--- a/scripts/intel/sources/nvd_api.py
+++ b/scripts/intel/sources/nvd_api.py
@@ -1,6 +1,8 @@
 from __future__ import annotations

 import os
+import threading
+import time
 from typing import Any, Dict, List

 import requests
@@ -11,6 +13,30 @@ from intel.utils import unique


 API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"
+PUBLIC_INTERVAL_SECONDS = 7.0  # minimum gap, in seconds, between requests sent without NVD_API_KEY
+_NVD_RATE_LOCK = threading.Lock()  # guards _NVD_LAST_REQUEST; held across the sleep in _wait_for_slot
+_NVD_LAST_REQUEST = 0.0  # time.monotonic() reading taken when the last keyless slot was granted
+
+
+def _wait_for_slot() -> None:
+    """Block until a keyless NVD request may be sent; return at once if NVD_API_KEY is set."""
+    global _NVD_LAST_REQUEST
+    if not os.environ.get("NVD_API_KEY"):
+        with _NVD_RATE_LOCK:  # kept while sleeping, so concurrent callers queue one behind another
+            remaining = PUBLIC_INTERVAL_SECONDS - (time.monotonic() - _NVD_LAST_REQUEST)
+            if remaining > 0:
+                time.sleep(remaining)
+            _NVD_LAST_REQUEST = time.monotonic()
+
+
+def request_nvd(source: Dict[str, Any], headers: Dict[str, Any], params: Dict[str, Any]) -> requests.Response:
+    _wait_for_slot()  # throttled GET of API_URL; keyless callers wait for their slot first
+    first = request("GET", API_URL, source=source, headers=headers, params=params)
+    if first.status_code != 429 or os.environ.get("NVD_API_KEY"):
+        return first  # any non-429 status, or a keyed caller: hand the response back as-is
+    time.sleep(PUBLIC_INTERVAL_SECONDS)  # keyless 429: back off one full interval
+    _wait_for_slot()
+    return request("GET", API_URL, source=source, headers=headers, params=params)  # single retry


 def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
@@ -23,7 +49,7 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    if api_key:
        headers["apiKey"] = api_key

-    response = request("GET", API_URL, source=source, headers=headers, params=params)
+    response = request_nvd(source, headers, params)
    response.raise_for_status()
    payload = response.json()

--- a/scripts/intel/sources/runner.py
+++ b/scripts/intel/sources/runner.py
@@ -127,7 +127,7 @@ def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, An
        api_key = os.environ.get("NVD_API_KEY")
        if api_key:
            headers["apiKey"] = api_key
-        response = request("GET", nvd_api.API_URL, source=source, headers=headers, params=params)
+        response = nvd_api.request_nvd(source, headers, params)
        response.raise_for_status()
        payload = response.json()
        if not isinstance(payload, dict):
@@ -160,7 +160,7 @@ def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, An
        response = request("GET", source["url"], source=source)
        response.raise_for_status()
        html = response.text
-        return {"kind": kind, "items_seen": len(html_links.ANCHOR_RE.findall(html))}
+        return {"kind": kind, "items_seen": len(vendor_index.extract_links(html))}
    raise ValueError(f"Unsupported source kind {kind}")


--- a/scripts/intel/sources/vendor_index.py
+++ b/scripts/intel/sources/vendor_index.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 import re
 from html import unescape
+from html.parser import HTMLParser
 from typing import Any, Dict, List
 from urllib.parse import urljoin

@@ -9,7 +10,42 @@ from intel.http_client import request
 from intel.models import Candidate
 from intel.utils import unique

-from .html_links import ANCHOR_RE, TAG_RE, canonicalize_url
+from .html_links import canonicalize_url
+
+
+class _AnchorCollector(HTMLParser):
+    """Collect ``(href, text)`` pairs for each ``<a href=...>`` element in the fed HTML."""
+
+    def __init__(self) -> None:
+        super().__init__(convert_charrefs=True)  # the parser decodes entities in text itself
+        self.links: List[tuple[str, str]] = []  # finished anchors, in document order
+        self._href: str | None = None  # href of the anchor being read; None outside one
+        self._chunks: List[str] = []  # text pieces seen inside the open anchor
+
+    def handle_starttag(self, tag: str, attrs) -> None:
+        href = dict(attrs).get("href") if tag == "a" else None  # tag arrives lower-cased
+        if href:  # anchors with a missing or empty href are not recorded
+            self._href = href
+            self._chunks = []
+
+    def handle_data(self, data: str) -> None:
+        if self._href is not None:
+            self._chunks.append(data)
+
+    def handle_endtag(self, tag: str) -> None:
+        if tag != "a" or self._href is None:
+            return
+        # Text is already entity-decoded; unescaping again would turn "&amp;lt;" into "<".
+        self.links.append((self._href, " ".join(self._chunks).strip()))
+        self._href = None
+        self._chunks = []
+
+
+def extract_links(html: str) -> List[tuple[str, str]]:
+    parser = _AnchorCollector()  # fresh collector for each document
+    parser.feed(html)
+    parser.close()  # make the parser process any text it is still buffering
+    return parser.links  # (href, text) tuples gathered by _AnchorCollector


 def _matches(value: str, patterns: List[str]) -> bool:
@@ -29,9 +65,9 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:

    candidates: List[Candidate] = []
    seen = set()
-    for href, text in ANCHOR_RE.findall(html):
+    for href, text in extract_links(html):
        absolute = canonicalize_url(urljoin(source["url"], href))
-        title = unescape(TAG_RE.sub(" ", text)).strip()
+        title = unescape(text).strip()
        if not title:
            continue
        haystack = " ".join(filter(None, [absolute, title])).lower()