更新: 109 个文件 - 2026-03-18 10:55:52
这个提交包含在:
@@ -1,6 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
@@ -11,6 +13,30 @@ from intel.utils import unique
|
||||
|
||||
|
||||
# Endpoint of the NVD CVE API (version 2.0) used by every request in this module.
API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

# Minimum spacing, in seconds, kept between requests sent without an API key.
PUBLIC_INTERVAL_SECONDS: float = 7.0

# Held while a keyless caller waits for its turn, so callers proceed one at a time.
_NVD_RATE_LOCK = threading.Lock()

# time.monotonic() reading stored when the latest keyless slot was granted.
# Starts at 0.0, i.e. before any slot has been handed out.
_NVD_LAST_REQUEST: float = 0.0
|
||||
|
||||
|
||||
def _wait_for_slot() -> None:
    """Block until the next keyless NVD request may be sent.

    Returns immediately when the ``NVD_API_KEY`` environment variable holds a
    non-empty value. Otherwise the caller takes ``_NVD_RATE_LOCK``, sleeps for
    whatever remains of ``PUBLIC_INTERVAL_SECONDS`` since the previous slot,
    and records the current monotonic time as the new slot.
    """
    global _NVD_LAST_REQUEST

    keyless = not os.environ.get("NVD_API_KEY")
    if keyless:
        # The sleep happens while the lock is held, so concurrent callers
        # queue up behind each other instead of all waking at once.
        with _NVD_RATE_LOCK:
            since_last = time.monotonic() - _NVD_LAST_REQUEST
            if since_last < PUBLIC_INTERVAL_SECONDS:
                time.sleep(PUBLIC_INTERVAL_SECONDS - since_last)
            _NVD_LAST_REQUEST = time.monotonic()
|
||||
|
||||
|
||||
def request_nvd(source: Dict[str, Any], headers: Dict[str, Any], params: Dict[str, Any]) -> requests.Response:
    """Send a rate-limited GET to ``API_URL`` and return the response.

    Args:
        source: Source configuration, forwarded to ``request`` unchanged.
        headers: HTTP headers for the call.
        params: Query parameters for the call.

    Returns:
        The response of the first attempt, or of the single retry that is
        made when the first attempt answers 429 and ``NVD_API_KEY`` is unset
        or empty. The status is not checked here; callers decide whether to
        raise.
    """
    _wait_for_slot()
    first_try = request("GET", API_URL, source=source, headers=headers, params=params)

    throttled = first_try.status_code == 429
    if not throttled or os.environ.get("NVD_API_KEY"):
        return first_try

    # Keyless and throttled: pause one full interval, queue for a fresh slot,
    # then try exactly once more.
    time.sleep(PUBLIC_INTERVAL_SECONDS)
    _wait_for_slot()
    return request("GET", API_URL, source=source, headers=headers, params=params)
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
@@ -23,7 +49,7 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
if api_key:
|
||||
headers["apiKey"] = api_key
|
||||
|
||||
response = request("GET", API_URL, source=source, headers=headers, params=params)
|
||||
response = request_nvd(source, headers, params)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
|
||||
|
||||
@@ -127,7 +127,7 @@ def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, An
|
||||
api_key = os.environ.get("NVD_API_KEY")
|
||||
if api_key:
|
||||
headers["apiKey"] = api_key
|
||||
response = request("GET", nvd_api.API_URL, source=source, headers=headers, params=params)
|
||||
response = nvd_api.request_nvd(source, headers, params)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
if not isinstance(payload, dict):
|
||||
@@ -160,7 +160,7 @@ def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, An
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
html = response.text
|
||||
return {"kind": kind, "items_seen": len(html_links.ANCHOR_RE.findall(html))}
|
||||
return {"kind": kind, "items_seen": len(vendor_index.extract_links(html))}
|
||||
raise ValueError(f"Unsupported source kind {kind}")
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
from html import unescape
|
||||
from html.parser import HTMLParser
|
||||
from typing import Any, Dict, List
|
||||
from urllib.parse import urljoin
|
||||
|
||||
@@ -9,7 +10,42 @@ from intel.http_client import request
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
from .html_links import ANCHOR_RE, TAG_RE, canonicalize_url
|
||||
from .html_links import canonicalize_url
|
||||
|
||||
|
||||
class _AnchorCollector(HTMLParser):
    """Collect ``(href, text)`` pairs for the ``<a href=...>`` elements fed to it.

    The text of an anchor is the space-joined, stripped concatenation of the
    character data found between its start tag and its end. Anchors without a
    non-empty ``href`` are ignored.
    """

    def __init__(self) -> None:
        # convert_charrefs=True (the default, spelled out here) makes the
        # parser hand over character data with entities already decoded, so
        # the text must not be unescaped a second time.
        super().__init__(convert_charrefs=True)
        # Completed anchors, in document order.
        self.links: List[tuple[str, str]] = []
        # href of the anchor being collected; None while outside an anchor.
        self._href: str | None = None
        # Character data seen so far inside the current anchor.
        self._chunks: List[str] = []

    def _flush(self) -> None:
        """Record the anchor being collected, if any, and reset the state."""
        if self._href is None:
            return
        text = " ".join(self._chunks).strip()
        self.links.append((self._href, text))
        self._href = None
        self._chunks = []

    def handle_starttag(self, tag: str, attrs) -> None:
        """Start collecting when an ``<a>`` tag with a non-empty href opens."""
        if tag.lower() != "a":
            return
        # A new <a> ends an anchor that was never closed; without this the
        # earlier link would be silently overwritten and lost.
        self._flush()
        href = dict(attrs).get("href")
        if href:
            self._href = href
            self._chunks = []

    def handle_data(self, data: str) -> None:
        """Keep character data only while inside a collected anchor."""
        if self._href is not None:
            self._chunks.append(data)

    def handle_endtag(self, tag: str) -> None:
        """Finish the current anchor when its ``</a>`` arrives."""
        if tag.lower() == "a":
            self._flush()
|
||||
|
||||
|
||||
def extract_links(html: str) -> List[tuple[str, str]]:
    """Return the ``(href, text)`` pairs that ``_AnchorCollector`` finds in *html*.

    Args:
        html: Markup to scan.

    Returns:
        The collector's ``links`` list, in the order the anchors were parsed.
    """
    collector = _AnchorCollector()
    collector.feed(html)
    # close() makes the parser process any input it is still buffering.
    collector.close()
    found = collector.links
    return found
|
||||
|
||||
|
||||
def _matches(value: str, patterns: List[str]) -> bool:
|
||||
@@ -29,9 +65,9 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
|
||||
candidates: List[Candidate] = []
|
||||
seen = set()
|
||||
for href, text in ANCHOR_RE.findall(html):
|
||||
for href, text in extract_links(html):
|
||||
absolute = canonicalize_url(urljoin(source["url"], href))
|
||||
title = unescape(TAG_RE.sub(" ", text)).strip()
|
||||
title = unescape(text).strip()
|
||||
if not title:
|
||||
continue
|
||||
haystack = " ".join(filter(None, [absolute, title])).lower()
|
||||
|
||||
在新工单中引用
屏蔽一个用户