更新: 13 个文件 - 2026-03-18 09:44:57
这个提交包含在:
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from intel.http_client import request
|
||||
from intel.models import Candidate
|
||||
|
||||
|
||||
ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}
|
||||
|
||||
|
||||
def _node_text(node: ET.Element, path: str) -> str:
    """Return the stripped text of the first child of ``node`` matching ``path``.

    ``path`` is resolved against the ``ATOM_NS`` prefix map. An empty string
    is returned when nothing matches or the matching element has no text.
    """
    match = node.find(path, ATOM_NS)
    if match is None or not match.text:
        return ""
    return match.text.strip()
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Download an Atom feed and convert its entries into ``Candidate`` records.

    Args:
        system: System record; ``system_id``, ``display_name`` and
            ``category`` are read from it.
        source: Source record. ``url``, ``kind``, ``name`` and ``confidence``
            are required; ``parser_hints``, ``keywords``, ``max_items``
            (default 50) and ``advisory_mode`` (default ``"core"``) are
            optional.

    Returns:
        One candidate per retained entry, in feed order. Only the first
        ``max_items`` entries are examined, and the keyword filter is applied
        after that cut.

    Raises:
        xml.etree.ElementTree.ParseError: If the response body is not
            well-formed XML.
        Exception: Anything raised by ``request`` or by
            ``response.raise_for_status()`` propagates unchanged.
    """
    response = request("GET", source["url"], source=source)
    response.raise_for_status()
    root = ET.fromstring(response.content)

    parser_hints = source.get("parser_hints") or {}
    # Keywords from parser_hints take precedence over the source-level list.
    keywords = {kw.lower() for kw in (parser_hints.get("keywords") or source.get("keywords", []))}
    candidates: List[Candidate] = []
    # Fall back to un-namespaced <entry> elements for feeds that omit the Atom namespace.
    entries = root.findall(".//atom:entry", ATOM_NS) or root.findall(".//entry")
    for entry in entries[: source.get("max_items", 50)]:
        title = _node_text(entry, "atom:title") or _node_text(entry, "title")

        # An Element without children is falsy, and <link href="..."/> never
        # has children, so the lookup must compare against None explicitly
        # instead of chaining the two finds with ``or``.
        link_node = entry.find("atom:link", ATOM_NS)
        if link_node is None:
            link_node = entry.find("link")
        link = ""
        if link_node is not None:
            link = (link_node.get("href") or "").strip()

        summary = (
            _node_text(entry, "atom:summary")
            or _node_text(entry, "summary")
            or _node_text(entry, "atom:content")
        )
        if keywords:
            haystack = " ".join(filter(None, [title, summary, link])).lower()
            if not any(keyword in haystack for keyword in keywords):
                continue
        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=link or source["url"],
                title=title or f"Atom entry for {system['display_name']}",
                published_at=_node_text(entry, "atom:published") or _node_text(entry, "published"),
                updated_at=_node_text(entry, "atom:updated") or _node_text(entry, "updated"),
                summary=summary,
                severity="unknown",
                references=[link] if link else [source["url"]],
                raw={"title": title, "link": link},
            )
        )
    return candidates
|
||||
@@ -10,7 +10,7 @@ from intel.utils import unique
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
response = request("GET", source["url"])
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
response = request(
|
||||
"GET",
|
||||
API_URL,
|
||||
source=source,
|
||||
headers=headers,
|
||||
params={"per_page": 100, "page": page, "ecosystem": source.get("ecosystem")},
|
||||
)
|
||||
|
||||
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import re
|
||||
from html import unescape
|
||||
from typing import Any, Dict, List
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import urljoin, urlsplit, urlunsplit
|
||||
|
||||
import requests
|
||||
|
||||
@@ -16,11 +16,25 @@ ANCHOR_RE = re.compile(r"<a[^>]+href=[\"']([^\"']+)[\"'][^>]*>(.*?)</a>", re.IGN
|
||||
TAG_RE = re.compile(r"<[^>]+>")
|
||||
|
||||
|
||||
def canonicalize_url(url: str) -> str:
    """Return ``url`` re-assembled from its split parts with the fragment removed."""
    return urlsplit(url)._replace(fragment="").geturl()
|
||||
|
||||
|
||||
def _matches_patterns(value: str, patterns: List[str]) -> bool:
|
||||
if not patterns:
|
||||
return True
|
||||
return any(re.search(pattern, value, re.IGNORECASE) for pattern in patterns)
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
response = request("GET", source["url"])
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
html = response.text
|
||||
keywords = {kw.lower() for kw in source.get("keywords", [])}
|
||||
parser_hints = source.get("parser_hints") or {}
|
||||
keywords = {kw.lower() for kw in (parser_hints.get("keywords") or source.get("keywords", []))}
|
||||
include_patterns = parser_hints.get("include_url_patterns") or []
|
||||
exclude_patterns = parser_hints.get("exclude_url_patterns") or []
|
||||
|
||||
candidates: List[Candidate] = []
|
||||
seen = set()
|
||||
@@ -28,10 +42,14 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
title = unescape(TAG_RE.sub(" ", text)).strip()
|
||||
if not title:
|
||||
continue
|
||||
absolute = urljoin(source["url"], href)
|
||||
absolute = canonicalize_url(urljoin(source["url"], href))
|
||||
haystack = f"{title} {absolute}".lower()
|
||||
if keywords and not any(keyword in haystack for keyword in keywords):
|
||||
continue
|
||||
if include_patterns and not _matches_patterns(absolute, include_patterns):
|
||||
continue
|
||||
if exclude_patterns and _matches_patterns(absolute, exclude_patterns):
|
||||
continue
|
||||
if absolute in seen:
|
||||
continue
|
||||
seen.add(absolute)
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from intel.http_client import request
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
|
||||
def _refs(item: Dict[str, Any]) -> List[str]:
    """Collect the reference URLs listed under ``item["references"]``.

    Each reference may be a plain string or a dict carrying a non-empty
    ``"url"`` value; anything else is ignored. The collected list is passed
    through ``unique`` before being returned (presumably de-duplicating it;
    see ``intel.utils``).
    """
    collected: List[str] = []
    for ref in item.get("references", []) or []:
        if isinstance(ref, str):
            collected.append(ref)
            continue
        if isinstance(ref, dict) and ref.get("url"):
            collected.append(ref["url"])
    return unique(collected)
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Download a JSON feed and convert its items into ``Candidate`` records.

    The item list is taken from the first non-empty of the payload keys
    ``items``, ``entries`` and ``advisories``.

    Args:
        system: System record; ``system_id``, ``display_name`` and
            ``category`` are read from it.
        source: Source record. ``url``, ``kind``, ``name`` and ``confidence``
            are required; ``parser_hints``, ``keywords``, ``max_items``
            (default 50) and ``advisory_mode`` (default ``"core"``) are
            optional.

    Returns:
        One candidate per retained item, in feed order. An empty list is
        returned when the item container is not a list. Only the first
        ``max_items`` items are examined; non-dict items are skipped.

    Raises:
        Exception: Anything raised by ``request``, by
            ``response.raise_for_status()`` or by ``response.json()``
            propagates unchanged.
    """
    response = request("GET", source["url"], source=source)
    response.raise_for_status()
    payload = response.json()
    items = payload.get("items") or payload.get("entries") or payload.get("advisories") or []
    if not isinstance(items, list):
        return []

    parser_hints = source.get("parser_hints") or {}
    # Keywords from parser_hints take precedence over the source-level list.
    keywords = {kw.lower() for kw in (parser_hints.get("keywords") or source.get("keywords", []))}
    candidates: List[Candidate] = []
    for item in items[: source.get("max_items", 50)]:
        if not isinstance(item, dict):
            continue
        title = item.get("title") or item.get("name") or item.get("summary") or f"JSON entry for {system['display_name']}"
        link = item.get("url") or item.get("external_url") or item.get("html_url") or source["url"]
        summary = item.get("summary") or item.get("content_text") or item.get("description") or ""
        if keywords:
            # JSON values are not guaranteed to be strings; coerce them so a
            # structured title/summary cannot make str.join raise TypeError.
            haystack = " ".join(str(part) for part in (title, summary, link) if part).lower()
            if not any(keyword in haystack for keyword in keywords):
                continue
        refs = _refs(item)
        if link and link not in refs:
            refs.insert(0, link)

        # Fall back to the item id only when one exists; otherwise the alias
        # list would contain a bare None.
        item_id = item.get("id")
        fallback_aliases = [item_id] if item_id not in (None, "") else []
        aliases = unique(item.get("aliases") or fallback_aliases)

        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=link,
                title=title,
                published_at=item.get("date_published") or item.get("published_at") or item.get("published") or item.get("created_at"),
                updated_at=item.get("date_modified") or item.get("updated_at") or item.get("modified") or item.get("updated"),
                summary=summary,
                severity=str(item.get("severity") or "unknown").lower(),
                aliases=aliases,
                references=refs,
                raw=item,
            )
        )
    return candidates
|
||||
@@ -23,7 +23,7 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
if api_key:
|
||||
headers["apiKey"] = api_key
|
||||
|
||||
response = request("GET", API_URL, headers=headers, params=params)
|
||||
response = request("GET", API_URL, source=source, headers=headers, params=params)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
|
||||
|
||||
@@ -94,10 +94,11 @@ def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
return []
|
||||
|
||||
queries = [{"package": {"name": pkg["name"], "ecosystem": pkg["ecosystem"]}} for pkg in packages]
|
||||
session = build_session()
|
||||
session = build_session(source)
|
||||
response = request(
|
||||
"POST",
|
||||
QUERY_BATCH_URL,
|
||||
source=source,
|
||||
session=session,
|
||||
json={"queries": queries},
|
||||
headers={"User-Agent": "websafe-intel"},
|
||||
|
||||
@@ -15,11 +15,12 @@ def _text(node: ET.Element, name: str) -> str:
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
|
||||
response = request("GET", source["url"])
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
root = ET.fromstring(response.content)
|
||||
|
||||
keywords = {kw.lower() for kw in source.get("keywords", [])}
|
||||
parser_hints = source.get("parser_hints") or {}
|
||||
keywords = {kw.lower() for kw in (parser_hints.get("keywords") or source.get("keywords", []))}
|
||||
items = root.findall(".//item")
|
||||
candidates: List[Candidate] = []
|
||||
for item in items[: source.get("max_items", 50)]:
|
||||
|
||||
@@ -8,11 +8,12 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from intel.config import iter_all_sources
|
||||
from intel.http_client import request
|
||||
from intel.models import Candidate
|
||||
from intel.utils import parse_dt
|
||||
|
||||
from . import cisa_kev, github_global, html_links, nvd_api, osv_api, rss_feed
|
||||
from . import atom_feed, cisa_kev, github_global, html_links, json_feed, nvd_api, osv_api, rss_feed, vendor_index
|
||||
|
||||
|
||||
HANDLERS = {
|
||||
@@ -21,11 +22,59 @@ HANDLERS = {
|
||||
"kev-json": cisa_kev.fetch,
|
||||
"nvd-search": nvd_api.fetch,
|
||||
"rss-feed": rss_feed.fetch,
|
||||
"atom-feed": atom_feed.fetch,
|
||||
"json-feed": json_feed.fetch,
|
||||
"html-links": html_links.fetch,
|
||||
"vendor-index": vendor_index.fetch,
|
||||
}
|
||||
|
||||
|
||||
def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _failure_category(exc: Exception) -> str:
    """Map an exception from fetching or parsing a source to a coarse category.

    Returns one of ``"tls"``, ``"rate_limit"``, ``"http_status"``,
    ``"network"``, ``"parse"`` or ``"schema"``. Checks run from the most
    specific exception type to the least specific one; anything that is not
    recognised is reported as ``"parse"``.
    """
    import json  # local import: this block cannot see the file's import header

    if isinstance(exc, requests.exceptions.SSLError):
        return "tls"
    if isinstance(exc, requests.exceptions.HTTPError):
        response = getattr(exc, "response", None)
        status = getattr(response, "status_code", None)
        if status == 429:
            return "rate_limit"
        return "http_status"
    # requests' JSONDecodeError derives from RequestException, so it has to be
    # recognised before the generic network check; an undecodable body is a
    # parse problem, not a transport problem. Older requests releases lack the
    # attribute, hence the getattr fallback to the stdlib class.
    json_errors = (
        json.JSONDecodeError,
        getattr(requests.exceptions, "JSONDecodeError", json.JSONDecodeError),
    )
    if isinstance(exc, json_errors):
        return "parse"
    if isinstance(exc, requests.exceptions.RequestException):
        return "network"
    if isinstance(exc, ET.ParseError):
        return "parse"
    if isinstance(exc, ValueError):
        return "schema"
    return "parse"
|
||||
|
||||
|
||||
def failure_summary(failure: Dict[str, Any]) -> str:
    """Render a failure record as a single-line summary.

    A plain string is returned unchanged. For a dict, a truthy ``"summary"``
    value wins; otherwise the summary is assembled from ``system_id``,
    ``source_name``, ``category`` and ``exception`` joined with ``::``.
    """
    if isinstance(failure, str):
        return failure
    explicit = failure.get("summary")
    if explicit:
        return explicit
    system_id = failure.get("system_id")
    source_name = failure.get("source_name")
    category = failure.get("category")
    exception = failure.get("exception")
    return f"{system_id}::{source_name}::{category}::{exception}"
|
||||
|
||||
|
||||
def _build_failure(system: Dict[str, Any], source: Dict[str, Any], exc: Exception) -> Dict[str, Any]:
    """Build the structured failure record for ``exc`` raised by ``source``.

    The record carries identifying fields of the system and source, the
    category from ``_failure_category``, the exception class name and message,
    the HTTP status code when the exception has a ``response`` with one, and a
    ``::``-joined one-line summary.
    """
    category = _failure_category(exc)
    # Fall back to the class name when the exception has no message text.
    message = str(exc).strip() or exc.__class__.__name__
    http_response = getattr(exc, "response", None)

    record: Dict[str, Any] = {
        "system_id": system["system_id"],
        "display_name": system["display_name"],
        "source_name": source["name"],
        "source_kind": source["kind"],
        "source_bucket": source.get("bucket_name"),
        "category": category,
        "exception": exc.__class__.__name__,
        "message": message,
        "status_code": getattr(http_response, "status_code", None),
        "url": source.get("url") or "",
    }
    record["summary"] = f"{system['system_id']}::{source['name']}::{category}::{message}"
    return record
|
||||
|
||||
|
||||
def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]:
|
||||
kind = source["kind"]
|
||||
if kind == "ghsa-global":
|
||||
headers = {"Accept": "application/vnd.github+json", "User-Agent": "websafe-intel"}
|
||||
@@ -35,6 +84,7 @@ def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, A
|
||||
response = request(
|
||||
"GET",
|
||||
github_global.API_URL,
|
||||
source=source,
|
||||
headers=headers,
|
||||
params={"per_page": 1, "page": 1, "ecosystem": source.get("ecosystem")},
|
||||
)
|
||||
@@ -52,6 +102,7 @@ def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, A
|
||||
response = request(
|
||||
"POST",
|
||||
osv_api.QUERY_BATCH_URL,
|
||||
source=source,
|
||||
json={"queries": [{"package": {"name": packages[0]["name"], "ecosystem": packages[0]["ecosystem"]}}]},
|
||||
headers={"User-Agent": "websafe-intel"},
|
||||
)
|
||||
@@ -61,7 +112,7 @@ def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, A
|
||||
raise ValueError("OSV probe returned non-object payload")
|
||||
return {"kind": kind, "items_seen": len(payload.get("results", []))}
|
||||
if kind == "kev-json":
|
||||
response = request("GET", source["url"])
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
if not isinstance(payload, dict):
|
||||
@@ -76,19 +127,37 @@ def _probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, A
|
||||
api_key = os.environ.get("NVD_API_KEY")
|
||||
if api_key:
|
||||
headers["apiKey"] = api_key
|
||||
response = request("GET", nvd_api.API_URL, headers=headers, params=params)
|
||||
response = request("GET", nvd_api.API_URL, source=source, headers=headers, params=params)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
if not isinstance(payload, dict):
|
||||
raise ValueError("NVD probe returned non-object payload")
|
||||
return {"kind": kind, "items_seen": len(payload.get("vulnerabilities", []))}
|
||||
if kind == "rss-feed":
|
||||
response = request("GET", source["url"])
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
root = ET.fromstring(response.content)
|
||||
return {"kind": kind, "items_seen": len(root.findall(".//item"))}
|
||||
if kind == "atom-feed":
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
root = ET.fromstring(response.content)
|
||||
return {"kind": kind, "items_seen": len(root.findall(".//{http://www.w3.org/2005/Atom}entry"))}
|
||||
if kind == "json-feed":
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
items = payload.get("items") or payload.get("entries") or payload.get("advisories") or []
|
||||
if not isinstance(items, list):
|
||||
raise ValueError("JSON feed probe returned non-list items")
|
||||
return {"kind": kind, "items_seen": len(items)}
|
||||
if kind == "html-links":
|
||||
response = request("GET", source["url"])
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
html = response.text
|
||||
return {"kind": kind, "items_seen": len(html_links.ANCHOR_RE.findall(html))}
|
||||
if kind == "vendor-index":
|
||||
response = request("GET", source["url"], source=source)
|
||||
response.raise_for_status()
|
||||
html = response.text
|
||||
return {"kind": kind, "items_seen": len(html_links.ANCHOR_RE.findall(html))}
|
||||
@@ -110,47 +179,59 @@ def collect_candidates(
|
||||
since_dt: Optional[datetime] = None,
|
||||
tier: Optional[str] = None,
|
||||
include_undated: bool = False,
|
||||
) -> Tuple[List[Candidate], List[str]]:
|
||||
) -> Tuple[List[Candidate], List[Dict[str, Any]]]:
|
||||
all_candidates: List[Candidate] = []
|
||||
failures: List[str] = []
|
||||
failures: List[Dict[str, Any]] = []
|
||||
|
||||
for system in source_map["systems"]:
|
||||
if tier and system.get("tier") != tier:
|
||||
continue
|
||||
for bucket_name in ("official_sources", "ecosystem_sources", "research_sources"):
|
||||
for source in system.get(bucket_name, []):
|
||||
handler = HANDLERS.get(source["kind"])
|
||||
if handler is None:
|
||||
failures.append(f"Unsupported source kind {source['kind']} for {system['system_id']}")
|
||||
continue
|
||||
try:
|
||||
items = handler(system, source)
|
||||
for item in items:
|
||||
if _passes_since(item, since_dt, include_undated):
|
||||
all_candidates.append(item)
|
||||
except Exception as exc:
|
||||
failures.append(f"{system['system_id']}::{source['name']}::{exc.__class__.__name__}")
|
||||
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=False):
|
||||
handler = HANDLERS.get(source["kind"])
|
||||
if handler is None:
|
||||
failures.append(
|
||||
{
|
||||
"system_id": system["system_id"],
|
||||
"display_name": system["display_name"],
|
||||
"source_name": source["name"],
|
||||
"source_kind": source["kind"],
|
||||
"source_bucket": source.get("bucket_name"),
|
||||
"category": "schema",
|
||||
"exception": "UnsupportedSourceKind",
|
||||
"message": f"Unsupported source kind {source['kind']}",
|
||||
"status_code": None,
|
||||
"url": source.get("url") or "",
|
||||
"summary": f"{system['system_id']}::{source['name']}::schema::Unsupported source kind {source['kind']}",
|
||||
}
|
||||
)
|
||||
continue
|
||||
try:
|
||||
items = handler(system, source)
|
||||
for item in items:
|
||||
if _passes_since(item, since_dt, include_undated):
|
||||
all_candidates.append(item)
|
||||
except Exception as exc:
|
||||
failures.append(_build_failure(system, source, exc))
|
||||
return all_candidates, failures
|
||||
|
||||
|
||||
def probe_sources(
|
||||
source_map: Dict[str, Any],
|
||||
tier: Optional[str] = None,
|
||||
) -> Tuple[List[Dict[str, Any]], List[str]]:
|
||||
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
||||
jobs: List[Tuple[Dict[str, Any], Dict[str, Any]]] = []
|
||||
probes: List[Dict[str, Any]] = []
|
||||
failures: List[str] = []
|
||||
failures: List[Dict[str, Any]] = []
|
||||
|
||||
for system in source_map["systems"]:
|
||||
if tier and system.get("tier") != tier:
|
||||
continue
|
||||
for bucket_name in ("official_sources", "ecosystem_sources", "research_sources"):
|
||||
for source in system.get(bucket_name, []):
|
||||
jobs.append((system, source))
|
||||
for _system, _bucket_name, source in iter_all_sources({"systems": [system]}, include_retired=False):
|
||||
jobs.append((system, source))
|
||||
|
||||
max_workers = min(16, max(4, len(jobs) or 1))
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
||||
future_map = {executor.submit(_probe_source, system, source): (system, source) for system, source in jobs}
|
||||
future_map = {executor.submit(probe_source, system, source): (system, source) for system, source in jobs}
|
||||
for future in as_completed(future_map):
|
||||
system, source = future_map[future]
|
||||
try:
|
||||
@@ -164,5 +245,15 @@ def probe_sources(
|
||||
}
|
||||
)
|
||||
except Exception as exc:
|
||||
failures.append(f"{system['system_id']}::{source['name']}::{exc.__class__.__name__}")
|
||||
failures.append(_build_failure(system, source, exc))
|
||||
return probes, failures
|
||||
|
||||
|
||||
def find_source(source_map: Dict[str, Any], system_id: str, source_name: str) -> Tuple[Dict[str, Any], Dict[str, Any]] | None:
    """Look up a source by system id and source name.

    Every system whose ``system_id`` matches is searched through
    ``iter_all_sources`` with ``include_retired=True``. The first source whose
    ``name`` equals ``source_name`` is returned together with its system as a
    ``(system, source)`` pair; ``None`` is returned when nothing matches.
    """
    systems = source_map.get("systems", []) or []
    matching_systems = (entry for entry in systems if entry.get("system_id") == system_id)
    for system in matching_systems:
        for _owner, _bucket, candidate in iter_all_sources({"systems": [system]}, include_retired=True):
            if candidate.get("name") == source_name:
                return system, candidate
    return None
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from html import unescape
|
||||
from typing import Any, Dict, List
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from intel.http_client import request
|
||||
from intel.models import Candidate
|
||||
from intel.utils import unique
|
||||
|
||||
from .html_links import ANCHOR_RE, TAG_RE, canonicalize_url
|
||||
|
||||
|
||||
def _matches(value: str, patterns: List[str]) -> bool:
|
||||
if not patterns:
|
||||
return True
|
||||
return any(re.search(pattern, value, re.IGNORECASE) for pattern in patterns)
|
||||
|
||||
|
||||
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Scrape the anchors of a vendor index page into ``Candidate`` records.

    Every ``ANCHOR_RE`` match is resolved against ``source["url"]`` and
    canonicalised, then dropped when its text is empty, when it misses the
    keyword filter, when it fails the include/exclude URL patterns from
    ``parser_hints``, or when the same URL was already emitted. Collection
    stops once ``max_items`` (default 50) candidates have been gathered.

    Exceptions from ``request`` and ``response.raise_for_status()`` propagate
    unchanged.
    """
    response = request("GET", source["url"], source=source)
    response.raise_for_status()
    page = response.text

    hints = source.get("parser_hints") or {}
    # Keywords from parser_hints take precedence over the source-level list.
    wanted = {kw.lower() for kw in (hints.get("keywords") or source.get("keywords", []))}
    includes = hints.get("include_url_patterns") or []
    excludes = hints.get("exclude_url_patterns") or []

    results: List[Candidate] = []
    visited = set()
    for href, anchor_html in ANCHOR_RE.findall(page):
        url = canonicalize_url(urljoin(source["url"], href))
        # Strip nested markup from the anchor body before decoding entities.
        label = unescape(TAG_RE.sub(" ", anchor_html)).strip()
        if not label:
            continue

        searchable = " ".join(filter(None, [url, label])).lower()
        if wanted and not any(keyword in searchable for keyword in wanted):
            continue

        rejected = (
            (includes and not _matches(url, includes))
            or (excludes and _matches(url, excludes))
            or url in visited
        )
        if rejected:
            continue
        visited.add(url)

        candidate = Candidate(
            system_id=system["system_id"],
            display_name=system["display_name"],
            category=system["category"],
            advisory_mode=source.get("advisory_mode", "core"),
            source_kind=source["kind"],
            source_name=source["name"],
            source_confidence=source["confidence"],
            source_url=url,
            title=label,
            summary="",
            severity="unknown",
            references=unique([url]),
            raw={"href": url, "title": label},
        )
        results.append(candidate)
        # The cap is checked after appending, so at least one match is kept.
        if len(results) >= source.get("max_items", 50):
            break
    return results
|
||||
在新工单中引用
屏蔽一个用户