kb: expand authorized lab coverage and intel automation

这个提交包含在:
hao
2026-03-16 22:04:51 -07:00
父节点 cda31e86c7
当前提交 d0120fbf10
修改 592 个文件,包含 29025 行新增和 267 行删除

查看文件

@@ -0,0 +1,51 @@
from __future__ import annotations
import xml.etree.ElementTree as ET
from typing import Any, Dict, List
import requests
from intel.models import Candidate
def _text(node: ET.Element, name: str) -> str:
    """Return the stripped text content of the first ``name`` match under ``node``.

    Args:
        node: Element to search with ``Element.find``.
        name: Tag name or path passed to ``Element.find``.

    Returns:
        The element's text with surrounding whitespace removed, or ``""``
        when no element matches or it holds no text.
    """
    child = node.find(name)
    if child is None:
        return ""
    # ``child.text`` alone stops at the first nested element, so a value such
    # as "Fix for <b>CVE-1</b> released" would be cut to "Fix for".  Joining
    # itertext() keeps the full text and is identical for plain-text elements.
    return "".join(child.itertext()).strip()
def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Download an RSS feed and convert its ``<item>`` elements to candidates.

    Args:
        system: Mapping read for ``system_id``, ``display_name`` and
            ``category``.
        source: Mapping read for ``url``, ``kind``, ``name`` and
            ``confidence`` (required), plus the optional ``keywords``,
            ``max_items`` (default 50) and ``advisory_mode`` (default
            ``"core"``).

    Returns:
        One ``Candidate`` per retained item, in feed order.  When keywords
        are configured, an item is retained only if at least one keyword
        occurs (case-insensitively) in its title or description.

    Raises:
        requests.HTTPError: If the response status indicates an error.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
        KeyError: If a required key is missing from ``system`` or ``source``.
    """
    feed_url = source["url"]
    response = requests.get(
        feed_url, headers={"User-Agent": "websafe-intel"}, timeout=30
    )
    response.raise_for_status()
    # NOTE(review): the feed is remote input and ElementTree is not hardened
    # against maliciously constructed XML; consider a hardened parser if the
    # configured sources are not fully trusted.
    root = ET.fromstring(response.content)

    # Drop blank keywords: "" is a substring of every haystack, so a single
    # empty entry would silently disable the filter for the whole source.
    keywords = {
        kw.lower()
        for kw in (source.get("keywords") or [])
        if kw and kw.strip()
    }

    # The item cap is applied before keyword filtering, so it bounds how many
    # feed items are examined, not how many candidates are returned.
    limit = source.get("max_items", 50)

    candidates: List[Candidate] = []
    for item in root.findall(".//item")[:limit]:
        title = _text(item, "title")
        description = _text(item, "description")

        if keywords:
            haystack = f"{title} {description}".lower()
            if not any(keyword in haystack for keyword in keywords):
                continue

        # Items without their own link fall back to the feed URL.
        link = _text(item, "link") or feed_url
        # Only pubDate is read, so it serves as both timestamps.
        published = _text(item, "pubDate")

        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                source_url=link,
                title=title or f"RSS entry for {system['display_name']}",
                published_at=published,
                updated_at=published,
                summary=description,
                severity="unknown",
                references=[link],
                raw={"title": title, "link": link},
            )
        )
    return candidates