# File: websafe-kb/scripts/intel/sources/atom_feed.py
# 58 lines · 2.4 KiB · Python

from __future__ import annotations
import xml.etree.ElementTree as ET
from typing import Any, Dict, List
from intel.http_client import request
from intel.models import Candidate
# Prefix map that lets ElementTree paths such as "atom:title" resolve to the
# Atom 1.0 XML namespace.
ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}


def _node_text(node: ET.Element, path: str) -> str:
    """Return the stripped text content of the first match for *path*.

    Args:
        node: Element to search beneath.
        path: ElementTree path; an ``atom:`` prefix is resolved via ``ATOM_NS``.

    Returns:
        The concatenated text of the matched element and all of its
        descendants with surrounding whitespace removed, or ``""`` when
        nothing matches or the match carries no text.
    """
    child = node.find(path, ATOM_NS)
    if child is None:
        return ""
    # Atom text constructs may wrap their text in nested markup (for example
    # type="xhtml" content inside a <div>). Reading only ``child.text`` would
    # drop that text, so gather it from the whole subtree. For a leaf element
    # this is exactly ``child.text``.
    return "".join(child.itertext()).strip()
def _entry_link(entry: ET.Element) -> str:
    """Return the most useful link href of an Atom *entry*, or ``""``.

    A link whose ``rel`` is ``"alternate"`` (or absent, which Atom treats as
    ``"alternate"``) wins; otherwise the first link with a non-empty ``href``
    is used.
    """
    # findall() returns lists, so ``or`` is a safe "first non-empty" here.
    # The same trick on find() results is NOT safe: a childless Element is
    # falsy, and <link/> elements never have children.
    link_nodes = entry.findall("atom:link", ATOM_NS) or entry.findall("link")
    fallback = ""
    for link_node in link_nodes:
        href = (link_node.get("href") or "").strip()
        if not href:
            continue
        if link_node.get("rel", "alternate") == "alternate":
            return href
        if not fallback:
            fallback = href
    return fallback


def fetch(system: Dict[str, Any], source: Dict[str, Any]) -> List[Candidate]:
    """Download an Atom feed and turn its entries into ``Candidate`` records.

    Args:
        system: Mapping that must provide ``system_id``, ``display_name`` and
            ``category``; these are copied onto every candidate.
        source: Mapping that must provide ``url``, ``kind``, ``name`` and
            ``confidence``. Optional keys: ``parser_hints`` (whose
            ``keywords`` entry takes precedence), ``keywords``, ``max_items``
            (defaults to 50) and ``advisory_mode`` (defaults to ``"core"``).

    Returns:
        One candidate per retained entry, in feed order. When keywords are
        configured, an entry is retained only if at least one keyword occurs
        (case-insensitively) in its title, summary or link.

    Raises:
        KeyError: If a required key is missing from *system* or *source*.
        xml.etree.ElementTree.ParseError: If the response body is not
            well-formed XML.
        Whatever ``response.raise_for_status()`` raises on an HTTP error
        (depends on the project's ``request`` helper).
    """
    response = request("GET", source["url"], source=source)
    response.raise_for_status()
    # NOTE(review): the stdlib parser is not hardened against maliciously
    # constructed XML (e.g. entity-expansion attacks). Confirm that feeds are
    # trusted, or that the HTTP layer caps the response size.
    root = ET.fromstring(response.content)

    parser_hints = source.get("parser_hints") or {}
    keywords = {kw.lower() for kw in (parser_hints.get("keywords") or source.get("keywords", []))}

    # Prefer properly namespaced entries; tolerate feeds that omit the namespace.
    entries = root.findall(".//atom:entry", ATOM_NS) or root.findall(".//entry")

    candidates: List[Candidate] = []
    # The max_items cap is applied before keyword filtering, so it bounds the
    # number of entries inspected rather than the number returned.
    for entry in entries[: source.get("max_items", 50)]:
        title = _node_text(entry, "atom:title") or _node_text(entry, "title")
        link = _entry_link(entry)
        summary = (
            _node_text(entry, "atom:summary")
            or _node_text(entry, "summary")
            or _node_text(entry, "atom:content")
            # Un-namespaced fallback, consistent with every other field.
            or _node_text(entry, "content")
        )

        if keywords:
            haystack = " ".join(filter(None, [title, summary, link])).lower()
            if not any(keyword in haystack for keyword in keywords):
                continue

        candidates.append(
            Candidate(
                system_id=system["system_id"],
                display_name=system["display_name"],
                category=system["category"],
                advisory_mode=source.get("advisory_mode", "core"),
                source_kind=source["kind"],
                source_name=source["name"],
                source_confidence=source["confidence"],
                # Fall back to the feed URL when the entry has no usable link.
                source_url=link or source["url"],
                title=title or f"Atom entry for {system['display_name']}",
                published_at=_node_text(entry, "atom:published") or _node_text(entry, "published"),
                updated_at=_node_text(entry, "atom:updated") or _node_text(entry, "updated"),
                summary=summary,
                severity="unknown",
                references=[link] if link else [source["url"]],
                raw={"title": title, "link": link},
            )
        )
    return candidates