更新: 3 个文件 - 2026-03-21 17:44:22

这个提交包含在:
hao
2026-03-21 17:44:22 -07:00
父节点 e13c138232
当前提交 a0a5067ae1
修改 3 个文件，包含 238 行新增、29 行删除

查看文件

@@ -13,6 +13,7 @@ REGISTRY_ROOT = THREAT_INTEL_ROOT / "registry"
# Registry layout: each record category gets its own subdirectory directly
# under REGISTRY_ROOT.
ADVISORIES_DIR = REGISTRY_ROOT / "advisories"
SYSTEMS_DIR = REGISTRY_ROOT / "systems"
ENTITIES_DIR = REGISTRY_ROOT / "entities"
VERSIONS_DIR = REGISTRY_ROOT / "versions"
RUNS_DIR = REGISTRY_ROOT / "runs"
TRIAGE_DIR = REGISTRY_ROOT / "triage"
# "generated" is a sibling of the registry directory: it hangs off
# THREAT_INTEL_ROOT, not REGISTRY_ROOT.
GENERATED_DIR = THREAT_INTEL_ROOT / "generated"
@@ -32,6 +33,11 @@ ENTITY_BACKLOG_PATH = GENERATED_DIR / "entity-discovery-backlog.json"
# Entity-related files located under GENERATED_DIR (JSON data plus Markdown
# reports). NOTE(review): presumably produced by the pipeline rather than
# hand-edited -- confirm against the code that writes them.
ENTITY_QUEUES_PATH = GENERATED_DIR / "entity-queues.json"
ENTITY_CATALOG_REPORT_MD_PATH = GENERATED_DIR / "entity-catalog-report.md"
ENTITY_BACKLOG_REPORT_MD_PATH = GENERATED_DIR / "entity-discovery-backlog.md"
# Version/release tracking files, also under GENERATED_DIR.
VERSION_COMPLETENESS_PATH = GENERATED_DIR / "version-completeness.json"
VERSION_BACKLOG_PATH = GENERATED_DIR / "version-backlog.json"
RELEASE_INDEX_PATH = GENERATED_DIR / "release-index.json"
VERSION_REPORT_MD_PATH = GENERATED_DIR / "version-sync-report.md"
LAB_ENQUEUE_SUMMARY_PATH = GENERATED_DIR / "lab-enqueue-summary.json"
# State is kept under the current user's home directory
# (~/.local/state/websafe-intel) instead of under GENERATED_DIR.
STATE_DIR = Path.home() / ".local" / "state" / "websafe-intel"
STATE_PATH = STATE_DIR / "state.json"
@@ -62,6 +68,12 @@ DEFAULT_PARSER_HINTS = {
"date_extractors": [],
}
# Baseline ``release_selector`` for a source; every key defaults to an empty
# list. _normalize_source overlays the source's own selector on top of this
# with a shallow ``{**defaults, **configured}`` merge, so the list objects
# below are shared by every source that does not override a key -- treat
# them as read-only.
DEFAULT_RELEASE_SELECTOR = {
    "version_patterns": [],
    "date_patterns": [],
    "release_url_patterns": [],
}
DEFAULT_ACCEPT_BY_KIND = {
"rss-feed": "application/rss+xml, application/xml;q=0.9, text/xml;q=0.9, */*;q=0.8",
"atom-feed": "application/atom+xml, application/xml;q=0.9, text/xml;q=0.9, */*;q=0.8",
@@ -85,6 +97,41 @@ DEFAULT_FORMAT_BY_KIND = {
}
def _infer_source_purpose(bucket_name: str, source: Dict[str, Any]) -> str:
configured = (source.get("purpose") or "").strip()
if configured:
return configured
url = (source.get("url") or "").lower()
kind = source.get("kind") or ""
if any(token in url for token in ("/plugins/", "/themes/", "/marketplace/", "/extensions/", "/modules/")):
return "marketplace"
if any(token in url for token in ("/releases", "/tags", "/release", "release-notes", "security-releases", "/feed/", ".rss", ".xml")):
return "release"
if kind in {"rss-feed", "atom-feed", "json-feed", "vendor-index", "html-links"} and bucket_name != "research_sources":
return "discovery"
return "advisory"
def _infer_entity_type_hint(source: Dict[str, Any]) -> str:
configured = (source.get("entity_type_hint") or "").strip()
if configured:
return configured
url = (source.get("url") or "").lower()
if "/plugins/" in url:
return "plugin"
if "/themes/" in url:
return "theme"
if "/extensions/" in url:
return "extension"
if "/modules/" in url:
return "module"
if "github.com/" in url:
return "repo"
if "npmjs.com/package/" in url or "packagist.org/packages/" in url:
return "package"
return "project"
def _normalize_source(source: Dict[str, Any], bucket_name: str) -> Dict[str, Any]:
normalized = dict(source or {})
normalized["status"] = normalized.get("status") or "active"
@@ -112,6 +159,11 @@ def _normalize_source(source: Dict[str, Any], bucket_name: str) -> Dict[str, Any
if not parser_hints.get("keywords"):
parser_hints["keywords"] = list(normalized.get("keywords") or [])
normalized["parser_hints"] = parser_hints
normalized["purpose"] = _infer_source_purpose(bucket_name, normalized)
normalized["entity_type_hint"] = _infer_entity_type_hint(normalized)
normalized["auto_catalog"] = bool(normalized.get("auto_catalog", bucket_name in {"official_sources", "ecosystem_sources"}))
normalized["version_mode"] = normalized.get("version_mode") or "security-related"
normalized["release_selector"] = {**DEFAULT_RELEASE_SELECTOR, **(normalized.get("release_selector") or {})}
normalized["bucket_name"] = bucket_name
return normalized