更新: 3 个文件 - 2026-03-21 17:44:22
这个提交包含在:
@@ -13,6 +13,7 @@ REGISTRY_ROOT = THREAT_INTEL_ROOT / "registry"
|
||||
ADVISORIES_DIR = REGISTRY_ROOT / "advisories"
|
||||
SYSTEMS_DIR = REGISTRY_ROOT / "systems"
|
||||
ENTITIES_DIR = REGISTRY_ROOT / "entities"
|
||||
VERSIONS_DIR = REGISTRY_ROOT / "versions"
|
||||
RUNS_DIR = REGISTRY_ROOT / "runs"
|
||||
TRIAGE_DIR = REGISTRY_ROOT / "triage"
|
||||
GENERATED_DIR = THREAT_INTEL_ROOT / "generated"
|
||||
@@ -32,6 +33,11 @@ ENTITY_BACKLOG_PATH = GENERATED_DIR / "entity-discovery-backlog.json"
|
||||
ENTITY_QUEUES_PATH = GENERATED_DIR / "entity-queues.json"
|
||||
ENTITY_CATALOG_REPORT_MD_PATH = GENERATED_DIR / "entity-catalog-report.md"
|
||||
ENTITY_BACKLOG_REPORT_MD_PATH = GENERATED_DIR / "entity-discovery-backlog.md"
|
||||
VERSION_COMPLETENESS_PATH = GENERATED_DIR / "version-completeness.json"
|
||||
VERSION_BACKLOG_PATH = GENERATED_DIR / "version-backlog.json"
|
||||
RELEASE_INDEX_PATH = GENERATED_DIR / "release-index.json"
|
||||
VERSION_REPORT_MD_PATH = GENERATED_DIR / "version-sync-report.md"
|
||||
LAB_ENQUEUE_SUMMARY_PATH = GENERATED_DIR / "lab-enqueue-summary.json"
|
||||
STATE_DIR = Path.home() / ".local" / "state" / "websafe-intel"
|
||||
STATE_PATH = STATE_DIR / "state.json"
|
||||
|
||||
@@ -62,6 +68,12 @@ DEFAULT_PARSER_HINTS = {
|
||||
"date_extractors": [],
|
||||
}
|
||||
|
||||
# Baseline release-selector settings: every pattern list starts out empty.
# _normalize_source layers a source's own "release_selector" mapping on top
# of these defaults. Each key is given its own fresh list object.
DEFAULT_RELEASE_SELECTOR = {
    selector_key: []
    for selector_key in ("version_patterns", "date_patterns", "release_url_patterns")
}
|
||||
|
||||
DEFAULT_ACCEPT_BY_KIND = {
|
||||
"rss-feed": "application/rss+xml, application/xml;q=0.9, text/xml;q=0.9, */*;q=0.8",
|
||||
"atom-feed": "application/atom+xml, application/xml;q=0.9, text/xml;q=0.9, */*;q=0.8",
|
||||
@@ -85,6 +97,41 @@ DEFAULT_FORMAT_BY_KIND = {
|
||||
}
|
||||
|
||||
|
||||
def _infer_source_purpose(bucket_name: str, source: Dict[str, Any]) -> str:
|
||||
configured = (source.get("purpose") or "").strip()
|
||||
if configured:
|
||||
return configured
|
||||
url = (source.get("url") or "").lower()
|
||||
kind = source.get("kind") or ""
|
||||
if any(token in url for token in ("/plugins/", "/themes/", "/marketplace/", "/extensions/", "/modules/")):
|
||||
return "marketplace"
|
||||
if any(token in url for token in ("/releases", "/tags", "/release", "release-notes", "security-releases", "/feed/", ".rss", ".xml")):
|
||||
return "release"
|
||||
if kind in {"rss-feed", "atom-feed", "json-feed", "vendor-index", "html-links"} and bucket_name != "research_sources":
|
||||
return "discovery"
|
||||
return "advisory"
|
||||
|
||||
|
||||
def _infer_entity_type_hint(source: Dict[str, Any]) -> str:
|
||||
configured = (source.get("entity_type_hint") or "").strip()
|
||||
if configured:
|
||||
return configured
|
||||
url = (source.get("url") or "").lower()
|
||||
if "/plugins/" in url:
|
||||
return "plugin"
|
||||
if "/themes/" in url:
|
||||
return "theme"
|
||||
if "/extensions/" in url:
|
||||
return "extension"
|
||||
if "/modules/" in url:
|
||||
return "module"
|
||||
if "github.com/" in url:
|
||||
return "repo"
|
||||
if "npmjs.com/package/" in url or "packagist.org/packages/" in url:
|
||||
return "package"
|
||||
return "project"
|
||||
|
||||
|
||||
def _normalize_source(source: Dict[str, Any], bucket_name: str) -> Dict[str, Any]:
|
||||
normalized = dict(source or {})
|
||||
normalized["status"] = normalized.get("status") or "active"
|
||||
@@ -112,6 +159,11 @@ def _normalize_source(source: Dict[str, Any], bucket_name: str) -> Dict[str, Any
|
||||
if not parser_hints.get("keywords"):
|
||||
parser_hints["keywords"] = list(normalized.get("keywords") or [])
|
||||
normalized["parser_hints"] = parser_hints
|
||||
normalized["purpose"] = _infer_source_purpose(bucket_name, normalized)
|
||||
normalized["entity_type_hint"] = _infer_entity_type_hint(normalized)
|
||||
normalized["auto_catalog"] = bool(normalized.get("auto_catalog", bucket_name in {"official_sources", "ecosystem_sources"}))
|
||||
normalized["version_mode"] = normalized.get("version_mode") or "security-related"
|
||||
normalized["release_selector"] = {**DEFAULT_RELEASE_SELECTOR, **(normalized.get("release_selector") or {})}
|
||||
normalized["bucket_name"] = bucket_name
|
||||
return normalized
|
||||
|
||||
|
||||
在新工单中引用
屏蔽一个用户