更新: 4 个文件 - 2026-03-18 17:23:40
这个提交包含在:
@@ -206,6 +206,37 @@ def build_source_health_snapshot(
|
||||
all_green = not normalized_failures
|
||||
previous = previous or {}
|
||||
last_fully_green_run = generated_at if all_green else previous.get("last_fully_green_run")
|
||||
slow_sources = []
|
||||
telemetry_rows = []
|
||||
for probe in probes:
|
||||
if probe.get("elapsed_seconds") is None:
|
||||
continue
|
||||
telemetry_rows.append(
|
||||
{
|
||||
"system_id": probe["system_id"],
|
||||
"source_name": probe["source_name"],
|
||||
"source_kind": probe.get("source_kind"),
|
||||
"elapsed_seconds": probe.get("elapsed_seconds"),
|
||||
"status": "ok",
|
||||
}
|
||||
)
|
||||
for failure in normalized_failures:
|
||||
if failure.get("elapsed_seconds") is None:
|
||||
continue
|
||||
telemetry_rows.append(
|
||||
{
|
||||
"system_id": failure["system_id"],
|
||||
"source_name": failure["source_name"],
|
||||
"source_kind": failure.get("source_kind"),
|
||||
"elapsed_seconds": failure.get("elapsed_seconds"),
|
||||
"status": failure.get("category") or "failure",
|
||||
}
|
||||
)
|
||||
slow_sources = sorted(
|
||||
telemetry_rows,
|
||||
key=lambda item: float(item.get("elapsed_seconds") or 0),
|
||||
reverse=True,
|
||||
)[:10]
|
||||
return {
|
||||
"generated_at": generated_at,
|
||||
"active_source_count": active_source_total,
|
||||
@@ -216,6 +247,7 @@ def build_source_health_snapshot(
|
||||
"retries_performed": retries_performed,
|
||||
"probes": sorted(probes, key=lambda item: (item["system_id"], item["source_name"])),
|
||||
"failures": normalized_failures,
|
||||
"slow_sources": slow_sources,
|
||||
"systems": sorted(systems.values(), key=lambda item: item["system_id"]),
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import os
|
||||
import xml.etree.ElementTree as ET
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import datetime
|
||||
from time import perf_counter
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
@@ -56,13 +57,19 @@ def failure_summary(failure: Dict[str, Any]) -> str:
|
||||
return failure.get("summary") or f"{failure.get('system_id')}::{failure.get('source_name')}::{failure.get('category')}::{failure.get('exception')}"
|
||||
|
||||
|
||||
def build_failure(system: Dict[str, Any], source: Dict[str, Any], exc: Exception) -> Dict[str, Any]:
|
||||
def build_failure(
|
||||
system: Dict[str, Any],
|
||||
source: Dict[str, Any],
|
||||
exc: Exception,
|
||||
*,
|
||||
elapsed_seconds: float | None = None,
|
||||
) -> Dict[str, Any]:
|
||||
response = getattr(exc, "response", None)
|
||||
status_code = getattr(response, "status_code", None)
|
||||
category = _failure_category(exc)
|
||||
message = str(exc).strip() or exc.__class__.__name__
|
||||
summary = f"{system['system_id']}::{source['name']}::{category}::{message}"
|
||||
return {
|
||||
failure = {
|
||||
"system_id": system["system_id"],
|
||||
"display_name": system["display_name"],
|
||||
"source_name": source["name"],
|
||||
@@ -75,6 +82,9 @@ def build_failure(system: Dict[str, Any], source: Dict[str, Any], exc: Exception
|
||||
"url": source.get("url") or "",
|
||||
"summary": summary,
|
||||
}
|
||||
if elapsed_seconds is not None:
|
||||
failure["elapsed_seconds"] = round(elapsed_seconds, 3)
|
||||
return failure
|
||||
|
||||
|
||||
def _collect_jobs(
|
||||
@@ -113,6 +123,7 @@ def _collect_source_candidates(
|
||||
since_dt: Optional[datetime],
|
||||
include_undated: bool,
|
||||
) -> Tuple[List[Candidate], Optional[Dict[str, Any]]]:
|
||||
started = perf_counter()
|
||||
handler = HANDLERS.get(source["kind"])
|
||||
if handler is None:
|
||||
return (
|
||||
@@ -136,7 +147,7 @@ def _collect_source_candidates(
|
||||
filtered = [item for item in items if _passes_since(item, since_dt, include_undated)]
|
||||
return filtered, None
|
||||
except Exception as exc:
|
||||
return [], build_failure(system, source, exc)
|
||||
return [], build_failure(system, source, exc, elapsed_seconds=perf_counter() - started)
|
||||
|
||||
|
||||
def probe_source(system: Dict[str, Any], source: Dict[str, Any]) -> Dict[str, Any]:
|
||||
@@ -279,9 +290,10 @@ def probe_sources(
|
||||
probes: List[Dict[str, Any]] = []
|
||||
failures: List[Dict[str, Any]] = []
|
||||
with ThreadPoolExecutor(max_workers=_max_workers(len(jobs))) as executor:
|
||||
future_map = {executor.submit(probe_source, system, source): (system, source) for system, source in jobs}
|
||||
future_map = {executor.submit(probe_source, system, source): (system, source, perf_counter()) for system, source in jobs}
|
||||
for future in as_completed(future_map):
|
||||
system, source = future_map[future]
|
||||
system, source, started = future_map[future]
|
||||
elapsed = perf_counter() - started
|
||||
try:
|
||||
result = future.result()
|
||||
probes.append(
|
||||
@@ -289,11 +301,12 @@ def probe_sources(
|
||||
"system_id": system["system_id"],
|
||||
"source_name": source["name"],
|
||||
"source_kind": source["kind"],
|
||||
"elapsed_seconds": round(elapsed, 3),
|
||||
**result,
|
||||
}
|
||||
)
|
||||
except Exception as exc:
|
||||
failures.append(build_failure(system, source, exc))
|
||||
failures.append(build_failure(system, source, exc, elapsed_seconds=elapsed))
|
||||
probes.sort(key=lambda item: (item["system_id"], item["source_name"]))
|
||||
failures.sort(key=lambda item: (item.get("system_id", ""), item.get("source_name", ""), item.get("category", "")))
|
||||
return probes, failures
|
||||
|
||||
在新工单中引用
屏蔽一个用户