更新: 421 个文件 - 2026-03-17 18:30:02
这个提交包含在:
100
scripts/lab/evaluate.py
普通文件
100
scripts/lab/evaluate.py
普通文件
@@ -0,0 +1,100 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def _assertion(name: str, kind: str, passed: bool, detail: str) -> Dict[str, Any]:
|
||||
return {
|
||||
"name": name,
|
||||
"kind": kind,
|
||||
"passed": passed,
|
||||
"detail": detail,
|
||||
}
|
||||
|
||||
|
||||
def _baseline_ok(payload: Dict[str, Any]) -> bool:
|
||||
observations = payload.get("observations", []) or []
|
||||
if not observations:
|
||||
return False
|
||||
for item in observations:
|
||||
if item.get("error"):
|
||||
return False
|
||||
status_code = item.get("status_code")
|
||||
if status_code is None or int(status_code) >= 500:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _attack_steps_ok(payload: Dict[str, Any]) -> bool:
|
||||
steps = payload.get("steps", []) or []
|
||||
if payload.get("success") is True:
|
||||
return True
|
||||
if not steps:
|
||||
return False
|
||||
return not any(step.get("status") == "failed" for step in steps)
|
||||
|
||||
|
||||
def evaluate_run(
    profile: Dict[str, Any],
    provision_result: Dict[str, Any],
    baseline_payload: Dict[str, Any],
    attack_payload: Dict[str, Any],
    browser_payload: Dict[str, Any],
) -> Dict[str, Any]:
    """Evaluate a run against the profile's success assertions.

    Args:
        profile: Run profile. Read keys: ``success_assertions``,
            ``browser_assertions`` (its ``required`` flag), ``artifact_mode``
            and ``provisioning_mode``.
        provision_result: Provisioning outcome. Read keys: ``status`` and
            ``blocked_reason``.
        baseline_payload: Input for the ``baseline-ok`` assertion.
        attack_payload: Input for the ``runner-success`` and
            ``attack-steps-ok`` assertions.
        browser_payload: Input for the ``browser-present`` assertion. Read
            keys: ``present`` and ``reason``.

    Returns:
        A dict with:
        ``passed`` - True only if every assertion passed and the status is
        one of the ``verified-*`` values;
        ``verification_status`` - ``"blocked-artifact"``, ``"triage-manual"``,
        ``"verified-synthetic"`` or ``"verified-real"``;
        ``blocked_reason`` - the first available explanation, or None;
        ``assertions`` - the list of per-assertion records.
    """
    assertions: List[Dict[str, Any]] = []
    configured = profile.get("success_assertions", []) or []
    # `or {}` also covers a profile that sets browser_assertions to None
    # explicitly; a plain .get default only applies when the key is absent.
    browser_required = bool((profile.get("browser_assertions") or {}).get("required"))
    if not configured:
        # No assertions configured: fall back to the default set.
        configured = [
            {"name": "baseline-ok", "type": "baseline-ok"},
            {"name": "attack-steps", "type": "attack-steps-ok"},
        ]
        # NOTE(review): assumed to extend only the default set (this also
        # keeps the caller's own list unmodified) - confirm against the
        # original file's indentation.
        if browser_required:
            configured.append({"name": "browser-present", "type": "browser-present"})

    for item in configured:
        assertion_type = item.get("type", "")
        name = item.get("name") or assertion_type or "assertion"
        if assertion_type == "runner-success":
            passed = bool(attack_payload.get("success"))
            detail = attack_payload.get("detail") or (
                "runner reported success" if passed else "runner did not confirm success"
            )
        elif assertion_type == "baseline-ok":
            passed = _baseline_ok(baseline_payload)
            detail = (
                "baseline URLs responded without 5xx or transport errors"
                if passed
                else "baseline checks were incomplete"
            )
        elif assertion_type == "attack-steps-ok":
            passed = _attack_steps_ok(attack_payload)
            detail = (
                "attack steps completed without failures"
                if passed
                else "attack steps failed or produced no usable result"
            )
        elif assertion_type == "browser-present":
            passed = bool(browser_payload.get("present"))
            detail = (
                "browser evidence captured"
                if passed
                else (browser_payload.get("reason") or "browser evidence missing")
            )
        else:
            # Unknown assertion types fail rather than being silently ignored.
            passed = False
            detail = f"unsupported assertion type: {assertion_type}"
        assertions.append(_assertion(name, assertion_type, passed, detail))

    blocked_reason = provision_result.get("blocked_reason")
    # Missing required browser evidence records a reason here; this step by
    # itself does not change the overall `passed` result.
    if browser_required and not browser_payload.get("present"):
        blocked_reason = blocked_reason or browser_payload.get("reason") or "browser evidence incomplete"

    passed = all(item["passed"] for item in assertions)
    artifact_mode = profile.get("artifact_mode", profile.get("provisioning_mode", "synthetic"))
    verification_status = "triage-manual"
    if provision_result.get("status") == "blocked-artifact":
        verification_status = "blocked-artifact"
    elif not passed:
        # Status stays "triage-manual"; surface the first failing assertion's
        # detail when no earlier reason is known.
        failed = next((item for item in assertions if not item["passed"]), None)
        if failed and not blocked_reason:
            blocked_reason = failed["detail"]
    elif artifact_mode == "synthetic":
        verification_status = "verified-synthetic"
    else:
        verification_status = "verified-real"

    return {
        "passed": passed and verification_status.startswith("verified-"),
        "verification_status": verification_status,
        "blocked_reason": blocked_reason,
        "assertions": assertions,
    }
|
||||
|
||||
在新工单中引用
屏蔽一个用户