# File: websafe-kb/scripts/lab/browser.py
# 70 lines, 2.5 KiB, Python

from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, List
from lab.utils import ensure_dir, write_json
def capture(
    url: str,
    run_dir: Path,
    prefix: str = "baseline",
    timeout_ms: int = 20000,
) -> Dict[str, Any]:
    """Load *url* in headless Chromium and save browser evidence under *run_dir*.

    On success the following artifacts are written:

    * ``assets/<prefix>.png`` - full-page screenshot
    * ``assets/<prefix>-dom.html`` - page content after navigation
    * ``logs/<prefix>-console.json`` - console messages (type and text)
    * ``logs/<prefix>-network.json`` - requests seen (method and URL)
    * ``logs/<prefix>-page.json`` - final URL, title, first 600 chars of body text

    In every case a summary is written to ``logs/<prefix>-browser.json`` and
    the same dict is returned.

    Args:
        url: Address to navigate to.
        run_dir: Root directory that receives the ``assets`` and ``logs`` files.
        prefix: File-name prefix for every artifact written by this call.
        timeout_ms: Navigation timeout in milliseconds passed to ``page.goto``.

    Returns:
        On success: ``required``, ``present=True``, ``page_title``,
        ``page_url`` and ``refs`` (artifact paths as strings).
        On failure (Playwright not importable, or any error during capture):
        ``required``, ``present=False``, ``refs=[]`` and ``reason``.
    """
    # NOTE(review): only the assets directory is created explicitly below;
    # this assumes write_json creates the logs directory itself - confirm.
    logs_dir = run_dir / "logs"
    summary_path = logs_dir / f"{prefix}-browser.json"

    payload: Dict[str, Any] = {
        "required": True,
        "present": False,
        "refs": [],
        "reason": "playwright runtime unavailable",
    }

    # Playwright is an optional dependency: import lazily so a missing or
    # broken install is reported in the summary instead of raising.
    try:
        from playwright.sync_api import sync_playwright  # type: ignore
    except Exception:
        write_json(summary_path, payload)
        return payload

    assets_dir = run_dir / "assets"
    ensure_dir(assets_dir)
    screenshot_path = assets_dir / f"{prefix}.png"
    dom_path = assets_dir / f"{prefix}-dom.html"
    console_path = logs_dir / f"{prefix}-console.json"
    network_path = logs_dir / f"{prefix}-network.json"
    page_path = logs_dir / f"{prefix}-page.json"

    console_messages: List[Dict[str, Any]] = []
    requests_seen: List[Dict[str, Any]] = []
    page_title = ""
    page_body_excerpt = ""
    final_url = url

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                # Register listeners before navigating so nothing emitted
                # during the initial load is missed.
                page.on(
                    "console",
                    lambda msg: console_messages.append(
                        {"type": msg.type, "text": msg.text}
                    ),
                )
                page.on(
                    "request",
                    lambda req: requests_seen.append(
                        {"method": req.method, "url": req.url}
                    ),
                )
                page.goto(url, wait_until="networkidle", timeout=timeout_ms)
                page.screenshot(path=str(screenshot_path), full_page=True)
                dom_path.write_text(page.content(), encoding="utf-8")
                final_url = page.url
                page_title = page.title()
                page_body_excerpt = (page.text_content("body") or "")[:600]
            finally:
                # Close the browser even when navigation or capture fails.
                browser.close()
    except Exception as exc:
        # Best-effort capture: record why it failed rather than propagating.
        payload["reason"] = str(exc)
        write_json(summary_path, payload)
        return payload

    write_json(console_path, console_messages)
    write_json(network_path, requests_seen)
    write_json(
        page_path,
        {
            "url": final_url,
            "title": page_title,
            "body_excerpt": page_body_excerpt,
        },
    )

    payload = {
        "required": True,
        "present": True,
        "page_title": page_title,
        "page_url": final_url,
        "refs": [
            str(screenshot_path),
            str(dom_path),
            str(console_path),
            str(network_path),
            str(page_path),
        ],
    }
    write_json(summary_path, payload)
    return payload