文件
skills/similarweb-analytics/scripts/docker/entrypoint.py
2026-03-05 10:28:14 +08:00

250 行
7.8 KiB
Python
可执行文件

#!/usr/bin/env python3
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass
from datetime import date
from typing import Dict, Optional, Tuple
# Directory that import_api_client() puts on sys.path so `data_api` can be imported.
RUNTIME_PATH = "/opt/.manus/.sandbox-runtime"

# Maps each --api choice to the endpoint name passed to ApiClient.call_api.
API_MAP = {
    "global-rank": "SimilarWeb/get_global_rank",
    "visits-total": "SimilarWeb/get_visits_total",
    "unique-visit": "SimilarWeb/get_unique_visit",
    "bounce-rate": "SimilarWeb/get_bounce_rate",
    "traffic-sources-desktop": "SimilarWeb/get_traffic_sources_desktop",
    "traffic-sources-mobile": "SimilarWeb/get_traffic_sources_mobile",
    "traffic-by-country": "SimilarWeb/get_total_traffic_by_country",
}

# Size, in months, of the default date window per API; used by
# default_date_range() when no start date is supplied.
DEFAULT_MONTHS = {
    "global-rank": 6,
    "visits-total": 6,
    "unique-visit": 6,
    "bounce-rate": 6,
    "traffic-sources-desktop": 3,
    "traffic-sources-mobile": 3,
    "traffic-by-country": 3,
}

# APIs for which build_query() adds `country` and `granularity` to the query.
COUNTRY_REQUIRED_APIS = {
    "visits-total",
    "bounce-rate",
    "traffic-sources-desktop",
    "traffic-sources-mobile",
}

# Shape check for YYYY-MM strings; the 01..12 month range is checked in parse_ym().
DATE_RE = re.compile(r"^\d{4}-\d{2}$")
@dataclass(frozen=True, order=True)
class YearMonth:
    """Immutable calendar month identified by a year and a month number.

    ``order=True`` generates ``<``, ``<=``, ``>`` and ``>=`` by comparing the
    fields as the tuple ``(year, month)``, so instances order
    chronologically. Comparing against an object of another type returns
    ``NotImplemented`` (and therefore raises ``TypeError`` from the operator)
    instead of failing with ``AttributeError`` on a missing attribute.

    No range validation is performed on construction; callers are expected
    to pass a month in 1..12.
    """

    year: int
    month: int

    def to_string(self) -> str:
        """Return the month as a zero-padded ``YYYY-MM`` string."""
        return f"{self.year:04d}-{self.month:02d}"
def parse_ym(value: str, field: str) -> YearMonth:
    """Parse a ``YYYY-MM`` string into a ``YearMonth``.

    Args:
        value: Text to parse; must be exactly four digits, a hyphen and two
            digits, with nothing before or after.
        field: Name of the originating field, used only in error messages.

    Returns:
        The parsed ``YearMonth``.

    Raises:
        ValueError: If ``value`` is not shaped like ``YYYY-MM`` or its month
            is outside 01..12.
    """
    # fullmatch rather than match: the pattern's `$` also matches just before
    # a trailing newline, so match() would let "2024-01\n" through.
    if not DATE_RE.fullmatch(value):
        raise ValueError(f"{field} must be YYYY-MM, got {value!r}")
    year = int(value[0:4])
    month = int(value[5:7])
    if month < 1 or month > 12:
        raise ValueError(f"{field} month must be in 01..12, got {value!r}")
    return YearMonth(year, month)
def shift_months(ym: YearMonth, delta: int) -> YearMonth:
    """Return the month that lies ``delta`` months after ``ym``.

    A negative ``delta`` moves backwards in time.

    Raises:
        ValueError: If the shifted month would fall before month 01 of year 0.
    """
    # Work on a single 0-based month counter so year rollover is plain
    # integer arithmetic.
    month_index = ym.year * 12 + (ym.month - 1) + delta
    if month_index < 0:
        raise ValueError("date range underflow")
    year, month_offset = divmod(month_index, 12)
    return YearMonth(year, month_offset + 1)
def month_span(start: YearMonth, end: YearMonth) -> int:
    """Return the number of months from ``start`` to ``end``, both inclusive.

    The same month on both sides gives 1; the result is zero or negative
    when ``end`` precedes ``start``.
    """
    whole_years = end.year - start.year
    extra_months = end.month - start.month
    # +1 because both endpoints are counted.
    return whole_years * 12 + extra_months + 1
def last_complete_month(today: date) -> YearMonth:
    """Return the calendar month immediately before the month of ``today``."""
    if today.month == 1:
        # January rolls back to December of the previous year.
        return YearMonth(today.year - 1, 12)
    return YearMonth(today.year, today.month - 1)
def default_date_range(api: str, start: Optional[str], end: Optional[str]) -> Tuple[YearMonth, YearMonth]:
    """Resolve the effective ``(start, end)`` months for a request.

    Args:
        api: Key into ``DEFAULT_MONTHS`` selecting the default window size.
        start: Optional ``YYYY-MM`` start month.
        end: Optional ``YYYY-MM`` end month.

    Returns:
        ``(start, end)`` as ``YearMonth`` values. A missing ``end`` defaults
        to the last complete month; a missing ``start`` is placed so the
        range covers the API's default window ending at ``end``.

    Raises:
        ValueError: If a supplied date string fails ``parse_ym``.
    """
    lookback = DEFAULT_MONTHS[api]
    latest = last_complete_month(date.today())

    if end:
        range_end = parse_ym(end, "end_date")
    else:
        range_end = latest

    if start:
        range_start = parse_ym(start, "start_date")
    else:
        # The window is inclusive of range_end, hence lookback - 1 steps back.
        range_start = shift_months(range_end, 1 - lookback)

    return range_start, range_end
def validate_range(api: str, start_ym: YearMonth, end_ym: YearMonth) -> None:
    """Check that a date range is acceptable for ``api``.

    The checks run in a fixed order and the first failure is reported:
    ordering of the two dates, ``end_ym`` not after the last complete month,
    ``start_ym`` within the 12 months ending at the last complete month,
    overall span of at most 12 months, and a span of at most 3 months for
    ``traffic-by-country``.

    Raises:
        ValueError: Describing the first check that fails.
    """
    if end_ym < start_ym:
        raise ValueError("end_date must be >= start_date")

    newest = last_complete_month(date.today())
    oldest = shift_months(newest, -11)
    months = month_span(start_ym, end_ym)

    # (failed?, message) pairs, listed in the order they must be reported.
    checks = (
        (end_ym > newest, f"end_date must be <= last complete month {newest.to_string()}"),
        (start_ym < oldest, f"start_date must be >= {oldest.to_string()} (12-month lookback)"),
        (months > 12, "date range cannot exceed 12 months"),
        (
            api == "traffic-by-country" and months > 3,
            "traffic-by-country supports at most 3 months",
        ),
    )
    for failed, message in checks:
        if failed:
            raise ValueError(message)
def sanitize_filename(value: str) -> str:
    """Reduce ``value`` to a string that is safe to use in a file name.

    Surrounding whitespace is removed, each run of characters other than
    ASCII letters, digits, ``_``, ``.`` and ``-`` becomes a single ``-``,
    and leading/trailing ``-`` are trimmed. If nothing is left, ``"result"``
    is returned.
    """
    collapsed = re.sub(r"[^a-zA-Z0-9_.-]+", "-", value.strip())
    trimmed = collapsed.strip("-")
    if not trimmed:
        return "result"
    return trimmed
def resolve_output_path(api: str, domain: str, output: Optional[str]) -> str:
    """Return the path the result JSON should be written to.

    An explicit, non-empty ``output`` is returned unchanged. Otherwise the
    path is ``/data/<api>-<domain>.json`` with both parts passed through
    ``sanitize_filename``.
    """
    if output:
        return output
    stem = "-".join(sanitize_filename(part) for part in (api, domain))
    return os.path.join("/data", stem + ".json")
def build_query(args: argparse.Namespace, start_ym: YearMonth, end_ym: YearMonth) -> Dict[str, object]:
    """Assemble the query parameters for the selected API.

    Args:
        args: Parsed CLI arguments; reads ``api``, ``main_domain_only``,
            ``country``, ``granularity`` and ``limit``.
        start_ym: First month of the range.
        end_ym: Last month of the range.

    Returns:
        A dict that always holds ``start_date`` and ``end_date`` and, based
        on the flags and the API, ``main_domain_only``, ``country`` plus
        ``granularity``, or ``limit``.
    """
    params: Dict[str, object] = {}
    params["start_date"] = start_ym.to_string()
    params["end_date"] = end_ym.to_string()

    if args.main_domain_only:
        params["main_domain_only"] = True

    selected = args.api
    if selected in COUNTRY_REQUIRED_APIS:
        params.update(country=args.country, granularity=args.granularity)
    elif selected == "traffic-by-country":
        params["limit"] = args.limit

    return params
def import_api_client():
    """Import ``ApiClient`` from the ``data_api`` module and return the class.

    ``RUNTIME_PATH`` is inserted at the front of ``sys.path`` before the
    import is attempted.

    Raises:
        RuntimeError: If the import fails for any reason; the underlying
            exception is attached as ``__cause__``.
    """
    sys.path.insert(0, RUNTIME_PATH)
    try:
        from data_api import ApiClient  # type: ignore
    except Exception as import_error:  # pragma: no cover
        message = "data_api import failed. Ensure runtime is mounted to /opt/.manus/.sandbox-runtime"
        raise RuntimeError(message) from import_error
    else:
        return ApiClient
def parse_args() -> argparse.Namespace:
    """Define the command-line interface and parse the process arguments.

    No option is marked as required at the parser level, so ``--self-test``
    can be used on its own; ``run`` checks for ``--api`` and ``--domain``.
    """
    cli = argparse.ArgumentParser(
        description="Call SimilarWeb APIs using ApiClient inside Docker and persist output JSON."
    )
    option = cli.add_argument

    # Target selection and date range (plain optional strings).
    option("--api", choices=sorted(API_MAP))
    for flag in ("--domain", "--start-date", "--end-date"):
        option(flag)

    # Query tuning.
    option("--country", default="world")
    option("--granularity", default="monthly")
    option("--limit", type=int, default=10)
    option("--main-domain-only", action="store_true")

    # Output location and execution modes.
    option("--output")
    for switch in ("--dry-run", "--mock-result", "--self-test"):
        option(switch, action="store_true")

    return cli.parse_args()
def write_payload(path: str, payload: Dict[str, object]) -> None:
    """Write ``payload`` to ``path`` as indented UTF-8 JSON plus a newline.

    Missing parent directories are created. Non-ASCII characters are written
    as-is rather than ``\\u``-escaped.

    Args:
        path: Destination file path; an existing file is overwritten.
        payload: JSON-serializable mapping to persist.

    Raises:
        TypeError: If ``payload`` contains a value that cannot be encoded as
            JSON. Nothing is created or modified on disk in that case.
        OSError: If the directory or file cannot be created or written.
    """
    # Serialize before touching the filesystem: opening with "w" truncates
    # immediately, so encoding straight into the file would leave an empty or
    # half-written file behind when the payload is not serializable.
    text = json.dumps(payload, ensure_ascii=False, indent=2)

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)
        f.write("\n")
def run() -> int:
    """Execute the CLI and return the process exit code.

    Modes, checked in this order:

    * ``--self-test``: print runtime diagnostics as JSON and stop.
    * ``--dry-run``: validate inputs, print the request that would be sent,
      and stop without writing anything.
    * ``--mock-result``: write a placeholder result to the output file.
    * otherwise: call the API through ``ApiClient`` and write its result.

    Returns:
        ``0`` on every successful path.

    Raises:
        ValueError: If required arguments are missing, ``--limit`` is out of
            range, or the date range is rejected.
        RuntimeError: If the API client cannot be imported.
    """
    args = parse_args()

    if args.self_test:
        diagnostics = {
            "ok": True,
            "runtime_path": RUNTIME_PATH,
            "runtime_exists": os.path.isdir(RUNTIME_PATH),
            "python_version": sys.version.split()[0],
        }
        print(json.dumps(diagnostics, ensure_ascii=False))
        return 0

    if not (args.api and args.domain):
        raise ValueError("--api and --domain are required unless --self-test is used")
    if not 1 <= args.limit <= 10:
        raise ValueError("--limit must be between 1 and 10")

    start_ym, end_ym = default_date_range(args.api, args.start_date, args.end_date)
    validate_range(args.api, start_ym, end_ym)

    endpoint = API_MAP[args.api]
    query = build_query(args, start_ym, end_ym)
    output_path = resolve_output_path(args.api, args.domain, args.output)

    # Echoed in dry-run output and stored next to the result in the file.
    request_meta = {
        "api": args.api,
        "endpoint": endpoint,
        "domain": args.domain,
        "query": query,
        "output": output_path,
        "dry_run": bool(args.dry_run),
        "mock_result": bool(args.mock_result),
    }

    if args.dry_run:
        print(json.dumps({"ok": True, "request": request_meta}, ensure_ascii=False))
        return 0

    # The mock and live paths differ only in where the result comes from and
    # in the last key of the summary line.
    if args.mock_result:
        result = {
            "source": "mock",
            "message": "mock_result enabled",
        }
        summary = {"ok": True, "output": output_path, "mode": "mock"}
    else:
        client_class = import_api_client()
        client = client_class()
        result = client.call_api(endpoint, path_params={"domain": args.domain}, query=query)
        summary = {"ok": True, "output": output_path, "endpoint": endpoint}

    write_payload(output_path, {"request": request_meta, "result": result})
    print(json.dumps(summary, ensure_ascii=False))
    return 0
if __name__ == "__main__":
    # Top-level boundary: any ordinary exception becomes a one-line JSON
    # error on stderr and exit status 1. SystemExit (e.g. from argparse) is
    # not an Exception subclass and passes through untouched.
    try:
        exit_code = run()
    except Exception as exc:
        failure = {"ok": False, "error": str(exc)}
        print(json.dumps(failure, ensure_ascii=False), file=sys.stderr)
        exit_code = 1
    raise SystemExit(exit_code)