250 行
7.8 KiB
Python
可执行文件
250 行
7.8 KiB
Python
可执行文件
#!/usr/bin/env python3
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from datetime import date
|
|
from typing import Dict, Optional, Tuple
|
|
|
|
# Directory that is prepended to sys.path before importing data_api and that
# --self-test checks for existence.
RUNTIME_PATH = "/opt/.manus/.sandbox-runtime"

# CLI --api choice -> endpoint name handed to ApiClient.call_api.
API_MAP = {
    "global-rank": "SimilarWeb/get_global_rank",
    "visits-total": "SimilarWeb/get_visits_total",
    "unique-visit": "SimilarWeb/get_unique_visit",
    "bounce-rate": "SimilarWeb/get_bounce_rate",
    "traffic-sources-desktop": "SimilarWeb/get_traffic_sources_desktop",
    "traffic-sources-mobile": "SimilarWeb/get_traffic_sources_mobile",
    "traffic-by-country": "SimilarWeb/get_total_traffic_by_country",
}

# Length, in months, of the default window used when no start date is given.
DEFAULT_MONTHS = {
    "global-rank": 6,
    "visits-total": 6,
    "unique-visit": 6,
    "bounce-rate": 6,
    "traffic-sources-desktop": 3,
    "traffic-sources-mobile": 3,
    "traffic-by-country": 3,
}

# APIs whose query gets "country" and "granularity" parameters.
COUNTRY_REQUIRED_APIS = {
    "visits-total",
    "bounce-rate",
    "traffic-sources-desktop",
    "traffic-sources-mobile",
}

# Shape check for YYYY-MM strings; the 01..12 month range is checked by the parser.
DATE_RE = re.compile(r"^\d{4}-\d{2}$")
|
|
|
|
|
|
@dataclass(frozen=True, order=True)
class YearMonth:
    """An immutable calendar month identified by a year and a month number.

    Ordering is chronological: ``order=True`` generates ``<``, ``<=``, ``>``
    and ``>=`` that compare instances as ``(year, month)`` tuples, so ``year``
    must remain the first field. Comparing with a value of another type
    returns ``NotImplemented`` (surfacing as ``TypeError`` from the operator)
    instead of failing with ``AttributeError`` on a missing ``year``.
    """

    # Calendar year; rendered zero-padded to four digits by to_string().
    year: int
    # Month number; rendered zero-padded to two digits. No range check here.
    month: int

    def to_string(self) -> str:
        """Return the month formatted as ``YYYY-MM``."""
        return f"{self.year:04d}-{self.month:02d}"
|
|
|
|
|
|
def parse_ym(value: str, field: str) -> YearMonth:
    """Parse a ``YYYY-MM`` string into a YearMonth.

    Args:
        value: Text to parse.
        field: Name of the option being parsed; used only in error messages.

    Returns:
        The parsed YearMonth.

    Raises:
        ValueError: If ``value`` is not exactly four digits, a dash and two
            digits, or if the month is outside 01..12.
    """
    # fullmatch() rather than match(): with match() the pattern's trailing "$"
    # also succeeds just before a final newline, so "2024-01\n" was accepted.
    if not DATE_RE.fullmatch(value):
        raise ValueError(f"{field} must be YYYY-MM, got {value!r}")
    year = int(value[0:4])
    month = int(value[5:7])
    if not 1 <= month <= 12:
        raise ValueError(f"{field} month must be in 01..12, got {value!r}")
    return YearMonth(year, month)
|
|
|
|
|
|
def shift_months(ym: YearMonth, delta: int) -> YearMonth:
    """Return the month that lies ``delta`` months after ``ym``.

    A negative ``delta`` moves backwards in time.

    Raises:
        ValueError: If the result would fall before month 1 of year 0.
    """
    # Work on a single linear month index (year * 12 + zero-based month) so
    # that year roll-over in either direction is plain integer arithmetic.
    month_index = ym.year * 12 + (ym.month - 1) + delta
    if month_index < 0:
        raise ValueError("date range underflow")
    year, month_offset = divmod(month_index, 12)
    return YearMonth(year, month_offset + 1)
|
|
|
|
|
|
def month_span(start: YearMonth, end: YearMonth) -> int:
    """Return the number of months from ``start`` to ``end``, counting both.

    The same month for both arguments gives 1; an ``end`` before ``start``
    gives zero or a negative number.
    """
    whole_years = end.year - start.year
    extra_months = end.month - start.month
    return whole_years * 12 + extra_months + 1
|
|
|
|
|
|
def last_complete_month(today: date) -> YearMonth:
    """Return the calendar month immediately before the one containing ``today``."""
    return shift_months(YearMonth(today.year, today.month), -1)
|
|
|
|
|
|
def default_date_range(api: str, start: Optional[str], end: Optional[str]) -> Tuple[YearMonth, YearMonth]:
    """Resolve the (start, end) months for a request, filling in defaults.

    Args:
        api: Key into DEFAULT_MONTHS selecting the default window length.
        start: ``YYYY-MM`` start month, or a falsy value to derive it.
        end: ``YYYY-MM`` end month, or a falsy value to use the last
            complete month relative to today.

    Returns:
        ``(start_ym, end_ym)``. A derived start is placed so that the range
        covers ``DEFAULT_MONTHS[api]`` months ending at ``end_ym``.

    Raises:
        KeyError: If ``api`` has no entry in DEFAULT_MONTHS.
        ValueError: If a supplied date string cannot be parsed.
    """
    window = DEFAULT_MONTHS[api]
    fallback_end = last_complete_month(date.today())

    if end:
        end_ym = parse_ym(end, "end_date")
    else:
        end_ym = fallback_end

    if start:
        start_ym = parse_ym(start, "start_date")
    else:
        # The window is inclusive of the end month, hence window - 1 steps back.
        start_ym = shift_months(end_ym, -(window - 1))

    return start_ym, end_ym
|
|
|
|
|
|
def validate_range(api: str, start_ym: YearMonth, end_ym: YearMonth) -> None:
    """Reject date ranges that the request must not use.

    Checks, in order: the end is not before the start; the end is not after
    the last complete month (relative to today); the start is no more than
    11 months before that last complete month; the inclusive span is at most
    12 months; and, for ``traffic-by-country``, at most 3 months.

    Raises:
        ValueError: On the first check that fails.
    """
    if end_ym < start_ym:
        raise ValueError("end_date must be >= start_date")

    newest_allowed = last_complete_month(date.today())
    oldest_allowed = shift_months(newest_allowed, -11)

    if newest_allowed < end_ym:
        raise ValueError(f"end_date must be <= last complete month {newest_allowed.to_string()}")
    if start_ym < oldest_allowed:
        raise ValueError(f"start_date must be >= {oldest_allowed.to_string()} (12-month lookback)")

    months = month_span(start_ym, end_ym)
    if months > 12:
        raise ValueError("date range cannot exceed 12 months")
    if months > 3 and api == "traffic-by-country":
        raise ValueError("traffic-by-country supports at most 3 months")
|
|
|
|
|
|
def sanitize_filename(value: str) -> str:
    """Reduce ``value`` to characters that are safe in a file name.

    Surrounding whitespace is removed, every run of characters other than
    ASCII letters, digits, ``_``, ``.`` and ``-`` becomes a single ``-``, and
    leading/trailing dashes are dropped. If nothing is left, ``"result"`` is
    returned.
    """
    trimmed = value.strip()
    dashed = re.sub(r"[^a-zA-Z0-9_.-]+", "-", trimmed)
    cleaned = dashed.strip("-")
    if cleaned:
        return cleaned
    return "result"
|
|
|
|
|
|
def resolve_output_path(api: str, domain: str, output: Optional[str]) -> str:
    """Return the path the result JSON should be written to.

    A truthy ``output`` is returned unchanged. Otherwise the path is
    ``/data/<api>-<domain>.json`` with both parts passed through
    sanitize_filename.
    """
    if not output:
        api_part = sanitize_filename(api)
        domain_part = sanitize_filename(domain)
        return os.path.join("/data", f"{api_part}-{domain_part}.json")
    return output
|
|
|
|
|
|
def build_query(args: argparse.Namespace, start_ym: YearMonth, end_ym: YearMonth) -> Dict[str, object]:
    """Assemble the query parameters for the selected API.

    ``start_date`` and ``end_date`` are always present. ``main_domain_only``
    is added only when the flag is set. APIs listed in COUNTRY_REQUIRED_APIS
    also get ``country`` and ``granularity``; ``traffic-by-country`` gets
    ``limit`` instead.
    """
    query: Dict[str, object] = {}
    query["start_date"] = start_ym.to_string()
    query["end_date"] = end_ym.to_string()

    if args.main_domain_only:
        query["main_domain_only"] = True

    api = args.api
    if api in COUNTRY_REQUIRED_APIS:
        query.update(country=args.country, granularity=args.granularity)
    elif api == "traffic-by-country":
        query["limit"] = args.limit

    return query
|
|
|
|
|
|
def import_api_client():
    """Import and return the ``ApiClient`` class from the ``data_api`` module.

    RUNTIME_PATH is placed at the front of ``sys.path`` first so the module
    can be found there.

    Raises:
        RuntimeError: If the import fails for any reason; the original
            exception is chained as the cause.
    """
    sys.path.insert(0, RUNTIME_PATH)
    try:
        from data_api import ApiClient  # type: ignore
    except Exception as exc:  # pragma: no cover
        message = "data_api import failed. Ensure runtime is mounted to /opt/.manus/.sandbox-runtime"
        raise RuntimeError(message) from exc
    else:
        return ApiClient
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse the process command line and return the resulting namespace.

    ``--api`` and ``--domain`` are optional at the parser level; no option is
    marked required here.
    """
    parser = argparse.ArgumentParser(
        description="Call SimilarWeb APIs using ApiClient inside Docker and persist output JSON."
    )

    # (flag, add_argument keyword options), registered in this order.
    option_table = (
        ("--api", {"choices": sorted(API_MAP)}),
        ("--domain", {}),
        ("--start-date", {}),
        ("--end-date", {}),
        ("--country", {"default": "world"}),
        ("--granularity", {"default": "monthly"}),
        ("--limit", {"type": int, "default": 10}),
        ("--main-domain-only", {"action": "store_true"}),
        ("--output", {}),
        ("--dry-run", {"action": "store_true"}),
        ("--mock-result", {"action": "store_true"}),
        ("--self-test", {"action": "store_true"}),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser.parse_args()
|
|
|
|
|
|
def write_payload(path: str, payload: Dict[str, object]) -> None:
    """Write ``payload`` to ``path`` as indented UTF-8 JSON plus a newline.

    Missing parent directories are created. Non-ASCII characters are written
    as-is rather than escaped.

    Args:
        path: Destination file path; an existing file is overwritten.
        payload: JSON-serializable mapping to persist.

    Raises:
        TypeError: If ``payload`` contains a value json cannot serialize.
        ValueError: If ``payload`` contains a circular reference.
        OSError: If the directory or file cannot be created or written.
    """
    # Serialize before touching the filesystem: streaming json.dump() into an
    # already-opened file leaves a truncated, invalid JSON file behind when
    # serialization fails part-way through.
    text = json.dumps(payload, ensure_ascii=False, indent=2)

    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(text + "\n")
|
|
|
|
|
|
def run() -> int:
    """Execute the CLI and return the process exit code (always 0 on success).

    Modes, checked in this order:
      * ``--self-test``: print a JSON report about the runtime and stop.
      * ``--dry-run``: validate, print the request that would be made, stop.
      * ``--mock-result``: write a placeholder result instead of calling the API.
      * otherwise: call the API through ApiClient and write its result.

    Each mode prints a single JSON object to stdout.

    Raises:
        ValueError: If required options are missing, ``--limit`` is outside
            1..10, or the date range is invalid.
        RuntimeError: If the API client module cannot be imported.
    """
    args = parse_args()

    if args.self_test:
        report = {
            "ok": True,
            "runtime_path": RUNTIME_PATH,
            "runtime_exists": os.path.isdir(RUNTIME_PATH),
            "python_version": sys.version.split()[0],
        }
        print(json.dumps(report, ensure_ascii=False))
        return 0

    if not (args.api and args.domain):
        raise ValueError("--api and --domain are required unless --self-test is used")

    if not 1 <= args.limit <= 10:
        raise ValueError("--limit must be between 1 and 10")

    start_ym, end_ym = default_date_range(args.api, args.start_date, args.end_date)
    validate_range(args.api, start_ym, end_ym)

    endpoint = API_MAP[args.api]
    query = build_query(args, start_ym, end_ym)
    output_path = resolve_output_path(args.api, args.domain, args.output)

    # Description of the request; echoed by --dry-run and stored with the result.
    request_meta = {
        "api": args.api,
        "endpoint": endpoint,
        "domain": args.domain,
        "query": query,
        "output": output_path,
        "dry_run": bool(args.dry_run),
        "mock_result": bool(args.mock_result),
    }

    if args.dry_run:
        print(json.dumps({"ok": True, "request": request_meta}, ensure_ascii=False))
        return 0

    if args.mock_result:
        # No client import and no API call in mock mode.
        result = {
            "source": "mock",
            "message": "mock_result enabled",
        }
        summary = {"ok": True, "output": output_path, "mode": "mock"}
    else:
        client_cls = import_api_client()
        client = client_cls()
        result = client.call_api(endpoint, path_params={"domain": args.domain}, query=query)
        summary = {"ok": True, "output": output_path, "endpoint": endpoint}

    write_payload(output_path, {"request": request_meta, "result": result})
    print(json.dumps(summary, ensure_ascii=False))
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Any ordinary exception is reported as a JSON object on stderr and turned
    # into exit status 1. SystemExit (e.g. raised by argparse) is not an
    # Exception subclass and therefore passes through untouched.
    try:
        exit_code = run()
    except Exception as exc:
        print(json.dumps({"ok": False, "error": str(exc)}, ensure_ascii=False), file=sys.stderr)
        raise SystemExit(1)
    raise SystemExit(exit_code)
|