#!/usr/bin/env python3
"""Call SimilarWeb APIs through the sandbox ``ApiClient`` and persist the JSON result.

The script resolves a ``YYYY-MM`` date range (defaulting to a per-API window
that ends at the last complete month), validates it against the 12-month
lookback limit, calls the selected endpoint and writes the request metadata
together with the result to a JSON file.
"""
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass
from datetime import date
from typing import Dict, Optional, Tuple

# Directory that is put on sys.path so that ``data_api`` can be imported.
RUNTIME_PATH = "/opt/.manus/.sandbox-runtime"

# CLI api name -> ApiClient endpoint identifier.
API_MAP = {
    "global-rank": "SimilarWeb/get_global_rank",
    "visits-total": "SimilarWeb/get_visits_total",
    "unique-visit": "SimilarWeb/get_unique_visit",
    "bounce-rate": "SimilarWeb/get_bounce_rate",
    "traffic-sources-desktop": "SimilarWeb/get_traffic_sources_desktop",
    "traffic-sources-mobile": "SimilarWeb/get_traffic_sources_mobile",
    "traffic-by-country": "SimilarWeb/get_total_traffic_by_country",
}

# Default window length (in months) used when no start date is supplied.
DEFAULT_MONTHS = {
    "global-rank": 6,
    "visits-total": 6,
    "unique-visit": 6,
    "bounce-rate": 6,
    "traffic-sources-desktop": 3,
    "traffic-sources-mobile": 3,
    "traffic-by-country": 3,
}

# APIs whose query carries the ``country`` and ``granularity`` parameters.
COUNTRY_REQUIRED_APIS = {
    "visits-total",
    "bounce-rate",
    "traffic-sources-desktop",
    "traffic-sources-mobile",
}

DATE_RE = re.compile(r"^\d{4}-\d{2}$")


@dataclass(frozen=True, order=True)
class YearMonth:
    """A calendar month, ordered chronologically by ``(year, month)``."""

    year: int
    month: int

    def to_string(self) -> str:
        """Return the month formatted as ``YYYY-MM``."""
        return f"{self.year:04d}-{self.month:02d}"


def parse_ym(value: str, field: str) -> YearMonth:
    """Parse a ``YYYY-MM`` string into a :class:`YearMonth`.

    Args:
        value: Text to parse.
        field: Name of the originating option, used in error messages.

    Raises:
        ValueError: If ``value`` is not exactly ``YYYY-MM`` or the month is
            outside 01..12.
    """
    # fullmatch (rather than match with "$") also rejects a trailing newline.
    if not DATE_RE.fullmatch(value):
        raise ValueError(f"{field} must be YYYY-MM, got {value!r}")
    year = int(value[0:4])
    month = int(value[5:7])
    if month < 1 or month > 12:
        raise ValueError(f"{field} month must be in 01..12, got {value!r}")
    return YearMonth(year, month)


def shift_months(ym: YearMonth, delta: int) -> YearMonth:
    """Return ``ym`` moved by ``delta`` months (negative moves back in time).

    Raises:
        ValueError: If the result would fall before month 1 of year 0.
    """
    zero_based = ym.year * 12 + (ym.month - 1) + delta
    if zero_based < 0:
        raise ValueError("date range underflow")
    return YearMonth(zero_based // 12, (zero_based % 12) + 1)


def month_span(start: YearMonth, end: YearMonth) -> int:
    """Return the number of months in the inclusive range ``start..end``."""
    return (end.year - start.year) * 12 + (end.month - start.month) + 1


def last_complete_month(today: date) -> YearMonth:
    """Return the month immediately preceding the month of ``today``."""
    current = YearMonth(today.year, today.month)
    return shift_months(current, -1)


def default_date_range(
    api: str,
    start: Optional[str],
    end: Optional[str],
    today: Optional[date] = None,
) -> Tuple[YearMonth, YearMonth]:
    """Resolve the effective ``(start, end)`` months for a request.

    A missing ``end`` defaults to the last complete month; a missing ``start``
    defaults to ``DEFAULT_MONTHS[api]`` months ending at the resolved end.

    Args:
        api: Key of ``DEFAULT_MONTHS``.
        start: Optional ``YYYY-MM`` start date.
        end: Optional ``YYYY-MM`` end date.
        today: Reference date; defaults to ``date.today()``.

    Raises:
        ValueError: If a supplied date is malformed.
    """
    if today is None:
        today = date.today()
    window = DEFAULT_MONTHS[api]
    lcm = last_complete_month(today)
    end_ym = parse_ym(end, "end_date") if end else lcm
    start_ym = parse_ym(start, "start_date") if start else shift_months(end_ym, -(window - 1))
    return start_ym, end_ym


def validate_range(
    api: str,
    start_ym: YearMonth,
    end_ym: YearMonth,
    today: Optional[date] = None,
) -> None:
    """Check a date range against the limits enforced by this script.

    Args:
        api: Selected API name; ``traffic-by-country`` has a tighter limit.
        start_ym: First month of the range.
        end_ym: Last month of the range.
        today: Reference date; defaults to ``date.today()``.

    Raises:
        ValueError: If the range is inverted, ends after the last complete
            month, starts before the 12-month lookback, or is too long.
    """
    if today is None:
        today = date.today()
    if end_ym < start_ym:
        raise ValueError("end_date must be >= start_date")

    lcm = last_complete_month(today)
    oldest_allowed = shift_months(lcm, -11)
    if end_ym > lcm:
        raise ValueError(f"end_date must be <= last complete month {lcm.to_string()}")
    if start_ym < oldest_allowed:
        raise ValueError(f"start_date must be >= {oldest_allowed.to_string()} (12-month lookback)")

    span = month_span(start_ym, end_ym)
    # Already implied by the two bounds above; kept as a defensive guard.
    if span > 12:
        raise ValueError("date range cannot exceed 12 months")
    if api == "traffic-by-country" and span > 3:
        raise ValueError("traffic-by-country supports at most 3 months")


def sanitize_filename(value: str) -> str:
    """Reduce ``value`` to ``[a-zA-Z0-9_.-]``, falling back to ``"result"``.

    Runs of any other character become a single ``-`` and leading/trailing
    ``-`` characters are removed.
    """
    safe = re.sub(r"[^a-zA-Z0-9_.-]+", "-", value.strip())
    return safe.strip("-") or "result"


def resolve_output_path(api: str, domain: str, output: Optional[str]) -> str:
    """Return ``output`` if given, else ``/data/<api>-<domain>.json``."""
    if output:
        return output
    file_name = f"{sanitize_filename(api)}-{sanitize_filename(domain)}.json"
    return os.path.join("/data", file_name)


def build_query(args: argparse.Namespace, start_ym: YearMonth, end_ym: YearMonth) -> Dict[str, object]:
    """Build the query parameters for the selected API.

    The date range is always included. ``main_domain_only`` is added only when
    set; ``country``/``granularity`` are added for ``COUNTRY_REQUIRED_APIS``
    and ``limit`` for ``traffic-by-country``.
    """
    query: Dict[str, object] = {
        "start_date": start_ym.to_string(),
        "end_date": end_ym.to_string(),
    }
    if args.main_domain_only:
        query["main_domain_only"] = True
    if args.api in COUNTRY_REQUIRED_APIS:
        query["country"] = args.country
        query["granularity"] = args.granularity
    elif args.api == "traffic-by-country":
        query["limit"] = args.limit
    return query


def import_api_client():
    """Import and return the ``ApiClient`` class from ``RUNTIME_PATH``.

    Raises:
        RuntimeError: If ``data_api`` cannot be imported.
    """
    sys.path.insert(0, RUNTIME_PATH)
    try:
        from data_api import ApiClient  # type: ignore
    except Exception as exc:  # pragma: no cover
        raise RuntimeError(
            "data_api import failed. Ensure runtime is mounted to /opt/.manus/.sandbox-runtime"
        ) from exc
    return ApiClient


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Call SimilarWeb APIs using ApiClient inside Docker and persist output JSON."
    )
    parser.add_argument("--api", choices=sorted(API_MAP.keys()))
    parser.add_argument("--domain")
    parser.add_argument("--start-date")
    parser.add_argument("--end-date")
    parser.add_argument("--country", default="world")
    parser.add_argument("--granularity", default="monthly")
    parser.add_argument("--limit", type=int, default=10)
    parser.add_argument("--main-domain-only", action="store_true")
    parser.add_argument("--output")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--mock-result", action="store_true")
    parser.add_argument("--self-test", action="store_true")
    return parser.parse_args()


def write_payload(path: str, payload: Dict[str, object]) -> None:
    """Write ``payload`` to ``path`` as indented UTF-8 JSON plus a newline.

    Missing parent directories are created.

    Raises:
        TypeError: If ``payload`` is not JSON-serializable; in that case the
            target file is left untouched.
    """
    # Serialize before opening the file so that a serialization failure
    # cannot leave a truncated or partially written file behind.
    text = json.dumps(payload, ensure_ascii=False, indent=2)
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)
        f.write("\n")


def run() -> int:
    """Execute the CLI and return the process exit code.

    Raises:
        ValueError: On missing or invalid arguments or date ranges.
        RuntimeError: If the API client cannot be imported.
    """
    args = parse_args()

    if args.self_test:
        result = {
            "ok": True,
            "runtime_path": RUNTIME_PATH,
            "runtime_exists": os.path.isdir(RUNTIME_PATH),
            "python_version": sys.version.split()[0],
        }
        print(json.dumps(result, ensure_ascii=False))
        return 0

    if not args.api or not args.domain:
        raise ValueError("--api and --domain are required unless --self-test is used")
    if args.limit < 1 or args.limit > 10:
        raise ValueError("--limit must be between 1 and 10")

    # One snapshot of "today" keeps defaulting and validation consistent even
    # if the script happens to run across a month boundary.
    today = date.today()
    start_ym, end_ym = default_date_range(args.api, args.start_date, args.end_date, today)
    validate_range(args.api, start_ym, end_ym, today)

    endpoint = API_MAP[args.api]
    query = build_query(args, start_ym, end_ym)
    output_path = resolve_output_path(args.api, args.domain, args.output)
    request_meta = {
        "api": args.api,
        "endpoint": endpoint,
        "domain": args.domain,
        "query": query,
        "output": output_path,
        "dry_run": bool(args.dry_run),
        "mock_result": bool(args.mock_result),
    }

    if args.dry_run:
        print(json.dumps({"ok": True, "request": request_meta}, ensure_ascii=False))
        return 0

    if args.mock_result:
        payload = {
            "request": request_meta,
            "result": {
                "source": "mock",
                "message": "mock_result enabled",
            },
        }
        write_payload(output_path, payload)
        print(json.dumps({"ok": True, "output": output_path, "mode": "mock"}, ensure_ascii=False))
        return 0

    ApiClient = import_api_client()
    client = ApiClient()
    result = client.call_api(endpoint, path_params={"domain": args.domain}, query=query)
    payload = {"request": request_meta, "result": result}
    write_payload(output_path, payload)
    print(json.dumps({"ok": True, "output": output_path, "endpoint": endpoint}, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    try:
        raise SystemExit(run())
    except Exception as exc:
        # Report failures as a JSON object on stderr and exit non-zero.
        print(json.dumps({"ok": False, "error": str(exc)}, ensure_ascii=False), file=sys.stderr)
        raise SystemExit(1)