Add similarweb-analytics Docker sandbox skill
这个提交包含在:
@@ -0,0 +1,249 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
# Directory prepended to ``sys.path`` before ``data_api`` is imported.
RUNTIME_PATH = "/opt/.manus/.sandbox-runtime"

# CLI ``--api`` choice -> endpoint name passed to ``ApiClient.call_api``.
API_MAP = {
    "global-rank": "SimilarWeb/get_global_rank",
    "visits-total": "SimilarWeb/get_visits_total",
    "unique-visit": "SimilarWeb/get_unique_visit",
    "bounce-rate": "SimilarWeb/get_bounce_rate",
    "traffic-sources-desktop": "SimilarWeb/get_traffic_sources_desktop",
    "traffic-sources-mobile": "SimilarWeb/get_traffic_sources_mobile",
    "traffic-by-country": "SimilarWeb/get_total_traffic_by_country",
}

# Window length (in months) used to derive the start month when the caller
# supplies no explicit start date.
DEFAULT_MONTHS = {
    "global-rank": 6,
    "visits-total": 6,
    "unique-visit": 6,
    "bounce-rate": 6,
    "traffic-sources-desktop": 3,
    "traffic-sources-mobile": 3,
    "traffic-by-country": 3,
}

# APIs whose query is built with ``country`` and ``granularity`` entries.
COUNTRY_REQUIRED_APIS = {
    "visits-total",
    "bounce-rate",
    "traffic-sources-desktop",
    "traffic-sources-mobile",
}


# Shape check for ``YYYY-MM`` strings; the numeric month range is checked
# separately by the parser that uses this pattern.
# ``\Z`` anchors at the true end of the string. ``$`` would also match just
# before a trailing newline and let a value such as "2024-01\n" through.
DATE_RE = re.compile(r"^\d{4}-\d{2}\Z")


@dataclass(frozen=True, order=True)
class YearMonth:
    """Immutable calendar month identified by ``year`` and ``month``.

    ``order=True`` makes the dataclass generate ``<``, ``<=``, ``>`` and
    ``>=`` by comparing the field tuple ``(year, month)``, so instances
    order chronologically and every comparison operator is defined
    directly instead of depending on reflected fallbacks.
    """

    year: int
    month: int  # 1-based month number; the range is not enforced here

    def to_string(self) -> str:
        """Return the month formatted as zero-padded ``YYYY-MM``."""
        return f"{self.year:04d}-{self.month:02d}"


def parse_ym(value: str, field: str) -> YearMonth:
    """Convert a ``YYYY-MM`` string into a ``YearMonth``.

    Args:
        value: Text to parse.
        field: Name used to label the value in error messages.

    Raises:
        ValueError: If ``value`` does not match ``DATE_RE`` or its month
            part is outside 1..12.
    """
    if DATE_RE.match(value) is None:
        raise ValueError(f"{field} must be YYYY-MM, got {value!r}")

    # The pattern guarantees four digits, a dash, then two digits.
    year_part, month_part = value[:4], value[5:7]
    month = int(month_part)
    if not 1 <= month <= 12:
        raise ValueError(f"{field} month must be in 01..12, got {value!r}")
    return YearMonth(int(year_part), month)


def shift_months(ym: YearMonth, delta: int) -> YearMonth:
    """Return the month that lies ``delta`` months after ``ym``.

    A negative ``delta`` moves backwards.

    Raises:
        ValueError: If the shifted month index would be negative.
    """
    # Count months from year 0, month index 0, so the shift is plain addition.
    absolute = ym.year * 12 + (ym.month - 1) + delta
    if absolute < 0:
        raise ValueError("date range underflow")
    year, month_index = divmod(absolute, 12)
    return YearMonth(year, month_index + 1)


def month_span(start: YearMonth, end: YearMonth) -> int:
    """Return the number of months from ``start`` to ``end``, both inclusive."""
    year_gap = end.year - start.year
    month_gap = end.month - start.month
    # +1 because both endpoints are counted.
    return year_gap * 12 + month_gap + 1


def last_complete_month(today: date) -> YearMonth:
    """Return the calendar month immediately before the month of ``today``."""
    return shift_months(YearMonth(today.year, today.month), -1)


def default_date_range(api: str, start: Optional[str], end: Optional[str]) -> Tuple[YearMonth, YearMonth]:
    """Resolve the ``(start, end)`` months for a request.

    A missing (or empty) ``end`` falls back to the last complete month
    relative to today's date. A missing (or empty) ``start`` is placed so
    that the range up to ``end`` covers ``DEFAULT_MONTHS[api]`` months.

    Raises:
        KeyError: If ``api`` has no entry in ``DEFAULT_MONTHS``.
        ValueError: If a supplied date string cannot be parsed.
    """
    months = DEFAULT_MONTHS[api]
    latest = last_complete_month(date.today())

    if end:
        end_ym = parse_ym(end, "end_date")
    else:
        end_ym = latest

    if start:
        start_ym = parse_ym(start, "start_date")
    else:
        # The end month itself counts, so step back one month fewer.
        start_ym = shift_months(end_ym, -(months - 1))

    return start_ym, end_ym


def validate_range(api: str, start_ym: YearMonth, end_ym: YearMonth) -> None:
    """Check a month range against the limits enforced by this script.

    The checks run in this order, and the first failure is raised:
    ``end_ym`` not before ``start_ym``; ``end_ym`` not after the last
    complete month; ``start_ym`` no more than 11 months before the last
    complete month; at most 12 months in total; at most 3 months for the
    ``traffic-by-country`` API.

    Raises:
        ValueError: On the first violated rule.
    """
    if end_ym < start_ym:
        raise ValueError("end_date must be >= start_date")

    newest = last_complete_month(date.today())
    oldest = shift_months(newest, -11)

    if newest < end_ym:
        raise ValueError(f"end_date must be <= last complete month {newest.to_string()}")
    if start_ym < oldest:
        raise ValueError(f"start_date must be >= {oldest.to_string()} (12-month lookback)")

    months = month_span(start_ym, end_ym)
    if months > 12:
        raise ValueError("date range cannot exceed 12 months")
    if months > 3 and api == "traffic-by-country":
        raise ValueError("traffic-by-country supports at most 3 months")


def sanitize_filename(value: str) -> str:
    """Reduce ``value`` to a string usable as a file-name component.

    Surrounding whitespace is removed, every run of characters other than
    ASCII letters, digits, ``_``, ``.`` and ``-`` becomes a single ``-``,
    and leading/trailing dashes are trimmed. If nothing is left, the
    placeholder ``"result"`` is returned.
    """
    trimmed = value.strip()
    dashed = re.sub(r"[^a-zA-Z0-9_.-]+", "-", trimmed)
    candidate = dashed.strip("-")
    if candidate:
        return candidate
    return "result"


def resolve_output_path(api: str, domain: str, output: Optional[str]) -> str:
    """Return the path the result JSON should be written to.

    A non-empty ``output`` is returned unchanged. Otherwise the path is
    ``/data/<api>-<domain>.json``, with both parts passed through
    ``sanitize_filename``.
    """
    if not output:
        parts = (sanitize_filename(api), sanitize_filename(domain))
        return os.path.join("/data", "-".join(parts) + ".json")
    return output


def build_query(args: argparse.Namespace, start_ym: YearMonth, end_ym: YearMonth) -> Dict[str, object]:
    """Assemble the query parameters for the selected API.

    ``start_date`` and ``end_date`` are always present. ``main_domain_only``
    is added only when the flag is set. APIs listed in
    ``COUNTRY_REQUIRED_APIS`` additionally get ``country`` and
    ``granularity``; ``traffic-by-country`` gets ``limit`` instead.
    """
    query: Dict[str, object] = {}
    query["start_date"] = start_ym.to_string()
    query["end_date"] = end_ym.to_string()

    if args.main_domain_only:
        query["main_domain_only"] = True

    api = args.api
    if api in COUNTRY_REQUIRED_APIS:
        query.update(country=args.country, granularity=args.granularity)
    elif api == "traffic-by-country":
        query["limit"] = args.limit

    return query


def import_api_client():
    """Import and return the ``ApiClient`` class from ``data_api``.

    ``RUNTIME_PATH`` is put at the front of ``sys.path`` first so the
    module can be found there.

    Raises:
        RuntimeError: If the import fails for any reason; the original
            exception is attached as the cause.
    """
    sys.path.insert(0, RUNTIME_PATH)
    try:
        from data_api import ApiClient  # type: ignore
    except Exception as err:  # pragma: no cover
        message = "data_api import failed. Ensure runtime is mounted to /opt/.manus/.sandbox-runtime"
        raise RuntimeError(message) from err
    else:
        return ApiClient


def parse_args() -> argparse.Namespace:
    """Define the command-line interface and parse the process arguments.

    No option is marked required at the parser level. ``--api`` is limited
    to the keys of ``API_MAP``.
    """
    cli = argparse.ArgumentParser(
        description="Call SimilarWeb APIs using ApiClient inside Docker and persist output JSON."
    )
    cli.add_argument("--api", choices=sorted(API_MAP))

    # Plain optional string options with no default.
    for option in ("--domain", "--start-date", "--end-date"):
        cli.add_argument(option)

    cli.add_argument("--country", default="world")
    cli.add_argument("--granularity", default="monthly")
    cli.add_argument("--limit", type=int, default=10)
    cli.add_argument("--main-domain-only", action="store_true")
    cli.add_argument("--output")

    # Mode switches; each defaults to False.
    for switch in ("--dry-run", "--mock-result", "--self-test"):
        cli.add_argument(switch, action="store_true")

    return cli.parse_args()


def write_payload(path: str, payload: Dict[str, object]) -> None:
    """Write ``payload`` to ``path`` as indented UTF-8 JSON plus a newline.

    The parent directory is created first when ``path`` has one.
    Non-ASCII characters are written as-is rather than escaped.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)
        handle.write("\n")


def run() -> int:
    """Execute the CLI and return the process exit status.

    Modes, checked in this order:

    * ``--self-test``: print a small environment report and stop.
    * ``--dry-run``: validate the inputs and print the request that would
      be sent, without calling the API or writing a file.
    * ``--mock-result``: write a placeholder result to the output file.
    * otherwise: call the API through ``ApiClient`` and write its result.

    Returns:
        ``0`` on every path that completes.

    Raises:
        ValueError: If ``--api``/``--domain`` are missing, ``--limit`` is
            outside 1..10, or the date range is rejected.
    """
    args = parse_args()

    if args.self_test:
        report = {
            "ok": True,
            "runtime_path": RUNTIME_PATH,
            "runtime_exists": os.path.isdir(RUNTIME_PATH),
            "python_version": sys.version.split()[0],
        }
        print(json.dumps(report, ensure_ascii=False))
        return 0

    if not (args.api and args.domain):
        raise ValueError("--api and --domain are required unless --self-test is used")
    if not 1 <= args.limit <= 10:
        raise ValueError("--limit must be between 1 and 10")

    start_ym, end_ym = default_date_range(args.api, args.start_date, args.end_date)
    validate_range(args.api, start_ym, end_ym)

    endpoint = API_MAP[args.api]
    query = build_query(args, start_ym, end_ym)
    output_path = resolve_output_path(args.api, args.domain, args.output)

    # Echoed back in the dry-run output and stored next to the result.
    request_meta = {
        "api": args.api,
        "endpoint": endpoint,
        "domain": args.domain,
        "query": query,
        "output": output_path,
        "dry_run": bool(args.dry_run),
        "mock_result": bool(args.mock_result),
    }

    if args.dry_run:
        print(json.dumps({"ok": True, "request": request_meta}, ensure_ascii=False))
        return 0

    if args.mock_result:
        result = {
            "source": "mock",
            "message": "mock_result enabled",
        }
        summary = {"ok": True, "output": output_path, "mode": "mock"}
    else:
        # The runtime import is deferred to this branch, so the other modes
        # never attempt it.
        client = import_api_client()()
        result = client.call_api(endpoint, path_params={"domain": args.domain}, query=query)
        summary = {"ok": True, "output": output_path, "endpoint": endpoint}

    write_payload(output_path, {"request": request_meta, "result": result})
    print(json.dumps(summary, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    # Top-level boundary: any ordinary exception becomes a one-line JSON
    # error on stderr and exit status 1. SystemExit is not an Exception
    # subclass, so it propagates unchanged.
    try:
        exit_code = run()
    except Exception as err:
        print(json.dumps({"ok": False, "error": str(err)}, ensure_ascii=False), file=sys.stderr)
        exit_code = 1
    raise SystemExit(exit_code)
在新工单中引用
屏蔽一个用户