From 5e0c64db9e25b70b0f42145173a3f6ab4f3d197b Mon Sep 17 00:00:00 2001 From: cryptocommuniums-afk Date: Thu, 5 Mar 2026 10:28:14 +0800 Subject: [PATCH] Add similarweb-analytics Docker sandbox skill --- similarweb-analytics/SKILL.md | 156 +++++++++++ similarweb-analytics/agents/openai.yaml | 4 + similarweb-analytics/references/api-matrix.md | 54 ++++ .../scripts/docker/Dockerfile | 13 + .../__pycache__/entrypoint.cpython-312.pyc | Bin 0 -> 12472 bytes .../scripts/docker/entrypoint.py | 249 ++++++++++++++++++ .../scripts/install_runtime_adapter.sh | 38 +++ similarweb-analytics/scripts/run_in_docker.sh | 128 +++++++++ .../__pycache__/data_api.cpython-312.pyc | Bin 0 -> 8392 bytes .../scripts/runtime/data_api.py | 166 ++++++++++++ .../scripts/test_docker_workflow.sh | 40 +++ .../scripts/tests/fixtures/data_api.py | 8 + 12 files changed, 856 insertions(+) create mode 100644 similarweb-analytics/SKILL.md create mode 100644 similarweb-analytics/agents/openai.yaml create mode 100644 similarweb-analytics/references/api-matrix.md create mode 100644 similarweb-analytics/scripts/docker/Dockerfile create mode 100644 similarweb-analytics/scripts/docker/__pycache__/entrypoint.cpython-312.pyc create mode 100755 similarweb-analytics/scripts/docker/entrypoint.py create mode 100755 similarweb-analytics/scripts/install_runtime_adapter.sh create mode 100755 similarweb-analytics/scripts/run_in_docker.sh create mode 100644 similarweb-analytics/scripts/runtime/__pycache__/data_api.cpython-312.pyc create mode 100755 similarweb-analytics/scripts/runtime/data_api.py create mode 100755 similarweb-analytics/scripts/test_docker_workflow.sh create mode 100644 similarweb-analytics/scripts/tests/fixtures/data_api.py diff --git a/similarweb-analytics/SKILL.md b/similarweb-analytics/SKILL.md new file mode 100644 index 0000000..c0e815c --- /dev/null +++ b/similarweb-analytics/SKILL.md @@ -0,0 +1,156 @@ +--- +name: similarweb-analytics +description: Analyze website and domain traffic with SimilarWeb 
APIs through a Docker sandbox. Use for visits, unique visitors, rank, bounce rate, traffic sources, traffic by country, and domain comparison research. +--- + +# SimilarWeb Analytics + +## Overview + +Use this skill to run SimilarWeb analytics in an isolated Docker container and save every API response to JSON immediately. +Use it when the user asks about domain traffic, popularity ranking, engagement quality, channel mix, or country-level traffic split. + +## Trigger Cues + +Use this skill when the request includes one or more of these cues: +- Domain inputs such as `google.com`, `amazon.com`, `openai.com` +- Traffic words such as `visits`, `unique visitors`, `traffic trend` +- Ranking words such as `global rank`, `website rank` +- Engagement words such as `bounce rate`, `pages per visit`, `visit duration` +- Source words such as `organic`, `paid`, `direct`, `social`, `referrals` +- Geography words such as `top countries`, `country split`, `regional traffic` +- Comparison words such as `compare`, `vs`, `benchmark` + +## Workflow + +1. Parse user intent into API call inputs: + - `domain` (required) + - `api` (required) + - Optional: `start_date`, `end_date`, `country`, `granularity`, `limit`, `main_domain_only` +2. Build image when needed: + - Run `scripts/run_in_docker.sh --build -- --self-test` +3. Execute query in Docker sandbox: + - Run `scripts/run_in_docker.sh -- --api API --domain DOMAIN ...` +4. Persist output on every call: + - Pass `--output /data/NAME.json`, or omit `--output` to get an auto-named `API-DOMAIN.json` file under `/data` + - Never keep API output only in terminal output +5. 
For comparisons: + - Execute one call per domain with the same time window + - Save each domain response as a separate JSON file for reproducible analysis + +## Command Entry Points + +- Main host wrapper: `scripts/run_in_docker.sh` +- Container entrypoint: `scripts/docker/entrypoint.py` +- Image definition: `scripts/docker/Dockerfile` +- Runtime adapter installer: `scripts/install_runtime_adapter.sh` +- Runtime adapter source: `scripts/runtime/data_api.py` +- Test runner: `scripts/test_docker_workflow.sh` + +## Quick Start + +Install runtime adapter to expected host path: + +```bash +/root/.codex/skills/similarweb-analytics/scripts/install_runtime_adapter.sh +``` + +Build image and verify runtime: + +```bash +/root/.codex/skills/similarweb-analytics/scripts/run_in_docker.sh --build -- --self-test +``` + +Dry run without consuming API credits: + +```bash +/root/.codex/skills/similarweb-analytics/scripts/run_in_docker.sh -- \ + --api visits-total \ + --domain amazon.com \ + --country world \ + --dry-run +``` + +Real call and save data immediately: + +```bash +/root/.codex/skills/similarweb-analytics/scripts/run_in_docker.sh -- \ + --api traffic-by-country \ + --domain amazon.com \ + --start-date 2025-12 \ + --end-date 2026-02 \ + --limit 10 \ + --output /data/amazon-country.json +``` + +## Supported APIs + +- `global-rank` -> `SimilarWeb/get_global_rank` +- `visits-total` -> `SimilarWeb/get_visits_total` +- `unique-visit` -> `SimilarWeb/get_unique_visit` +- `bounce-rate` -> `SimilarWeb/get_bounce_rate` +- `traffic-sources-desktop` -> `SimilarWeb/get_traffic_sources_desktop` +- `traffic-sources-mobile` -> `SimilarWeb/get_traffic_sources_mobile` +- `traffic-by-country` -> `SimilarWeb/get_total_traffic_by_country` + +For parameter matrix and defaults, see `references/api-matrix.md`. 
+ +## Sandbox Rules + +`scripts/run_in_docker.sh` runs with: +- Non-root container user +- Read-only root filesystem +- `tmpfs` only for `/tmp` and `/var/tmp` +- Dropped Linux capabilities (`--cap-drop ALL`) +- `no-new-privileges` enabled +- CPU, memory, and PID limits + +Runtime dependency mount: +- Must mount host runtime path into container at `/opt/.manus/.sandbox-runtime` +- Default host path is `/opt/.manus/.sandbox-runtime` +- You can override with `--runtime-dir PATH` +- `data_api.py` must exist in that runtime directory + +Credential pass-through: +- `SIMILARWEB_API_KEY` for official Similarweb API mode +- Optional fallback: `RAPIDAPI_KEY` and `RAPIDAPI_SIMILARWEB_HOST` +- Runner auto-forwards these env vars into container when present + +## Data Constraints + +- Historical data window is at most 12 months +- `traffic-by-country` is limited to at most 3 months +- Latest reliable month is the last complete month +- Default date range: + - 6 months: `global-rank`, `visits-total`, `unique-visit`, `bounce-rate` + - 3 months: `traffic-sources-desktop`, `traffic-sources-mobile`, `traffic-by-country` + +## Validation Record + +Last validated on `2026-03-05`: +- Docker image build succeeded +- Container self-test succeeded +- End-to-end fixture call succeeded and wrote JSON output +- Skill structure validation succeeded with `quick_validate.py` +- Runtime adapter installed to `/opt/.manus/.sandbox-runtime/data_api.py` and imported successfully +- Official mode live call attempted and failed fast with explicit credential error when `SIMILARWEB_API_KEY` is unset +- Live network call attempted via RapidAPI fallback; request reached provider and returned `403 not subscribed` (credential/subscription issue, not runtime failure) + +## Troubleshooting + +- Error `data_api import failed`: + - Check that runtime path exists on host and is mounted to `/opt/.manus/.sandbox-runtime` +- Error about date range: + - Use `YYYY-MM` format and keep range inside API limits +- No output 
file: + - Ensure output points to `/data/...` inside container or mounted output directory from host + +## Resources + +- `scripts/docker/Dockerfile`: container image for sandbox runtime +- `scripts/docker/entrypoint.py`: SimilarWeb API caller inside container +- `scripts/run_in_docker.sh`: host wrapper for build and secure execution +- `scripts/install_runtime_adapter.sh`: install runtime adapter into `/opt/.manus/.sandbox-runtime` +- `scripts/runtime/data_api.py`: `ApiClient` adapter implementation +- `scripts/test_docker_workflow.sh`: reproducible smoke test script +- `references/api-matrix.md`: endpoint and parameter matrix diff --git a/similarweb-analytics/agents/openai.yaml b/similarweb-analytics/agents/openai.yaml new file mode 100644 index 0000000..be4d244 --- /dev/null +++ b/similarweb-analytics/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "SimilarWeb Analytics" + short_description: "Analyze domains with SimilarWeb in a Docker sandbox" + default_prompt: "Analyze traffic, rank, sources, and geography for a domain using Dockerized SimilarWeb workflow." 
diff --git a/similarweb-analytics/references/api-matrix.md b/similarweb-analytics/references/api-matrix.md new file mode 100644 index 0000000..ba5789d --- /dev/null +++ b/similarweb-analytics/references/api-matrix.md @@ -0,0 +1,54 @@ +# SimilarWeb API Matrix + +## Endpoint Mapping + +| CLI `--api` value | API name | Default window | +| --- | --- | --- | +| `global-rank` | `SimilarWeb/get_global_rank` | 6 months | +| `visits-total` | `SimilarWeb/get_visits_total` | 6 months | +| `unique-visit` | `SimilarWeb/get_unique_visit` | 6 months | +| `bounce-rate` | `SimilarWeb/get_bounce_rate` | 6 months | +| `traffic-sources-desktop` | `SimilarWeb/get_traffic_sources_desktop` | 3 months | +| `traffic-sources-mobile` | `SimilarWeb/get_traffic_sources_mobile` | 3 months | +| `traffic-by-country` | `SimilarWeb/get_total_traffic_by_country` | 3 months | + +## Parameters + +Required: +- `domain` +- `api` + +Optional shared parameters: +- `start_date` (`YYYY-MM`) +- `end_date` (`YYYY-MM`) +- `main_domain_only` (`true` or omitted) + +Optional API-specific parameters: +- `visits-total`, `bounce-rate`, `traffic-sources-desktop`, `traffic-sources-mobile`: + - `country` (default `world`) + - `granularity` (default `monthly`) +- `traffic-by-country`: + - `limit` (default `10`, max `10`) + +## Limits + +- Maximum lookback: 12 months +- `traffic-by-country`: max 3 months range +- Granularity: monthly +- Latest dependable month: last complete month + +## Data Persistence Rule + +Write every call to a JSON file immediately to avoid data loss when credits deplete or calls fail mid-run. 
+ +## Runtime Adapter Notes + +Runtime file: +- `/opt/.manus/.sandbox-runtime/data_api.py` + +Provisioning command: +- `/root/.codex/skills/similarweb-analytics/scripts/install_runtime_adapter.sh` + +Credential modes: +- Preferred: `SIMILARWEB_API_KEY` for official Similarweb API +- Fallback: `RAPIDAPI_KEY` and optional `RAPIDAPI_SIMILARWEB_HOST` (default `similarweb13.p.rapidapi.com`) diff --git a/similarweb-analytics/scripts/docker/Dockerfile b/similarweb-analytics/scripts/docker/Dockerfile new file mode 100644 index 0000000..f27039e --- /dev/null +++ b/similarweb-analytics/scripts/docker/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +RUN groupadd -g 10001 app && \ + useradd -m -u 10001 -g app -s /usr/sbin/nologin app + +WORKDIR /app +COPY entrypoint.py /app/entrypoint.py + +USER app +ENTRYPOINT ["python", "/app/entrypoint.py"] diff --git a/similarweb-analytics/scripts/docker/__pycache__/entrypoint.cpython-312.pyc b/similarweb-analytics/scripts/docker/__pycache__/entrypoint.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5ce5a64755ca836d25c25d39eae0a93646ed2f5 GIT binary patch literal 12472 zcmc&aYiv_jn)lk*_O-9$yqp(=#6So!;DiJyZ#p3f%R4eh%9lDwkBkk_@ zo$G5m!L-$Cr9Fwy<2&DZf3NeM`>$nXHVQ)gKjs5{%@p-}Ea-vMK;e;Mpr~sUPw{k+ z8q!jFh$dyjkb$JeAtOndAqG-I&=fKcnQ6o|1}!0Wh$VR@XbsthY$0xl3)zS405c7h z@n+t_v%HnJ@f>gG%WhbP9K8Nl&f_1#%10@_;wSV_g@F=`W9#%u7E-?QLs}~hRRNBf zuex2WLk!e&6z}{##n*fYYr*iLmQX|0e68T*>)=-}RF9bXhVL^&HT)`}nqMu{=3tEg ztK-)ItO@|myHE}N8xTIi^6LS6 z6~S(S9yegGJPq(xfUhPqsu!Fi48IY|jRda^dUhA}=LGCcP`ifTEKphd$`LVY^D;MwiJ~gx!=3cVxh=FehtaW?`jJmbBJ(;JO?}Xa*WG4)K7vx)n4g9w8 zHi{A*Rtk2%6@D%H9_y!YnVzKAQ^Lj;O58Wp1{iMm(PM)}lg?%N?NH+s+VqzEjvEGk z`|W4+(-WAn7oKc&d#7GclR5Ca^!Ar&o*rsD+jfz@NL@0%L|vrayB}f){cc*d@;=$; z5Bekt3Jeqkw^3z!1AbX$Pfo~zNZ1!tO@q;ipb%^9h)l>G?IB+{Ds{9=zA!%=x#SU} zVL1>IVygxMp+L|lz9u@9*_6yLfEW}z@^eT(Kk&%GkD@CHBUy!`KAdSnBiP(m3zzao&1F*b}E75Rv 
zUcJ-dNv|Kqmc>c8CB|us^Wfqh+qKnrwAI+P>3az9F{jNJ5M|+UU9K-rZi1fN1RvL~ ztk7okxJri)PkM5LKQzO^Jv2gefPqM~V#H#^VT58UR`$xv{OjANJP>zHwG6_+nn%P) zObGj-HXpD#hDZGnU84jFrP`3;G)NeM;b~+lk~3hAHwq@;3kIm&%$rcM2^Qc`GnCn) zvIWZ4N!D#utuF~a@kAsnkE+ZhWK~m$6wIO^N5!x|C#|4sio%)TiXP!e*Qhu(2IOgiVjz4zR-NC0_B?Paj4in!nx-;Uwb$HN-N~Lr z^Bg_b^1-I}H_ZwA>)*7*$|ZMwB-LHK0+<8j4(Fjam~VGoLH0QdiGv9P!SN_4>#$a2NC zkb7dC!V~KR$TNufly&a-XwR)Z3(-$r|M+!fQ%|bAM`3%&I{V!=mGyeVz7X`F+PvOS zgpUR>Z})m%i~54O8Y2*vTJH7w!eJ0cD32w;F#?T=jWDjt3`ZhCaUD*(1)^IN>7AU6 zScy=KHp5Tqh3E&=16$emj$}B;_l{;6+O#dX?GXh@)=Zmv=;X016ng0SUIg~g2<^>5 zdja|tKznIz0PI#ZUGN2?f@&HG2ti(S0%+OsnE*g&Re{VkN`_bqV+dUl;*_qhqzV~Z zk`Slg1SsMlYnWlO#u>@D87D&Ka>u&j>ZO+|IaxREy_RZAwwS;k>U#Fb;Q zPmF~UmV~$=Zq#)w$Qo;ZtkxBZO0sKMaJ>YX-X#8q9{Kf6C!v&c0sIqH*r$(_o& zU3Y5kSe1r@X$R8j66ctSCcc|^Z6@~iv(vpx_VR=&;Y-p9+uJ9n`!W{$%xjk?6Me~p ze{nS5!*}l1+=U*FrX9x=_Sh2VObjM>q`9>UvzE|}YM2bECSC}F(Sf_9hdiky0P-}k z#?vvtD=pl3Lkx6Z3|hx&jHe87APhWgkt>iB&y*x01N01-(1MPEeuciEhoYl4^^4m9 zBBDeihZ}RErQ`zje_n7!!@MAl1S1#WMDph#?nGSV61P#@jd|RB)fN-PhuTd%)d%(O*Uxvq-rMdkIvtK(0lO+2@ zSv3k_e%V3bqzKOo3`n|SH|MaM$O|khzB4MpM_|d zM7ELL@=6oFu-FZi#}J_tBG@r|`IW=7hZE6jv8%C^eeK+-6t{7H+ef=^?bf7mYV*Mq z*K^mG;+|8O=W=@@M|j_4Y13o~x&;x2Tuegg#Q>DiF$$?pNmIpw3yg9d4vRMQKm2b9 zKtV_{MP!7O02^=+64m05iXzw>%Qm4JYmlBS!s z>$bVR501Tm3`k`9xh1YHX_>1^b1e$fqOITPn+W*v@b!@A%D)Flu>g-#cra5&Igef7 zu`#qzSA><9p(!SA5bbi2Xf5nUn2s}3rZ~N4hq_N6AM~C$**|!AfNYzH2T1mth)lwYZj1OF zLi1#(nlA>zeB`2P4EjT=8OH#*#%d!>h9s2uz}grzIb6jHBfcouNjMd}6P*{t=K*OD ze$p9;;E>8zCwo$5uIWQdwlz2Sn`76<-VLU0o2PsJeUs;%GYNlk&th5Y!nWx{pKsmq z&ZVR&8DA{l@{jvIGb^XgC}&?zoqAb${#<(RE7OM`aMg(y6mGS`tR^QbA}xq$=a)X- zSAnd7*TFs<*92ys%;8vadC#n3L2b<>vZhO*%2=b^n3=t^!#G{u4w%tnCS;efwR=)itTYh3wx`a!9LEv4TU0S!2h(?O&fGD=*$@`~EicU;K zL|JnAWM}}h+LmhvOhR-X%AO)7LIFlhRU)F@W{s2b1Sy_?despL!d+PQ`huWF1s-l{ zAk!rjB!q`@OymG_Gf>1p$ULvWPx>c_K;~P@66`%oJ<)@^(~dm~yXT7)th7|Kc5eNg zl&;x4-TwgajhEwyzPjgb&#zB>dg7l?Dm{Zr!|Al+1%-VfV<}Ix-?O*~M(0BN{jQ^_ zuA}L$<4WfV<%CaZ7*0F<3hV!Zt4WN`nbX|H>E4Wmn;E`5N#?LW?Kq&Y2Qn6$;#haj 
zvTjbE_x<=1sd_H$IIOUTvqq|It72J4xOAD~!L=j5J-PrI0RSEv0fEGiD@K9E<9U$>aKn2HXgLsyS}3L6$MTmE9Shwh%dQGc zKiRgJseMd}gkz=-AYvcrdfWij5Rp`7!iSEMF>oUQnCP09p-w!92v*!Ma#LJk+*iXX zM1mItueN+Ib|U(r3DS&&%4(YW+)|b)ubC-Zs&1JZNLRa+s!j827q{%cGm_rYr))lS zS5PXBPana(-#NSU%D&lsU==i^xy=f*S=(gBC!UvR>=01*%k zkNLb{C%c;kVLK^_ufkZWIS`fvxWm~`0f8Jj z(dT`>d+@Nz>Vt}5M6&jQdMAE-z|+PfegvRYQf~zHzg$jhbH!zv+)n+}+RYk2v3DDdpBhb&MG2MvW5Git=jFxM*^9cGxXl!Z=P7-gbX+8;lB1iy5hHJ5KD7^cFT8F0#<{?I z;2~Qong0H+?^^Ifa-yWaZkMmpgQ}eB`YM;Q$vu&P^@^7ASEv)O0 z9qR#8(^Yuzae+{kTu})e_pUqxyMVz0yx{63o_-g+cey46Q3`<3t7%-<(Seiw?ZEUL zSg+(+0Q|Etlg9(h15k^9G!lS^s2J<>=t0uD}sd zfG4e}0KJ=ie)K2f?6l>?U=F-Ei7$FMVhENEPam$jtIS~5=GS4M-wJbfr@rcB&@rY^;RL*sVs&n=(W zKeqqOk#;|;Fm;Q}cQTAES8H2j))z{SMP_56WM5=96iVFui=^?60;qhUfq?cFK-SyF zg;NB$D-Traw=BF&UJ`Qrp(cLAY%!A?N?3!t+3AOY>oaFzGD#uV3>=9(0y ziRed70azw{lfj5@xdSgRFdf7=LpPolzOA+_IO5TVD%KS&<5Hd-^qGRt%~{AaRdm${ zE0||)n{<(~1N}w$i+xFBiUxBEK8U!@euz3B0*DXeSP5q4tpza(kCE_Kn876I@lHyj zFTyqECFp^M%XjYC+t!bdp{#HTSlAmGSLur}Q&b-D>>|*xKf(k5A9_eMxk0rHL`{2r zl0OhYZOa3hKc;PNR`kN$a%aE?M7+$XY{)k*Kmm;T$b=9EV;)R>cu|jrCV-*A&J(~$ zdllQwLPY2tx>M#n1vfT%qh9lrzVWRL z=>!4@1r+!sl*T5`WJcaxz#?yLx%N{g8GQ;Qlx`2u5<3l0bS$h0`mDfBT=LxF%u{+Z z#m#c3-WlL@>G?vcQ^hj8?S*Au94Z%di*K5F4P_(lkYZE5-}F$Zxs zw1O|rcn7pBhg&!+qx+5oaGq)%tXeAFcNf5oU$6IxHVt0 z#cgAIb(}&PXZXs}rxKv!Ca_0!N`ajKKK(yi4UAj;^wqeX{e#-Aj{zsnxN6Vc=ZGIh zb?7CcZ6wt(F)5Em!rlvLK}N!dZqXv%sbYJGwMdLh-AZ)%@Ny`;77d7SC5(oHf+V?$ z3^2SfM&Sifdym9BdT19*r+KJ;Bor4H$ zO>xo-b5=RD`@A^^pFzAqDseVo`l$?7#;S`@T;R(ahFdLIpJ46bhgV>p!i0V2;R`qM zZ?OLtAc9v=>`5C7dVwcXoB*`q&}{{KCi+4WnNKmQ_&y>v5i?M=q6z9n6I6T+`zmN* zQ$PaMD@FjST5>xK9?6MdKvqo?@Hvhw{uS1lP#sG}4mRSkMh^;kjVU;9#6g7p03-C1 z5U(1UyNE(c{4qv9!3gy;xge{oAFdR5ZFX~-GX|Gk@W_yrkoBotk-rY8M%!9%NPrhv zv<OB_HnCpd|<=OiN>l=yR~`3Lw(|D~D6T+Q42rhCyP^Ty>j9#qvXHMT7^ zxo)1ne*R|Ydg%7?bkpvornNW6uaDnv>PR(pq?{zmPURZQ&Uu1V=$}1j#_jT6~cfq$idw;Z-IInZ=aU3c3W(=R6lTP&F%nkntza>x_ z_NE>C6n5Vib?*6=kJ@gvE%bhJ^5c_t2Y-F;({t&%ffVDMK0GtB#8f4=U)yzcSMto< 
zg+-?Qi}KopeC^WJOQ~|#^x-9YU6M(T&YRQrj_JN7a0>GujJ`j*zp03`oRJS_Wd%tl@s&PxY z&huqC>okKa$_Xy5x8m>uz?Nz2o?{joQw;|`JH6-_SY!ur@fPmBrS6`k?m^x92di9} z)my@f86Ma9~5#ro+Z zpO@9XeRld#rmUKnaAy_{-{F5b{)=&C+fn7zi!Osb||IXAp$^Di=dhO=v)TKJxbcpSZlfb$67ngsIA$Q)1! zn79l&lC`(bE#jL1BTS*O(Ti9l{KKIGiF^?%NFoYW;!+au8&C4AqnhE833jrG&J__o zup-_$G?pYBa}0}P5TU;_G=YQeKoDAF;wFsnV-)R6mi?IP#^?}6_$I3H9{R=!+o{%m zu&?1k2qcR+GCU^uWsN5iSqSxA3do{_&?rVf#polfH%l^z zA5q+b5Hy_#Q$1$)Y2RQS5bpvEcnZFi0VS3-(lq^hs{FSU_gl(J{%yab*8Yxa_&2I8 zMYTOB_e|S?3Y}Xs&QAQV?#MJhmuc$Dtm*+u;|$Z5Y%S%i$!P!8&P=l_)6|?Q9`Up!|Cqo}dyp*LNnUhE|KluntSqDuYrV|19*bCCRT)NN$AALbN z%NpnnNf~Yq^akt-lB~@_Hzvs-jq~h6^{p}-sPWF>y92*CL9(9-%HTQ0JE{zhruqXo zeq+{IK_8)$FegmsCbjgATT4G<9uau9)j)4d4&Z1T=LT@JjaeH*Z$S_w^Xo~naOtk` z<0+E;tQyy_CF>}sD-)xd$2v9Wk^V!9(jbYKeVuPO}4gz zZcZG&?`%yuTX7D}*;X@Mi}QqJj-TJQV4mNbYTZf7ci>An0B5WA(q%I+JxH=~nqurT zv3tgbtbs8#e(tEhUj1%O+R?1A%~=!0tojoW1A0FVP|23fjLi_4I%oRWxX|-6Hr25E zGt;7@Upal|{%LROwD&jctI$WX^6ikquKqK4NKK88CA?7ls str: + return f"{self.year:04d}-{self.month:02d}" + + def __lt__(self, other: "YearMonth") -> bool: + return (self.year, self.month) < (other.year, other.month) + + def __le__(self, other: "YearMonth") -> bool: + return (self.year, self.month) <= (other.year, other.month) + + +def parse_ym(value: str, field: str) -> YearMonth: + if not DATE_RE.match(value): + raise ValueError(f"{field} must be YYYY-MM, got {value!r}") + year = int(value[0:4]) + month = int(value[5:7]) + if month < 1 or month > 12: + raise ValueError(f"{field} month must be in 01..12, got {value!r}") + return YearMonth(year, month) + + +def shift_months(ym: YearMonth, delta: int) -> YearMonth: + zero_based = ym.year * 12 + (ym.month - 1) + delta + if zero_based < 0: + raise ValueError("date range underflow") + return YearMonth(zero_based // 12, (zero_based % 12) + 1) + + +def month_span(start: YearMonth, end: YearMonth) -> int: + return (end.year - start.year) * 12 + (end.month - start.month) + 1 + + +def last_complete_month(today: date) -> YearMonth: 
+ current = YearMonth(today.year, today.month) + return shift_months(current, -1) + + +def default_date_range(api: str, start: Optional[str], end: Optional[str]) -> Tuple[YearMonth, YearMonth]: + window = DEFAULT_MONTHS[api] + lcm = last_complete_month(date.today()) + + end_ym = parse_ym(end, "end_date") if end else lcm + start_ym = parse_ym(start, "start_date") if start else shift_months(end_ym, -(window - 1)) + + return start_ym, end_ym + + +def validate_range(api: str, start_ym: YearMonth, end_ym: YearMonth) -> None: + if end_ym < start_ym: + raise ValueError("end_date must be >= start_date") + + lcm = last_complete_month(date.today()) + oldest_allowed = shift_months(lcm, -11) + + if end_ym > lcm: + raise ValueError(f"end_date must be <= last complete month {lcm.to_string()}") + if start_ym < oldest_allowed: + raise ValueError(f"start_date must be >= {oldest_allowed.to_string()} (12-month lookback)") + + span = month_span(start_ym, end_ym) + if span > 12: + raise ValueError("date range cannot exceed 12 months") + if api == "traffic-by-country" and span > 3: + raise ValueError("traffic-by-country supports at most 3 months") + + +def sanitize_filename(value: str) -> str: + safe = re.sub(r"[^a-zA-Z0-9_.-]+", "-", value.strip()) + return safe.strip("-") or "result" + + +def resolve_output_path(api: str, domain: str, output: Optional[str]) -> str: + if output: + return output + file_name = f"{sanitize_filename(api)}-{sanitize_filename(domain)}.json" + return os.path.join("/data", file_name) + + +def build_query(args: argparse.Namespace, start_ym: YearMonth, end_ym: YearMonth) -> Dict[str, object]: + query: Dict[str, object] = { + "start_date": start_ym.to_string(), + "end_date": end_ym.to_string(), + } + + if args.main_domain_only: + query["main_domain_only"] = True + + if args.api in COUNTRY_REQUIRED_APIS: + query["country"] = args.country + query["granularity"] = args.granularity + elif args.api == "traffic-by-country": + query["limit"] = args.limit + + return 
query + + +def import_api_client(): + sys.path.insert(0, RUNTIME_PATH) + try: + from data_api import ApiClient # type: ignore + except Exception as exc: # pragma: no cover + raise RuntimeError( + "data_api import failed. Ensure runtime is mounted to /opt/.manus/.sandbox-runtime" + ) from exc + return ApiClient + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Call SimilarWeb APIs using ApiClient inside Docker and persist output JSON." + ) + parser.add_argument("--api", choices=sorted(API_MAP.keys())) + parser.add_argument("--domain") + parser.add_argument("--start-date") + parser.add_argument("--end-date") + parser.add_argument("--country", default="world") + parser.add_argument("--granularity", default="monthly") + parser.add_argument("--limit", type=int, default=10) + parser.add_argument("--main-domain-only", action="store_true") + parser.add_argument("--output") + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--mock-result", action="store_true") + parser.add_argument("--self-test", action="store_true") + return parser.parse_args() + + +def write_payload(path: str, payload: Dict[str, object]) -> None: + parent = os.path.dirname(path) + if parent: + os.makedirs(parent, exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump(payload, f, ensure_ascii=False, indent=2) + f.write("\n") + + +def run() -> int: + args = parse_args() + + if args.self_test: + result = { + "ok": True, + "runtime_path": RUNTIME_PATH, + "runtime_exists": os.path.isdir(RUNTIME_PATH), + "python_version": sys.version.split()[0], + } + print(json.dumps(result, ensure_ascii=False)) + return 0 + + if not args.api or not args.domain: + raise ValueError("--api and --domain are required unless --self-test is used") + + if args.limit < 1 or args.limit > 10: + raise ValueError("--limit must be between 1 and 10") + + start_ym, end_ym = default_date_range(args.api, args.start_date, args.end_date) + 
validate_range(args.api, start_ym, end_ym) + + endpoint = API_MAP[args.api] + query = build_query(args, start_ym, end_ym) + output_path = resolve_output_path(args.api, args.domain, args.output) + + request_meta = { + "api": args.api, + "endpoint": endpoint, + "domain": args.domain, + "query": query, + "output": output_path, + "dry_run": bool(args.dry_run), + "mock_result": bool(args.mock_result), + } + + if args.dry_run: + print(json.dumps({"ok": True, "request": request_meta}, ensure_ascii=False)) + return 0 + + if args.mock_result: + payload = { + "request": request_meta, + "result": { + "source": "mock", + "message": "mock_result enabled", + }, + } + write_payload(output_path, payload) + print(json.dumps({"ok": True, "output": output_path, "mode": "mock"}, ensure_ascii=False)) + return 0 + + ApiClient = import_api_client() + client = ApiClient() + result = client.call_api(endpoint, path_params={"domain": args.domain}, query=query) + payload = {"request": request_meta, "result": result} + write_payload(output_path, payload) + + print(json.dumps({"ok": True, "output": output_path, "endpoint": endpoint}, ensure_ascii=False)) + return 0 + + +if __name__ == "__main__": + try: + raise SystemExit(run()) + except Exception as exc: + print(json.dumps({"ok": False, "error": str(exc)}, ensure_ascii=False), file=sys.stderr) + raise SystemExit(1) diff --git a/similarweb-analytics/scripts/install_runtime_adapter.sh b/similarweb-analytics/scripts/install_runtime_adapter.sh new file mode 100755 index 0000000..c069c19 --- /dev/null +++ b/similarweb-analytics/scripts/install_runtime_adapter.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: + install_runtime_adapter.sh [target_dir] + +Default target_dir: + /opt/.manus/.sandbox-runtime + +Installs: + data_api.py +from this skill into the target runtime directory. 
+EOF +} + +if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + usage + exit 0 +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SRC="$SCRIPT_DIR/runtime/data_api.py" +TARGET_DIR="${1:-/opt/.manus/.sandbox-runtime}" +TARGET="$TARGET_DIR/data_api.py" + +if [[ ! -f "$SRC" ]]; then + echo "Source file missing: $SRC" >&2 + exit 1 +fi + +mkdir -p "$TARGET_DIR" +cp -f "$SRC" "$TARGET" +chmod 755 "$TARGET" + +echo "Installed runtime adapter:" +echo " $TARGET" diff --git a/similarweb-analytics/scripts/run_in_docker.sh b/similarweb-analytics/scripts/run_in_docker.sh new file mode 100755 index 0000000..9f34201 --- /dev/null +++ b/similarweb-analytics/scripts/run_in_docker.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: + run_in_docker.sh [runner options] -- [entrypoint args] + +Runner options: + --build Build image before running + --image Override image name (default: codex/similarweb-analytics:latest) + --runtime-dir Host path that contains data_api.py (default: /opt/.manus/.sandbox-runtime) + --output-dir Host output directory mounted to /data (default: ./similarweb-output) + --network Docker network mode (default: bridge) + -h, --help Show this message + +Entrypoint args: + --self-test + --api + --domain + --start-date YYYY-MM + --end-date YYYY-MM + --country + --granularity monthly + --limit <1..10> + --main-domain-only + --output /data/.json + --dry-run + --mock-result + +Examples: + run_in_docker.sh --build -- --self-test + run_in_docker.sh -- --api visits-total --domain amazon.com --dry-run + run_in_docker.sh -- --api global-rank --domain amazon.com --output /data/amazon-rank.json +EOF +} + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +IMAGE="${SIMILARWEB_IMAGE:-codex/similarweb-analytics:latest}" +RUNTIME_DIR="${SIMILARWEB_RUNTIME_DIR:-/opt/.manus/.sandbox-runtime}" +OUTPUT_DIR="${SIMILARWEB_OUTPUT_DIR:-$PWD/similarweb-output}" +NETWORK_MODE="${SIMILARWEB_NETWORK_MODE:-bridge}" 
+BUILD_IMAGE=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + --build) + BUILD_IMAGE=1 + shift + ;; + --image) + IMAGE="${2:-}" + shift 2 + ;; + --runtime-dir) + RUNTIME_DIR="${2:-}" + shift 2 + ;; + --output-dir) + OUTPUT_DIR="${2:-}" + shift 2 + ;; + --network) + NETWORK_MODE="${2:-}" + shift 2 + ;; + --) + shift + break + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown runner option: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +if [[ $# -eq 0 ]]; then + echo "Missing entrypoint args. Use -- to pass container args." >&2 + usage >&2 + exit 2 +fi + +if ! command -v docker >/dev/null 2>&1; then + echo "docker command not found" >&2 + exit 127 +fi + +if [[ ! -d "$RUNTIME_DIR" ]]; then + echo "Runtime dir not found: $RUNTIME_DIR" >&2 + echo "It must contain data_api.py for real API calls." >&2 + exit 1 +fi +if [[ ! -f "$RUNTIME_DIR/data_api.py" ]]; then + echo "Runtime module missing: $RUNTIME_DIR/data_api.py" >&2 + exit 1 +fi + +mkdir -p "$OUTPUT_DIR" +# Keep container non-root while ensuring mounted output path is writable. +chmod 0777 "$OUTPUT_DIR" 2>/dev/null || true + +if [[ "$BUILD_IMAGE" -eq 1 ]] || ! 
docker image inspect "$IMAGE" >/dev/null 2>&1; then + docker build -t "$IMAGE" -f "$SCRIPT_DIR/docker/Dockerfile" "$SCRIPT_DIR/docker" +fi + +docker run --rm \ + --network "$NETWORK_MODE" \ + --read-only \ + --tmpfs /tmp:rw,noexec,nosuid,size=64m \ + --tmpfs /var/tmp:rw,noexec,nosuid,size=32m \ + --cap-drop ALL \ + --security-opt no-new-privileges \ + --pids-limit 256 \ + --memory 512m \ + --cpus 1.0 \ + -e SIMILARWEB_API_KEY \ + -e SIMILARWEB_BASE_URL \ + -e RAPIDAPI_KEY \ + -e RAPIDAPI_SIMILARWEB_HOST \ + -v "$RUNTIME_DIR:/opt/.manus/.sandbox-runtime:ro" \ + -v "$OUTPUT_DIR:/data:rw" \ + "$IMAGE" "$@" diff --git a/similarweb-analytics/scripts/runtime/__pycache__/data_api.cpython-312.pyc b/similarweb-analytics/scripts/runtime/__pycache__/data_api.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..518722f3f70e79ba8a55e325f568cca1a150ae94 GIT binary patch literal 8392 zcmbtZTWl0pnm%>u`_0|$3pUs;U%(XD-7(}sxFsewi4|~y4a>#}TPk;zahtxSsv78~ zy<#IqYBaM7BrBOHjD~rNqc9qecBRQmBQ2ZENRjrXO-FK5VKqpTm%KHYRaVT){^wLz zcf09iBJDApKG*;J=bZoYoxlFe<8dNL)8DN`Gg}b)3$B>SVHBv1Z4^Sw=oFIBERraR zj!|)XmZor-i7~Sb`DJHW@@tv3l3yG7Wn=cZW7a{&T4K()Yt|Ka&${EDSx?+M>!lEl zUPY4i3X*KH?=!|&akfQvkajz?J3gb0*3T+@&H8C1`z7aRz!grU=Gs(qZ>o7V)x4W( zzD>0jvnKg(+GYb%Kn_a5gJcchvK6>#Gr5^54(sYL6Y26OTIH4#> zWr}BXyFd<45Ok*?z-gpoGL+qd@NQa+;bRqqPtgV@`{ZO;i>JcmM%9s2dM*}K7e*90 zl^hv;;e|cVzwpBIdtMCJHa{W)QyDE9QNwCPiKa9)ToIBm-V?sNP%5Kfr6gG!6jTdj zS5S$yD?YP*)4nQdLv4Tt*X;`wmXaBX#sUmN%IfZkgp^8xK+UA&h|Z{*qO({$rg&Cy zVaoVi6c1#&PY~+PTo4p5?jp;#;8MWMI-#K2Ap045;PhOXG{)fW>~Q$}Xc#VBjcRiE zl5yut;j^*ioERHX#KgJ!a&}+S(HdNPY(!H;EP-%%(ukZmE1o4<6FwgW(rWhQrZXj3 zN~dDch^WcZh?tf@AtQ3QX>0aSQwl%2_FOUz^k5sB`~w>`nO2i&B_gY^w0cfUrn2GY zi&eI0j*chiqA_^{jxT$l>4dX#@~k4J7Bb-<`r| zRWOj)9VOFRq+$vYHZ9nEz<+Av8E9QbIqD*%woynk$DqZE2ZnLgBvGcX%psxLpQB-J z=ii)5n{#0_#u3aSwjd2jV$D&o0&8~Wn4iR*rEV_XBS|^-Cvj&r=T^A0!TJ>mK`3W| zT9w-j;_Xv<;Auyw2kY97jLl34r>2i(yB0JprS1=h;d(-~%L_%4@h4PRch`GAeM_}g 
zN9wgVW=>|e)~1f`4W&Zn9b-DYU8j?(ZUs%36X%sK*tl+0KqsOpWh+#aZ8+=38Idfj z&Z=^3o(ya@K*c-9FoWY1Z27To1e~J@LN+jFN@>V!+XW-kpFwsRl>=ztxx4+R|3LqT zHBY#)icRJ60ZSQ^H?tO%4&+vVp|NYF>OrbMW^o1WW(DJ~y`_Imnt2?d*f4;j6 zisbp)&Xt|0o4>nl;`Wi>PJTMMnkWuWU@D!Z?p=4gwcB&QjeZ&}?3*rzW}sR)t>MnD zQb$jo{bx7vMc~_Abr?vu5X=)XMHc_!EfNKInJ7R_@QFBsfJTui5=(SlFBZkky^MyH ziKHC8SQR|84H5WkLP(QlGvPN4!xEcgRhL$Q;xL~vXT3+|&>{ivHR`yev_d=f;SDKA z<=7l^n>9i941#gkt2ZPv-`~&()u-pzaaC2f_bA>vrCSYWucK^F-O-0CurI1|6$mNu zv@!y;mEDkKr>2rzM3E%`#DFzbE|N^lN6)6ACB)4DFs@UB3PR*|rWAQzR$vIv0miOs z3_#Hk@6cHZ9D))A;yMEgp<4_+An1x{hF2hn*~GU*IrI*r(bOuzgsHqm*@;Qnv2n#H zP{HC(cqh9cIIUh&Of+7vDQ4Rtn5y1_>@s@nMIBpj9Jsr6c%{GCK9YCcZ)v@D>Vvm$ z%oSVu@|JJ?t=HaO7Oo4o{(eO%w(TkU_pbY2T=TzJ^dHDu@A=y92f7Md4-^9j3+{st zT)uTz|6NyqDcG?d+_4tiaqHse);n?`xT6?6R&XC91Ge3DZ6gE1Yr*iU{Z9K==|V7E z44x{uPkrlcFS*~@U}&GSjA)0GC{c5Dxn`&fY{E#3RpB>Vu-KRaOGMeLRh2@ba@5D@ zmsMW?0?jT~?QD(0VpaHSZ8`dOb>0$K zkCDV`*zA$Ch+%C*TEqywA?2zb1op=++3F~?Hw=a9B8JcnDQA-`x2>k3Sh53$*myUj zoL#bkTH0>gKdbo18N@rLvO7;D)O0G9Q~>&Nrm&Jns43``*BMU{!PRYe5Fm;t)Jr<^ z>cmN~nGD7f*0`)KBqiN$&ij^)x)Yg+fjorOEIg{^NE11AdZ6Ayer?8Vl7y=%VSTjvYD-l8v*XG`vub$9og zyZeS*bPwjKQpeWyj?h|1C{N}2tL~D&ZQakW`T0W2u9at3cC1qWw&VB1zZ+iP^V-^; z*S_)>_q<*hJy!4@&oke8Ti;JyO{{wd*Sv!(!PQd*?_kk;AkUP1!OE)4%C65ZelgpJAD=*@43=v7?0F-sx zf-Hgpsk+^qs9;}^J$_1s$OX4)wOWK^z7keBz(F8{8{I$3U5IK6wZM#AdD(MnWI>LL zA$ZXr#;dU~T-H4m?U5z?cF=vbD$z?E{t!fXGeD#Xd?v&pc)KzN)0HEb1|TLNVqivs z9>;Ol1kR2^_5&*EHS}2m<;5W<;{;yElO`e4SqLDdn(60W=*{*u>Zf^DRcAa0!_-e8 z14ZrZ{nfUQwyk&WUhCYwaqqfe6K6?weQM>6zLHnu+J3@jwuR&oMFy<20tNIa@p(Yj_M`yswz5^P& zL@zNp<}&pQM;+c|w2Biz$aEZvGsh^jL?s|-3sO!ZLBAN#s?V32i>&tUVZmjC4lq+X zt0`$&w}9xWGDMFo5XOnCx)Xvf2~`sz`k}LP$z+V+E08yg;dBNbmpXM$r_QUGo0{2u z8EV<~`fIKEh4av-&Omkgy~Hv^1us*6;@`;BCr0`R5S85=%4aV%}ym$Ra_Y0T6nH3xF+FrA@11`DSRn1~n2C zNRTZ@=NMSG3R!Bq$*~ZgV8I4jZ_*K3f?&Q>l@kihpaC?BRO$k}XWl{-{AJKZ<}GxA z8mK>?W&j9#-G(%Xl*<5G?El~W!`=ME-7h&3LEy9suYdy^1Qu*LYm{1YyoD0%h01=k zD%`50jUNLI3Ao3`t|5{0WpfV6e$yg3ZadApGC}Tp5K&&T|D*i^RVP)=P7Oc48xru` zzGScQC%LdUmE1Wy_Nh;=VD5}wj>S59Jz 
zV7jD?asn3_`C>%35+Ew67{X$!Wh4VKVlS6%tFv7-Jo+AVsmH)*L%@UP9Eb1wJC=K{ z_pJMe*8D>&%<9SCzw^6y3jU#@|413RJb^nBAi?F#^~{Ht);svM4t{0mbH32Q7dytU zvG;a||3zEQUeDed{JLXkb^03iD}CEbgTtkv-Je|g^`%m8KlqlOcJM8I+rhVN34Ct@ zqTgf6k8t+a$6HWK`}?`8IRm;rLVkz06l`DKcYWW7``3emYr(-=r&r%91P6=3m-6Ff zn(++X@9h3n*GFBq#*3Z&b?g7$4{m#eD9=!-tM^vxjqVSuWd>?bz72H2yL8!k-T7c> z*C!W$eX+z1V$OQ|OMOERUDmdCu(v=Jz5TJw9W^!P3Bl5iKJn5;7wtthzb_pHU#NCTnRWp|exTgbbMJSv&=Y z8%Si}$<`{>%6BOvHVxma&YUsA1{pl|0%T9Twb(W8-$abOa|l`NW0kBqiIggyH|u|D z4MNx$<0POf+itwNLKXZMnl@fohJugJ7X`HKa&i0p0&?J3XiML;S4{kl)aGY zZmhV`i1Bn(UdHWMff~Oc;crz6&M*xXd&MiLY)2!bCHS{v7f5`m1MrjI(hEw^y{l}) zB|GYY$RI8`QO97}g-dR2Z6X<*@OUi*Vnr z+XTS45#wXwd>RA-J`XAwX()?0+YXuTuADrvGQ=F}-p!YxyQ&{GWmUo8Nw;bl{54a- zNKwHLE(Ieu1wX7HR&0D*Cf3e)cw;wXJjUB_33G>e!=Hp3zu!HvA2dE|9#*cvFbq1? ze}xS6kD?y2Hj4e90HOx|f?VGq$2TbW4Ql&u)c0o;E~4;bD@8f-xyJ~8;3R3+_cTR; zwnGM|M7zqk`p}66!iDgeV*fwfgTgb5#es9ApcDtxd;Ot8I8p3R-V5z79EcV}i}yxe zEbKp59EsfvA1EBm6vJ7tZehBB0)^Jy8^})4-?wKe>hME^^KzC!?v5MIf^*=XY~(8b E4|;#&!T None: + self.similarweb_api_key = similarweb_api_key or os.getenv("SIMILARWEB_API_KEY") + self.similarweb_base_url = ( + similarweb_base_url + or os.getenv("SIMILARWEB_BASE_URL") + or "https://api.similarweb.com" + ).rstrip("/") + self.rapidapi_key = rapidapi_key or os.getenv("RAPIDAPI_KEY") + self.rapidapi_host = rapidapi_host or os.getenv("RAPIDAPI_SIMILARWEB_HOST") or "similarweb13.p.rapidapi.com" + self.timeout = timeout + + def call_api( + self, + api_name: str, + *, + path_params: Optional[Mapping[str, Any]] = None, + query: Optional[Mapping[str, Any]] = None, + ) -> Dict[str, Any]: + path_params = dict(path_params or {}) + query = dict(query or {}) + + domain = str(path_params.get("domain", "")).strip() + if not domain: + raise ApiError("path_params.domain is required") + + if self.similarweb_api_key: + return self._call_official(api_name, domain=domain, query=query) + + if self.rapidapi_key: + return self._call_rapidapi_snapshot(api_name, domain=domain, query=query) + + raise 
ApiError( + "No credentials configured. Set SIMILARWEB_API_KEY (preferred) or RAPIDAPI_KEY." + ) + + def _call_official(self, api_name: str, *, domain: str, query: Dict[str, Any]) -> Dict[str, Any]: + spec = OFFICIAL_ENDPOINTS.get(api_name) + if not spec: + raise ApiError(f"Unsupported api_name for official mode: {api_name}") + + path = spec.path.format(domain=domain) + q = self._clean_query(query) + q["api_key"] = self.similarweb_api_key + url = f"{self.similarweb_base_url}{path}?{urllib.parse.urlencode(q)}" + + req = urllib.request.Request(url=url, method="GET") + return self._do_request(req, mode="official", api_name=api_name, url=url) + + def _call_rapidapi_snapshot(self, api_name: str, *, domain: str, query: Dict[str, Any]) -> Dict[str, Any]: + encoded_domain = urllib.parse.quote(domain) + url = f"https://{self.rapidapi_host}/v2/getdomain?domain={encoded_domain}" + headers = { + "x-rapidapi-key": self.rapidapi_key or "", + "x-rapidapi-host": self.rapidapi_host, + } + req = urllib.request.Request(url=url, method="GET", headers=headers) + + resp = self._do_request(req, mode="rapidapi", api_name=api_name, url=url) + return { + "_adapter": { + "mode": "rapidapi", + "note": "Using /v2/getdomain snapshot fallback; not 1:1 with official endpoint schema.", + "requested_api": api_name, + "requested_query": query, + }, + "data": resp, + } + + @staticmethod + def _clean_query(query: Mapping[str, Any]) -> Dict[str, Any]: + out: Dict[str, Any] = {} + for k, v in query.items(): + if v is None: + continue + if isinstance(v, bool): + out[k] = "true" if v else "false" + else: + out[k] = str(v) + return out + + def _do_request(self, req: urllib.request.Request, *, mode: str, api_name: str, url: str) -> Dict[str, Any]: + try: + with urllib.request.urlopen(req, timeout=self.timeout) as resp: + body = resp.read().decode("utf-8", errors="replace") + data = json.loads(body) if body else {} + return { + "_meta": { + "mode": mode, + "api_name": api_name, + "http_status": resp.status, 
+ "url": url, + }, + "response": data, + } + except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8", errors="replace") + try: + parsed = json.loads(body) + except Exception: + parsed = {"raw": body} + raise ApiError( + json.dumps( + { + "http_status": exc.code, + "mode": mode, + "api_name": api_name, + "url": url, + "error": parsed, + }, + ensure_ascii=False, + ) + ) + except urllib.error.URLError as exc: + raise ApiError(f"Network error for {url}: {exc}") + + +__all__ = ["ApiClient", "ApiError"] diff --git a/similarweb-analytics/scripts/test_docker_workflow.sh b/similarweb-analytics/scripts/test_docker_workflow.sh new file mode 100755 index 0000000..27e961c --- /dev/null +++ b/similarweb-analytics/scripts/test_docker_workflow.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RUNTIME_FIXTURE_DIR="$SCRIPT_DIR/tests/fixtures" +OUTPUT_DIR="${1:-$SCRIPT_DIR/../tmp/test-output}" +RUNNER="$SCRIPT_DIR/run_in_docker.sh" + +mkdir -p "$OUTPUT_DIR" + +echo "[1/4] Build image + self-test" +"$RUNNER" --build --runtime-dir "$RUNTIME_FIXTURE_DIR" --output-dir "$OUTPUT_DIR" -- --self-test + +echo "[2/4] Dry-run validation" +"$RUNNER" --runtime-dir "$RUNTIME_FIXTURE_DIR" --output-dir "$OUTPUT_DIR" -- \ + --api visits-total \ + --domain amazon.com \ + --country world \ + --dry-run + +echo "[3/4] Mock call writes output file" +"$RUNNER" --runtime-dir "$RUNTIME_FIXTURE_DIR" --output-dir "$OUTPUT_DIR" -- \ + --api global-rank \ + --domain amazon.com \ + --mock-result \ + --output /data/mock-global-rank.json + +test -f "$OUTPUT_DIR/mock-global-rank.json" + +echo "[4/4] Fixture ApiClient end-to-end call writes output" +"$RUNNER" --runtime-dir "$RUNTIME_FIXTURE_DIR" --output-dir "$OUTPUT_DIR" -- \ + --api traffic-by-country \ + --domain amazon.com \ + --start-date 2025-12 \ + --end-date 2026-02 \ + --limit 3 \ + --output /data/fixture-traffic-by-country.json + +test -f 
"$OUTPUT_DIR/fixture-traffic-by-country.json" +echo "All tests passed. Output dir: $OUTPUT_DIR" diff --git a/similarweb-analytics/scripts/tests/fixtures/data_api.py b/similarweb-analytics/scripts/tests/fixtures/data_api.py new file mode 100644 index 0000000..f53c772 --- /dev/null +++ b/similarweb-analytics/scripts/tests/fixtures/data_api.py @@ -0,0 +1,8 @@ +class ApiClient: + def call_api(self, api_name, path_params=None, query=None): + return { + "fixture": True, + "api_name": api_name, + "path_params": path_params or {}, + "query": query or {}, + }