feat: rebuild CSP practice workflow, UX and automation

这个提交包含在:
Codex CLI
2026-02-13 15:49:05 +08:00
父节点 d33deed4c5
当前提交 e2ab522b78
修改 105 个文件,包含 15669 行新增、428 行删除

查看文件

@@ -10,10 +10,11 @@ sudo apt-get install -y \
libjsoncpp-dev libyaml-cpp-dev libhiredis-dev \
libpq-dev libmariadb-dev libmariadb-dev-compat \
libsqlite3-dev sqlite3 \
zlib1g-dev libssl-dev libbrotli-dev uuid-dev \
catch2
# Node.js / npm 通常由 NodeSource 预装;这里仅做提示
node -v
npm -v
echo "Bootstrap OK"
echo "Bootstrap OK"

查看文件

@@ -0,0 +1,475 @@
#!/usr/bin/env python3
"""Generate new CSP-J problems with RAG + dedupe checks."""
from __future__ import annotations
import argparse
import json
import math
import os
import random
import re
import sqlite3
import time
from dataclasses import dataclass
from difflib import SequenceMatcher
from typing import Any
from urllib.parse import quote
import requests
DEFAULT_BASE_URL = "https://www.luogu.com.cn"
DEFAULT_TAG_IDS = [343, 82] # CSP-J + NOIP junior
RETRYABLE_HTTP_CODES = {429, 500, 502, 503, 504}
CONTEXT_RE = re.compile(
r'<script[^>]*id="lentille-context"[^>]*>(.*?)</script>', re.DOTALL
)
@dataclass
class ExistingProblem:
    """Minimal projection of a `problems` row used for dedupe comparisons."""

    # Primary key of the problems table.
    id: int
    title: str
    # Full Markdown statement; only a prefix is compared during dedupe.
    statement_md: str
def now_sec() -> int:
    """Return the current Unix time as whole seconds."""
    ts = time.time()
    return int(ts)
def normalize(text: str) -> str:
    """Canonicalize *text* for comparison.

    Lowercases, keeps only ASCII digits/letters, CJK ideographs and
    spaces, and collapses whitespace runs into single spaces.
    """
    lowered = text.lower().strip()
    collapsed = re.sub(r"\s+", " ", lowered)
    cleaned = re.sub(r"[^0-9a-z\u4e00-\u9fff ]+", " ", collapsed)
    return re.sub(r"\s+", " ", cleaned).strip()
def similarity(a: str, b: str) -> float:
    """Fuzzy match ratio in [0, 1] between the normalized forms of a and b.

    Either side being empty short-circuits to 0.0.
    """
    if a and b:
        return SequenceMatcher(None, normalize(a), normalize(b)).ratio()
    return 0.0
def requests_with_retry(url: str, timeout: int, retries: int, sleep_sec: float) -> str:
    """GET *url* and return the response body text.

    Retries connection errors and retryable HTTP codes (429/5xx) up to
    *retries* times with linear backoff (``attempt * sleep_sec`` seconds).
    Raises RuntimeError on any final failure.
    """
    last_error: Exception | None = None
    for i in range(1, retries + 1):
        try:
            resp = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            last_error = exc
            if i < retries:
                time.sleep(i * sleep_sec)
                continue
            raise RuntimeError(f"request failed: {exc}") from exc
        if resp.status_code in RETRYABLE_HTTP_CODES:
            if i < retries:
                time.sleep(i * sleep_sec)
                continue
            raise RuntimeError(f"request failed: HTTP {resp.status_code}")
        if resp.status_code >= 400:
            # Non-retryable client/server error: fail immediately.
            raise RuntimeError(f"request failed: HTTP {resp.status_code}")
        return resp.text
    # Defensive tail: the loop always returns or raises on its final pass,
    # so this is normally unreachable.
    if last_error:
        raise RuntimeError(str(last_error))
    raise RuntimeError("request failed")
def extract_context_json(html_text: str) -> dict[str, Any]:
    """Extract and parse the Luogu ``lentille-context`` JSON payload.

    Raises:
        RuntimeError: when the script tag is missing or its payload is
            not valid JSON. Previously a raw ``json.JSONDecodeError``
            escaped on malformed payloads; it is now wrapped so callers
            handle one exception type, matching the sibling importer
            script's version of this helper.
    """
    match = CONTEXT_RE.search(html_text)
    if not match:
        raise RuntimeError("lentille-context script not found")
    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError as exc:
        raise RuntimeError("failed to parse lentille-context json") from exc
def crawl_luogu_titles(base_url: str, timeout: int, retries: int, sleep_sec: float) -> list[str]:
    """Fetch page 1 of the Luogu problem list filtered by DEFAULT_TAG_IDS
    and return all non-empty problem titles found in its context JSON.
    """
    tags_csv = ",".join(str(x) for x in DEFAULT_TAG_IDS)
    url = f"{base_url}/problem/list?type=all&tag={quote(tags_csv)}&page=1"
    text = requests_with_retry(url, timeout=timeout, retries=retries, sleep_sec=sleep_sec)
    ctx = extract_context_json(text)
    # Tolerate missing keys at every level of the response shape.
    result = (((ctx.get("data") or {}).get("problems") or {}).get("result") or [])
    titles: list[str] = []
    for row in result:
        if not isinstance(row, dict):
            continue
        title = str(row.get("title") or "").strip()
        if title:
            titles.append(title)
    return titles
def load_existing(conn: sqlite3.Connection) -> list[ExistingProblem]:
    """Read every stored problem into a list of light-weight records."""
    cur = conn.execute("SELECT id,title,statement_md FROM problems")
    return [
        ExistingProblem(
            id=int(pid),
            title=str(title or ""),
            statement_md=str(body or ""),
        )
        for pid, title, body in cur.fetchall()
    ]
def collect_keywords(existing: list[ExistingProblem], luogu_titles: list[str]) -> list[str]:
    """Mine weighted keyword tokens from problem titles.

    Local titles contribute weight 1 per token, crawled Luogu titles
    weight 2; the top 40 tokens by accumulated weight are returned.
    """
    weights: dict[str, int] = {}
    token_re = re.compile(r"[\s,/|+()\[\]【】-]+")

    def tally(raw: str, weight: int = 1) -> None:
        token = normalize(raw)
        # Skip empties, single characters and pure numbers.
        if not token or len(token) < 2 or token.isdigit():
            return
        weights[token] = weights.get(token, 0) + weight

    for problem in existing:
        for piece in token_re.split(problem.title):
            tally(piece, 1)
    for crawled in luogu_titles:
        for piece in token_re.split(crawled):
            tally(piece, 2)
    ranked = sorted(weights.items(), key=lambda kv: kv[1], reverse=True)
    return [token for token, _ in ranked[:40]]
def llm_generate_problem(prompt: str, timeout: int, retries: int, sleep_sec: float) -> dict[str, Any]:
    """Call the chat-completions endpoint configured via OI_LLM_* env vars
    and parse the assistant reply as a JSON object.

    Transient HTTP failures are retried with linear backoff. Raises
    RuntimeError when OI_LLM_API_URL is unset, retries are exhausted, or
    the reply cannot be parsed into a dict.
    """
    url = os.getenv("OI_LLM_API_URL", "").strip()
    api_key = os.getenv("OI_LLM_API_KEY", "").strip()
    model = os.getenv("OI_LLM_MODEL", "qwen3-max").strip()
    if not url:
        raise RuntimeError("missing OI_LLM_API_URL")
    headers = {"Content-Type": "application/json"}
    if api_key:
        # Bearer auth is optional; keyless gateways are allowed.
        headers["Authorization"] = f"Bearer {api_key}"
    body = {
        "model": model,
        "stream": False,
        "temperature": 0.7,
        "messages": [
            {
                "role": "system",
                "content": "你是 CSP-J 出题人。只输出 JSON,不输出额外解释。",
            },
            {"role": "user", "content": prompt},
        ],
    }
    for i in range(1, retries + 1):
        try:
            resp = requests.post(url, headers=headers, json=body, timeout=timeout)
        except requests.RequestException as exc:
            if i < retries:
                time.sleep(i * sleep_sec)
                continue
            raise RuntimeError(f"llm failed: {exc}") from exc
        if resp.status_code in RETRYABLE_HTTP_CODES:
            if i < retries:
                time.sleep(i * sleep_sec)
                continue
            raise RuntimeError(f"llm failed: HTTP {resp.status_code}")
        if resp.status_code >= 400:
            raise RuntimeError(f"llm failed: HTTP {resp.status_code}: {resp.text[:200]}")
        payload = resp.json()
        content = (((payload.get("choices") or [{}])[0].get("message") or {}).get("content") or "")
        text = str(content).strip()
        if text.startswith("```"):
            # Strip a Markdown code fence (``` or ```json ... ```).
            text = re.sub(r"^```[a-zA-Z0-9_-]*", "", text).strip()
            text = text.removesuffix("```").strip()
        try:
            obj = json.loads(text)
            if isinstance(obj, dict):
                return obj
        except json.JSONDecodeError:
            # Fall back to the widest {...} span in free-form text.
            match = re.search(r"\{[\s\S]*\}", text)
            if match:
                obj = json.loads(match.group(0))
                if isinstance(obj, dict):
                    return obj
        # Parse failure is not retried: the HTTP call itself succeeded.
        raise RuntimeError("llm returned non-json content")
    raise RuntimeError("llm failed")
def fallback_generate_problem(sampled_keywords: list[str], llm_error: str) -> dict[str, Any]:
    """Deterministic template problem used when LLM generation fails.

    The current timestamp perturbs the statement parameters and the
    title so consecutive fallbacks do not collide.
    """
    stamp = now_sec()
    seq_len = 5 + (stamp % 6)
    modulus = 7 + (stamp % 9)
    statement_md = f"""
# 题目描述
给定一个长度为 {seq_len} 的整数序列,你需要统计有多少个连续子段的元素和对 {modulus} 取模后等于 0。
## 输入格式
第一行一个整数 n。
第二行 n 个整数 a_i。
## 输出格式
输出一个整数,表示满足条件的连续子段数量。
## 数据范围
- 1 <= n <= 2e5
- |a_i| <= 1e9
## 提示
可以使用前缀和与计数哈希优化到 O(n)。
""".strip()
    return {
        "title": f"CSP-J 训练题·余数统计 {stamp}",
        "difficulty": 3,
        "statement_md": statement_md,
        "sample_input": "6\n1 2 3 4 5 6\n",
        "sample_output": "3\n",
        "answer": "统计前缀和模 m 的相同值配对数量",
        "explanation": "维护 prefix % m 的出现次数,当前值为 x 时,答案增加 cnt[x],再令 cnt[x]++。",
        "knowledge_points": ["前缀和", "哈希计数", "同余"],
        "tags": ["csp-j", "prefix-sum", "hash"],
        "llm_error": llm_error[:200],
        "rag_keywords": sampled_keywords,
    }
def build_problem_md(obj: dict[str, Any]) -> tuple[str, str, str]:
    """Extract (statement_md, sample_input, sample_output) from an LLM dict.

    When `statement_md` is absent, a statement is assembled from the
    description/input_format/output_format fields instead.
    """
    statement = str(obj.get("statement_md") or "").strip()
    if not statement:
        sections = [
            "# 题目描述",
            str(obj.get("description") or "").strip(),
            "## 输入格式",
            str(obj.get("input_format") or "").strip(),
            "## 输出格式",
            str(obj.get("output_format") or "").strip(),
        ]
        statement = "\n\n".join(sections).strip()
    sample_in = str(obj.get("sample_input") or "").strip()
    sample_out = str(obj.get("sample_output") or "").strip()
    return statement, sample_in, sample_out
def maybe_duplicate(existing: list[ExistingProblem], title: str, statement_md: str, threshold: float) -> tuple[bool, int | None, float]:
    """Score the candidate against every known problem.

    Returns (is_duplicate, best_matching_id, best_score). The score
    blends statement similarity (90%) with title similarity (10%) but
    never drops below the pure title similarity; only statement prefixes
    (1200 chars) are compared for speed.
    """
    best_id: int | None = None
    best_score = 0.0
    for candidate in existing:
        title_score = similarity(title, candidate.title)
        body_score = similarity(statement_md[:1200], candidate.statement_md[:1200])
        blended = max(title_score, body_score * 0.9 + title_score * 0.1)
        if blended > best_score:
            best_score = blended
            best_id = candidate.id
    return best_score >= threshold, best_id, best_score
def insert_problem(conn: sqlite3.Connection, title: str, statement_md: str, sample_input: str, sample_output: str, difficulty: int, profile_json: str, tags: list[str]) -> int:
    """Insert a generated problem plus its tags; returns the new row id.

    The slug is derived from the normalized title, truncated to 50 chars
    and suffixed with the current timestamp for uniqueness. Difficulty is
    clamped to [1, 10]. A single commit at the end lands the problem and
    its tags together.
    """
    ts = now_sec()
    slug_base = normalize(title).replace(" ", "-")
    # Bug fix: the previous pattern r"[^a-z0-9\\-]+" double-escaped the
    # backslash inside a raw string, so the character class also admitted
    # literal backslashes. Only lowercase ASCII, digits and '-' belong.
    slug_base = re.sub(r"[^a-z0-9-]+", "", slug_base)
    if not slug_base:
        slug_base = "cspj-generated"
    slug = f"{slug_base[:50]}-{ts}"
    cur = conn.cursor()
    cur.execute(
        """
        INSERT INTO problems(
            slug,title,statement_md,difficulty,source,statement_url,llm_profile_json,sample_input,sample_output,created_at
        ) VALUES(?,?,?,?,?,?,?,?,?,?)
        """,
        (
            slug,
            title,
            statement_md,
            max(1, min(10, difficulty)),  # clamp into the 1..10 scale
            "llm:cspj-generated",
            "",
            profile_json,
            sample_input,
            sample_output,
            ts,
        ),
    )
    problem_id = int(cur.lastrowid)
    for tag in tags:
        cur.execute(
            "INSERT OR IGNORE INTO problem_tags(problem_id,tag) VALUES(?,?)",
            (problem_id, normalize(tag)),
        )
    conn.commit()
    return problem_id
def main() -> int:
    """CLI entry: crawl keyword seeds, generate problems via the LLM (with
    a deterministic fallback), dedupe against the local corpus, insert,
    and print a JSON report.

    Returns 0 when no generation attempt failed, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="RAG generate CSP-J problems")
    parser.add_argument("--db-path", required=True)
    parser.add_argument("--count", type=int, default=1, help="generate count each run")
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    parser.add_argument("--timeout", type=int, default=60)
    parser.add_argument("--retries", type=int, default=4)
    parser.add_argument("--retry-sleep-sec", type=float, default=1.5)
    parser.add_argument("--dedupe-threshold", type=float, default=0.72)
    args = parser.parse_args()
    conn = sqlite3.connect(args.db_path)
    conn.execute("PRAGMA foreign_keys=ON")
    conn.execute("PRAGMA busy_timeout=5000")
    existing = load_existing(conn)
    luogu_titles: list[str] = []
    try:
        luogu_titles = crawl_luogu_titles(
            args.base_url, timeout=args.timeout, retries=args.retries, sleep_sec=args.retry_sleep_sec
        )
    except Exception:
        # Crawling is best-effort; keyword seeding then uses local titles only.
        luogu_titles = []
    keywords = collect_keywords(existing, luogu_titles)
    if not keywords:
        keywords = ["模拟", "枚举", "前缀和", "字符串", "贪心", "搜索"]
    inserted = 0
    skipped_duplicate = 0
    failed = 0
    details: list[dict[str, Any]] = []
    for _ in range(max(1, args.count)):
        sampled_keywords = random.sample(keywords, k=min(8, len(keywords)))
        prompt = f"""
请生成一道原创 CSP-J 风格编程题,难度 2~4,禁止与常见模板题同构。
结合关键词:{', '.join(sampled_keywords)}
输出 JSON
{{
"title": "题目标题",
"difficulty": 2,
"statement_md": "Markdown 题面(含描述、输入格式、输出格式、数据范围)",
"sample_input": "样例输入",
"sample_output": "样例输出",
"answer": "简要答案关键点",
"explanation": "讲解",
"knowledge_points": ["知识点1","知识点2"],
"tags": ["csp-j","入门","..."]
}}
""".strip()
        source = "llm"
        llm_error = ""
        try:
            obj = llm_generate_problem(
                prompt, timeout=args.timeout, retries=args.retries, sleep_sec=args.retry_sleep_sec
            )
        except Exception as exc:
            # Any LLM failure downgrades this attempt to the template.
            source = "fallback"
            llm_error = str(exc)
            obj = fallback_generate_problem(sampled_keywords, llm_error)
        try:
            title = str(obj.get("title") or "").strip()
            if not title:
                raise RuntimeError("generated title is empty")
            difficulty = int(obj.get("difficulty") or 2)
            statement_md, sample_input, sample_output = build_problem_md(obj)
            # Pre-check against the corpus snapshot loaded at startup.
            pre_dup, dup_id, dup_score = maybe_duplicate(
                existing, title, statement_md, args.dedupe_threshold
            )
            if pre_dup:
                skipped_duplicate += 1
                details.append(
                    {
                        "title": title,
                        "status": "skip_pre_duplicate",
                        "source": source,
                        "similar_problem_id": dup_id,
                        "similarity": round(dup_score, 4),
                    }
                )
                continue
            profile = {
                "schema_version": 1,
                "platform": "llm-generated" if source == "llm" else "fallback-generated",
                "difficulty": difficulty,
                "answer": str(obj.get("answer") or ""),
                "explanation": str(obj.get("explanation") or ""),
                "knowledge_points": obj.get("knowledge_points") if isinstance(obj.get("knowledge_points"), list) else [],
                "tags": obj.get("tags") if isinstance(obj.get("tags"), list) else [],
                "generated_at": now_sec(),
                "rag_keywords": sampled_keywords,
            }
            if llm_error:
                profile["llm_error"] = llm_error[:300]
            # Post-check against fresh existing corpus before insert.
            existing_latest = load_existing(conn)
            post_dup, post_dup_id, post_dup_score = maybe_duplicate(
                existing_latest, title, statement_md, args.dedupe_threshold
            )
            if post_dup:
                skipped_duplicate += 1
                details.append(
                    {
                        "title": title,
                        "status": "skip_post_duplicate",
                        "source": source,
                        "similar_problem_id": post_dup_id,
                        "similarity": round(post_dup_score, 4),
                    }
                )
                continue
            tags = profile["tags"] if isinstance(profile["tags"], list) else []
            # Guarantee the csp-j tag and cap the tag count.
            if "csp-j" not in [normalize(str(x)) for x in tags]:
                tags = [*tags, "csp-j"]
            tags = [str(x) for x in tags][:12]
            problem_id = insert_problem(
                conn,
                title=title,
                statement_md=statement_md,
                sample_input=sample_input,
                sample_output=sample_output,
                difficulty=difficulty,
                profile_json=json.dumps(profile, ensure_ascii=False),
                tags=tags,
            )
            inserted += 1
            details.append(
                {"title": title, "status": "inserted", "source": source, "problem_id": problem_id}
            )
            # Keep the in-memory corpus current for intra-run dedupe.
            existing.append(ExistingProblem(problem_id, title, statement_md))
        except Exception as exc:
            failed += 1
            details.append({"status": "failed", "source": source, "error": str(exc)})
    conn.close()
    print(
        json.dumps(
            {
                "db_path": args.db_path,
                "requested_count": max(1, args.count),
                "inserted": inserted,
                "skipped_duplicate": skipped_duplicate,
                "failed": failed,
                "details": details,
                "keyword_sample_size": len(keywords),
            },
            ensure_ascii=False,
            indent=2,
        )
    )
    return 0 if failed == 0 else 1
if __name__ == "__main__":
    # The process exit code mirrors main(): 0 = clean run, 1 = failures.
    raise SystemExit(main())

查看文件

@@ -0,0 +1,324 @@
#!/usr/bin/env python3
"""Asynchronously generate multiple solutions for a problem and store into SQLite."""
from __future__ import annotations
import argparse
import json
import os
import re
import sqlite3
import time
from dataclasses import dataclass
from typing import Any
import requests
RETRYABLE_HTTP_CODES = {500, 502, 503, 504}
@dataclass
class Problem:
    """Snapshot of a `problems` row used to build the solution prompt."""

    # Primary key in the problems table.
    id: int
    title: str
    # Full Markdown statement (truncated when embedded in the prompt).
    statement_md: str
    difficulty: int
    source: str
    sample_input: str
    sample_output: str
def now_sec() -> int:
    """Wall-clock time as an integer number of Unix seconds."""
    return int(time.time())
def extract_json_object(text: str) -> dict[str, Any] | None:
    """Best-effort extraction of a JSON object from an LLM reply.

    Strips Markdown code fences, then tries a direct parse; failing that,
    parses the widest ``{...}`` span in the raw text. Returns None when
    no JSON object can be recovered.
    """
    stripped = text.strip()
    if stripped.startswith("```"):
        # Remove the opening fence (optionally tagged) and a closing fence.
        stripped = re.sub(r"^```[a-zA-Z0-9_-]*", "", stripped).strip()
        stripped = stripped.removesuffix("```").strip()
    try:
        parsed = json.loads(stripped)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    span = re.search(r"\{[\s\S]*\}", text)
    if span is None:
        return None
    try:
        candidate = json.loads(span.group(0))
    except json.JSONDecodeError:
        return None
    return candidate if isinstance(candidate, dict) else None
def llm_request(prompt: str, timeout: int, retries: int, sleep_sec: float) -> str:
    """POST *prompt* to the OI_LLM_* chat endpoint and return the raw
    assistant message content.

    Connection errors and retryable status codes are retried with linear
    backoff (``sleep_sec * attempt``). Raises RuntimeError when the URL
    is unset, retries are exhausted, or the payload is malformed.
    """
    url = os.getenv("OI_LLM_API_URL", "").strip()
    api_key = os.getenv("OI_LLM_API_KEY", "").strip()
    model = os.getenv("OI_LLM_MODEL", "qwen3-max").strip()
    if not url:
        raise RuntimeError("missing OI_LLM_API_URL")
    headers = {"Content-Type": "application/json"}
    if api_key:
        # Bearer auth is optional; keyless gateways are allowed.
        headers["Authorization"] = f"Bearer {api_key}"
    body = {
        "model": model,
        "stream": False,
        "temperature": 0.3,
        "messages": [
            {
                "role": "system",
                "content": "你是资深 OI/CSP 教练。严格输出 JSON,不要输出任何额外文本。",
            },
            {"role": "user", "content": prompt},
        ],
    }
    last_error: Exception | None = None
    for i in range(1, retries + 1):
        try:
            resp = requests.post(url, headers=headers, json=body, timeout=timeout)
        except requests.RequestException as exc:
            last_error = exc
            if i < retries:
                time.sleep(sleep_sec * i)
                continue
            raise RuntimeError(f"llm request failed: {exc}") from exc
        if resp.status_code in RETRYABLE_HTTP_CODES:
            if i < retries:
                time.sleep(sleep_sec * i)
                continue
            raise RuntimeError(f"llm retry exhausted: HTTP {resp.status_code}")
        if resp.status_code >= 400:
            raise RuntimeError(f"llm request failed: HTTP {resp.status_code}: {resp.text[:300]}")
        payload = resp.json()
        choices = payload.get("choices") or []
        if not choices:
            raise RuntimeError("llm response missing choices")
        content = ((choices[0] or {}).get("message") or {}).get("content")
        if not content:
            raise RuntimeError("llm response missing content")
        return str(content)
    # Defensive tail; the final attempt always returns or raises above.
    if last_error:
        raise RuntimeError(f"llm request failed: {last_error}") from last_error
    raise RuntimeError("llm request failed")
def fallback_solutions(max_solutions: int) -> list[dict[str, Any]]:
    """Canned solution skeletons used when the LLM is unavailable.

    Returns at least one and at most two template entries, truncated to
    *max_solutions* (floored at 1).
    """
    skeleton_cpp = (
        "// TODO: 请根据题意补全\n#include <bits/stdc++.h>\nusing namespace std;\n"
        "int main(){ios::sync_with_stdio(false);cin.tie(nullptr);return 0;}\n"
    )
    templates = [
        {
            "title": "解法一:直接模拟/枚举",
            "idea_md": "按题意拆分步骤,先写可过样例的直观解法,再补边界处理。",
            "explanation_md": "适用于数据范围较小或规则清晰的题。",
            "complexity": "时间复杂度依题而定,通常 O(n)~O(n^2)",
            "code_cpp": skeleton_cpp,
            "tags": ["simulation", "implementation"],
        },
        {
            "title": "解法二:优化思路(前缀/贪心/DP 视题而定)",
            "idea_md": "分析状态与重复计算,尝试用前缀和、贪心或动态规划优化。",
            "explanation_md": "比直接模拟更稳定,通常能覆盖更大数据规模。",
            "complexity": "通常优于朴素解法",
            "code_cpp": skeleton_cpp,
            "tags": ["optimization", "dp"],
        },
    ]
    return templates[: max(1, max_solutions)]
def load_problem(conn: sqlite3.Connection, problem_id: int) -> Problem:
    """Fetch one problem row by id; raises RuntimeError when absent.

    NULL columns are coerced to safe defaults ("" / difficulty 1).
    """
    row = conn.execute(
        "SELECT id,title,statement_md,difficulty,source,sample_input,sample_output FROM problems WHERE id=?",
        (problem_id,),
    ).fetchone()
    if row is None:
        raise RuntimeError(f"problem not found: {problem_id}")
    pid, title, statement, difficulty, src, s_in, s_out = row
    return Problem(
        id=int(pid),
        title=str(title or ""),
        statement_md=str(statement or ""),
        difficulty=int(difficulty or 1),
        source=str(src or ""),
        sample_input=str(s_in or ""),
        sample_output=str(s_out or ""),
    )
def update_job(conn: sqlite3.Connection, job_id: int, **fields: Any) -> None:
    """Apply column=value pairs to one problem_solution_jobs row.

    No-op when *fields* is empty. Column names are interpolated into the
    SQL (callers pass literal keyword names, never user input); values go
    through bound parameters. Commits immediately.
    """
    if not fields:
        return
    assignments = ", ".join(f"{name}=?" for name in fields)
    params = [*fields.values(), job_id]
    conn.execute(
        f"UPDATE problem_solution_jobs SET {assignments} WHERE id=?",
        tuple(params),
    )
    conn.commit()
def store_solutions(conn: sqlite3.Connection, problem_id: int, rows: list[dict[str, Any]], source: str) -> int:
    """Replace all stored solutions for *problem_id* with *rows*.

    Rows with duplicate titles are skipped; the 1-based position within
    *rows* is kept as the variant number even when earlier entries were
    skipped. Returns the number of rows written.
    """
    ts = now_sec()
    conn.execute("DELETE FROM problem_solutions WHERE problem_id=?", (problem_id,))
    saved = 0
    seen_titles: set[str] = set()
    for variant, row in enumerate(rows, start=1):
        title = str(row.get("title") or f"解法 {variant}").strip()
        if title in seen_titles:
            continue
        seen_titles.add(title)
        tags = row.get("tags") if isinstance(row.get("tags"), list) else []
        conn.execute(
            """
            INSERT INTO problem_solutions(
                problem_id,variant,title,idea_md,explanation_md,code_cpp,complexity,tags_json,source,created_at,updated_at
            ) VALUES(?,?,?,?,?,?,?,?,?,?,?)
            """,
            (
                problem_id,
                variant,
                title,
                str(row.get("idea_md") or "").strip(),
                str(row.get("explanation_md") or "").strip(),
                str(row.get("code_cpp") or "").strip(),
                str(row.get("complexity") or "").strip(),
                json.dumps(tags, ensure_ascii=False),
                source,
                ts,
                ts,
            ),
        )
        saved += 1
    conn.commit()
    return saved
def main() -> int:
    """CLI entry: generate multi-approach solutions for one problem while
    tracking progress in its problem_solution_jobs row.

    Always marks the job completed/failed before exiting; returns 0 on
    success, 1 on failure.
    """
    parser = argparse.ArgumentParser(description="Generate multi-solution explanations")
    parser.add_argument("--db-path", required=True)
    parser.add_argument("--problem-id", type=int, required=True)
    parser.add_argument("--job-id", type=int, required=True)
    parser.add_argument("--max-solutions", type=int, default=3)
    parser.add_argument("--timeout", type=int, default=90)
    parser.add_argument("--retries", type=int, default=4)
    parser.add_argument("--retry-sleep-sec", type=float, default=1.5)
    args = parser.parse_args()
    conn = sqlite3.connect(args.db_path)
    conn.execute("PRAGMA foreign_keys=ON")
    conn.execute("PRAGMA busy_timeout=5000")
    ts = now_sec()
    update_job(
        conn,
        args.job_id,
        status="running",
        progress=1,
        message="starting",
        started_at=ts,
        updated_at=ts,
    )
    try:
        problem = load_problem(conn, args.problem_id)
        prompt = f"""
请为下面这道 CSP 题生成 {max(1, min(5, args.max_solutions))} 种不同思路的题解(可从不同角度切入,例如模拟/贪心/DP/数据结构),并给出 C++ 参考代码。
输出 JSON,格式固定
{{
"solutions": [
{{
"title": "解法标题",
"idea_md": "思路要点Markdown",
"explanation_md": "详细讲解Markdown",
"complexity": "时间/空间复杂度",
"code_cpp": "完整 C++17 代码",
"tags": ["标签1","标签2"]
}}
]
}}
题目:{problem.title}
难度:{problem.difficulty}
来源:{problem.source}
题面:
{problem.statement_md[:12000]}
样例输入:
{problem.sample_input[:1200]}
样例输出:
{problem.sample_output[:1200]}
""".strip()
        update_job(conn, args.job_id, progress=25, message="requesting llm", updated_at=now_sec())
        source = "fallback"
        solutions: list[dict[str, Any]]
        try:
            content = llm_request(
                prompt,
                timeout=args.timeout,
                retries=args.retries,
                sleep_sec=args.retry_sleep_sec,
            )
            obj = extract_json_object(content)
            raw = obj.get("solutions") if isinstance(obj, dict) else None
            if not isinstance(raw, list) or len(raw) == 0:
                raise RuntimeError("llm response missing solutions array")
            solutions = [x for x in raw if isinstance(x, dict)]
            if not solutions:
                raise RuntimeError("llm response has empty valid solutions")
            source = "llm"
        except Exception:
            # Any LLM problem downgrades to the canned templates.
            solutions = fallback_solutions(args.max_solutions)
        # Clamp to at most 5 solutions regardless of what came back.
        solutions = solutions[: max(1, min(5, args.max_solutions))]
        update_job(conn, args.job_id, progress=70, message="writing solutions", updated_at=now_sec())
        saved = store_solutions(conn, args.problem_id, solutions, source)
        update_job(
            conn,
            args.job_id,
            status="completed",
            progress=100,
            message=f"completed: {saved} solutions ({source})",
            finished_at=now_sec(),
            updated_at=now_sec(),
        )
        conn.close()
        return 0
    except Exception as exc:
        update_job(
            conn,
            args.job_id,
            status="failed",
            progress=100,
            message=f"failed: {str(exc)[:400]}",
            finished_at=now_sec(),
            updated_at=now_sec(),
        )
        conn.close()
        return 1
if __name__ == "__main__":
    # Exit code mirrors main(): 0 = job completed, 1 = job failed.
    raise SystemExit(main())

904
scripts/import_luogu_csp.py 普通文件
查看文件

@@ -0,0 +1,904 @@
#!/usr/bin/env python3
"""Import Luogu CSP-J/S beginner problem set into local SQLite."""
from __future__ import annotations
import argparse
import json
import math
import re
import sqlite3
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from typing import Any
from urllib.parse import quote
import requests
DEFAULT_BASE_URL = "https://www.luogu.com.cn"
DEFAULT_TAG_IDS = [343, 342, 82, 83] # CSP-J, CSP-S, NOIP-junior, NOIP-senior
RETRYABLE_STATUS = {429, 500, 502, 503, 504}
CONTEXT_RE = re.compile(
r'<script[^>]*id="lentille-context"[^>]*>(.*?)</script>', re.DOTALL
)
@dataclass
class LuoguListItem:
    """One row parsed from the Luogu problem-list context JSON."""

    # Luogu problem id, e.g. "P1001".
    pid: str
    title: str
    difficulty: int
    # Tag ids as delivered by the list payload.
    tags: list[int]
    total_submit: int
    total_accepted: int
    # Problem type discriminator as given by Luogu.
    type: str
@dataclass
class UpsertRecord:
    """Fully-prepared problem payload consumed by upsert_problem()."""

    # Unique key deciding insert vs. update.
    slug: str
    title: str
    statement_md: str
    difficulty: int
    source: str
    statement_url: str
    # Serialized profile dict (JSON text).
    llm_profile_json: str
    sample_input: str
    sample_output: str
    # Tag strings written into problem_tags.
    tags: list[str]
def now_sec() -> int:
    """Integer Unix timestamp in seconds."""
    epoch = time.time()
    return int(epoch)
def requests_retry_text(
    session: requests.Session,
    url: str,
    *,
    timeout: int,
    retries: int,
    sleep_sec: float,
) -> str:
    """GET *url* through *session* and return the response body text.

    Connection errors and retryable status codes (429/5xx) are retried
    with linear backoff (``sleep_sec * attempt``); other >= 400 statuses
    fail immediately. Raises RuntimeError on final failure.
    """
    last_error: Exception | None = None
    for attempt in range(1, retries + 1):
        try:
            resp = session.get(url, timeout=timeout)
        except requests.RequestException as exc:
            last_error = exc
            if attempt < retries:
                time.sleep(sleep_sec * attempt)
                continue
            raise RuntimeError(f"GET failed: {url}: {exc}") from exc
        if resp.status_code in RETRYABLE_STATUS:
            if attempt < retries:
                time.sleep(sleep_sec * attempt)
                continue
            raise RuntimeError(f"GET failed after retry: {url}: {resp.status_code}")
        if resp.status_code >= 400:
            raise RuntimeError(f"GET failed: {url}: {resp.status_code}")
        return resp.text
    # Defensive tail; the loop always returns or raises on its last pass.
    if last_error:
        raise RuntimeError(f"GET failed: {url}: {last_error}") from last_error
    raise RuntimeError(f"GET failed: {url}: unknown error")
def extract_context_json(html_text: str) -> dict[str, Any]:
    """Locate the ``lentille-context`` script payload and decode it.

    Raises RuntimeError when the tag is absent or its payload is invalid
    JSON, so callers deal with a single exception type.
    """
    found = CONTEXT_RE.search(html_text)
    if found is None:
        raise RuntimeError("lentille-context script not found")
    try:
        return json.loads(found.group(1))
    except json.JSONDecodeError as exc:
        raise RuntimeError("failed to parse lentille-context json") from exc
def parse_tag_ids(raw: str) -> list[int]:
    """Parse a comma-separated id list such as "343, 82" into ints.

    Blank segments are skipped; an entirely empty result raises
    ValueError (as does any non-numeric segment, via int()).
    """
    ids = [int(piece) for piece in (part.strip() for part in raw.split(",")) if piece]
    if not ids:
        raise ValueError("at least one tag id is required")
    return ids
def normalize_tag(text: str) -> str:
    """Slugify a tag: lowercase ASCII runs joined by single hyphens.

    Tags containing no ASCII alphanumerics at all (e.g. pure CJK) fall
    back to the stripped original text.
    """
    slug = re.sub(r"[^a-z0-9]+", "-", text.strip().lower()).strip("-")
    return slug if slug else text.strip()
def ensure_problem_columns(conn: sqlite3.Connection) -> None:
    """Add newer optional columns to `problems` when missing (idempotent)."""
    cur = conn.cursor()
    cur.execute("PRAGMA table_info(problems)")
    present = {str(info[1]) for info in cur.fetchall()}
    migrations = (
        ("sample_input", "ALTER TABLE problems ADD COLUMN sample_input TEXT NOT NULL DEFAULT ''"),
        ("sample_output", "ALTER TABLE problems ADD COLUMN sample_output TEXT NOT NULL DEFAULT ''"),
        ("statement_url", "ALTER TABLE problems ADD COLUMN statement_url TEXT NOT NULL DEFAULT ''"),
        ("llm_profile_json", "ALTER TABLE problems ADD COLUMN llm_profile_json TEXT NOT NULL DEFAULT '{}'"),
    )
    for column, ddl in migrations:
        if column not in present:
            cur.execute(ddl)
    conn.commit()
def ensure_core_tables(conn: sqlite3.Connection) -> None:
    """Create the problems/problem_tags tables and tag index if absent."""
    ddl_statements = (
        """
        CREATE TABLE IF NOT EXISTS problems (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            slug TEXT NOT NULL UNIQUE,
            title TEXT NOT NULL,
            statement_md TEXT NOT NULL,
            difficulty INTEGER NOT NULL DEFAULT 1,
            source TEXT NOT NULL DEFAULT '',
            statement_url TEXT NOT NULL DEFAULT '',
            llm_profile_json TEXT NOT NULL DEFAULT '{}',
            sample_input TEXT NOT NULL DEFAULT '',
            sample_output TEXT NOT NULL DEFAULT '',
            created_at INTEGER NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS problem_tags (
            problem_id INTEGER NOT NULL,
            tag TEXT NOT NULL,
            PRIMARY KEY(problem_id, tag)
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_problem_tags_tag ON problem_tags(tag)",
    )
    for ddl in ddl_statements:
        conn.execute(ddl)
    conn.commit()
def ensure_import_tables(conn: sqlite3.Connection) -> None:
    """Create the import-job bookkeeping tables and indexes if absent.

    NOTE: `trigger` works unquoted as a column name here — it is one of
    SQLite's fallback keywords.
    """
    ddl_statements = (
        """
        CREATE TABLE IF NOT EXISTS import_jobs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            status TEXT NOT NULL,
            trigger TEXT NOT NULL DEFAULT 'manual',
            total_count INTEGER NOT NULL DEFAULT 0,
            processed_count INTEGER NOT NULL DEFAULT 0,
            success_count INTEGER NOT NULL DEFAULT 0,
            failed_count INTEGER NOT NULL DEFAULT 0,
            options_json TEXT NOT NULL DEFAULT '{}',
            last_error TEXT NOT NULL DEFAULT '',
            started_at INTEGER NOT NULL,
            finished_at INTEGER,
            updated_at INTEGER NOT NULL,
            created_at INTEGER NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS import_job_items (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            job_id INTEGER NOT NULL,
            source_path TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'queued',
            title TEXT NOT NULL DEFAULT '',
            difficulty INTEGER NOT NULL DEFAULT 0,
            problem_id INTEGER,
            error_text TEXT NOT NULL DEFAULT '',
            started_at INTEGER,
            finished_at INTEGER,
            updated_at INTEGER NOT NULL,
            created_at INTEGER NOT NULL,
            UNIQUE(job_id, source_path)
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_import_jobs_created_at ON import_jobs(created_at DESC)",
        "CREATE INDEX IF NOT EXISTS idx_import_job_items_job_status "
        "ON import_job_items(job_id, status, updated_at DESC)",
    )
    for ddl in ddl_statements:
        conn.execute(ddl)
    conn.commit()
def create_import_job(
    conn: sqlite3.Connection, trigger: str, total_count: int, options_json: str
) -> int:
    """Insert a new import job in state 'running' and return its row id."""
    started = now_sec()
    values = (
        "running",            # status
        trigger or "manual",  # empty trigger falls back to 'manual'
        total_count,
        0,                    # processed_count
        0,                    # success_count
        0,                    # failed_count
        options_json,
        "",                   # last_error
        started,              # started_at
        None,                 # finished_at: still running
        started,              # updated_at
        started,              # created_at
    )
    cursor = conn.cursor()
    cursor.execute(
        """
        INSERT INTO import_jobs(
            status,trigger,total_count,processed_count,success_count,failed_count,
            options_json,last_error,started_at,finished_at,updated_at,created_at
        ) VALUES(?,?,?,?,?,?,?,?,?,?,?,?)
        """,
        values,
    )
    conn.commit()
    return int(cursor.lastrowid)
def seed_import_items(
    conn: sqlite3.Connection, job_id: int, items: list[LuoguListItem]
) -> None:
    """Bulk-insert queued item rows for *job_id*, one per Luogu pid.

    INSERT OR IGNORE plus the UNIQUE(job_id, source_path) constraint
    keeps re-seeding idempotent.
    """
    created = now_sec()
    rows = [
        (job_id, item.pid, "queued", "", 0, None, "", None, None, created, created)
        for item in items
    ]
    conn.cursor().executemany(
        """
        INSERT OR IGNORE INTO import_job_items(
            job_id,source_path,status,title,difficulty,problem_id,error_text,
            started_at,finished_at,updated_at,created_at
        ) VALUES(?,?,?,?,?,?,?,?,?,?,?)
        """,
        rows,
    )
    conn.commit()
def update_import_item_success(
    conn: sqlite3.Connection,
    job_id: int,
    source_path: str,
    title: str,
    difficulty: int,
    problem_id: int,
    note: str = "",
) -> None:
    """Mark one import item as successfully processed.

    started_at is only backfilled when it was never set; error_text
    doubles as a slot for an optional informational *note*.
    """
    stamp = now_sec()
    params = (title, difficulty, problem_id, note, stamp, stamp, stamp, job_id, source_path)
    conn.execute(
        """
        UPDATE import_job_items
        SET status='success',
            title=?,
            difficulty=?,
            problem_id=?,
            error_text=?,
            started_at=COALESCE(started_at, ?),
            finished_at=?,
            updated_at=?
        WHERE job_id=? AND source_path=?
        """,
        params,
    )
    conn.commit()
def update_import_item_failed(
    conn: sqlite3.Connection, job_id: int, source_path: str, error_text: str
) -> None:
    """Mark one import item failed, truncating the error text to 500 chars."""
    stamp = now_sec()
    params = (error_text[:500], stamp, stamp, stamp, job_id, source_path)
    conn.execute(
        """
        UPDATE import_job_items
        SET status='failed',
            error_text=?,
            started_at=COALESCE(started_at, ?),
            finished_at=?,
            updated_at=?
        WHERE job_id=? AND source_path=?
        """,
        params,
    )
    conn.commit()
def update_import_job_progress(
    conn: sqlite3.Connection,
    job_id: int,
    processed_count: int,
    success_count: int,
    failed_count: int,
    last_error: str,
) -> None:
    """Refresh a running job's counters and last_error (truncated to 500)."""
    stamp = now_sec()
    conn.execute(
        """
        UPDATE import_jobs
        SET processed_count=?,
            success_count=?,
            failed_count=?,
            last_error=?,
            updated_at=?
        WHERE id=?
        """,
        (processed_count, success_count, failed_count, last_error[:500], stamp, job_id),
    )
    conn.commit()
def finish_import_job(
    conn: sqlite3.Connection,
    job_id: int,
    success_count: int,
    failed_count: int,
    last_error: str,
) -> None:
    """Close out a job with final counters, status and finish timestamp.

    Status is 'completed' only for an error-free run, otherwise
    'completed_with_errors'; processed_count is forced to total_count.
    """
    stamp = now_sec()
    final_status = "completed_with_errors" if failed_count else "completed"
    conn.execute(
        """
        UPDATE import_jobs
        SET status=?,
            processed_count=total_count,
            success_count=?,
            failed_count=?,
            last_error=?,
            finished_at=?,
            updated_at=?
        WHERE id=?
        """,
        (final_status, success_count, failed_count, last_error[:500], stamp, stamp, job_id),
    )
    conn.commit()
def upsert_problem(conn: sqlite3.Connection, rec: UpsertRecord) -> tuple[int, bool]:
    """Insert or refresh a problem row keyed by slug; rewrite its tags.

    Returns (problem_id, inserted) where *inserted* is True for a brand
    new row. created_at is only written on first insert.
    """
    cur = conn.cursor()
    cur.execute("SELECT id FROM problems WHERE slug=?", (rec.slug,))
    found = cur.fetchone()
    inserted = found is None
    if inserted:
        cur.execute(
            """
            INSERT INTO problems(
                slug,title,statement_md,difficulty,source,statement_url,llm_profile_json,
                sample_input,sample_output,created_at
            ) VALUES(?,?,?,?,?,?,?,?,?,?)
            """,
            (
                rec.slug,
                rec.title,
                rec.statement_md,
                rec.difficulty,
                rec.source,
                rec.statement_url,
                rec.llm_profile_json,
                rec.sample_input,
                rec.sample_output,
                now_sec(),
            ),
        )
        problem_id = int(cur.lastrowid)
    else:
        problem_id = int(found[0])
        cur.execute(
            """
            UPDATE problems
            SET title=?,statement_md=?,difficulty=?,source=?,statement_url=?,
                llm_profile_json=?,sample_input=?,sample_output=?
            WHERE id=?
            """,
            (
                rec.title,
                rec.statement_md,
                rec.difficulty,
                rec.source,
                rec.statement_url,
                rec.llm_profile_json,
                rec.sample_input,
                rec.sample_output,
                problem_id,
            ),
        )
    # Tags are replaced wholesale on every upsert.
    cur.execute("DELETE FROM problem_tags WHERE problem_id=?", (problem_id,))
    for tag in rec.tags:
        cur.execute(
            "INSERT OR IGNORE INTO problem_tags(problem_id,tag) VALUES(?,?)",
            (problem_id, tag),
        )
    conn.commit()
    return problem_id, inserted
def markdown_to_absolute(base_url: str, text: str) -> str:
    """Rewrite root-relative Markdown link/image targets to absolute URLs.

    Every ``](/...`` becomes ``](<base_url>/...``. This single pass also
    covers image syntax ``![](/...)`` because it contains the same
    ``](/`` substring, so the previous second pass that targeted
    ``![](/`` could never match anything after the first pass and has
    been removed as dead code.

    NOTE(review): protocol-relative targets like ``](//host/...`` are
    also rewritten — identical to the previous behavior.
    """
    if not text:
        return ""
    return re.sub(r"\]\(/", f"]({base_url}/", text)
def build_statement_md(base_url: str, pid: str, detail: dict[str, Any]) -> str:
    """Assemble the Markdown statement for one Luogu problem detail dict.

    Sections (background / description / IO formats / hint) are emitted
    only when non-empty; relative links are made absolute first.
    """
    content = detail.get("content") or {}
    title = str(detail.get("title") or pid).strip()

    def section(key: str) -> str:
        return markdown_to_absolute(base_url, str(content.get(key) or "").strip())

    parts = [
        f"# {pid} {title}",
        "",
        "- Source: Luogu",
        f"- Problem URL: {base_url}/problem/{pid}",
    ]
    for heading, body in (
        ("Background", section("background")),
        ("Description", section("description")),
        ("Input Format", section("formatI")),
        ("Output Format", section("formatO")),
        ("Hint", section("hint")),
    ):
        if body:
            parts += ["", f"## {heading}", "", body]
    return "\n".join(parts).strip()
def build_record(
    base_url: str,
    list_item: LuoguListItem,
    detail: dict[str, Any],
    tag_catalog: dict[int, dict[str, Any]],
) -> UpsertRecord:
    """Combine a list-page item and its detail payload into an UpsertRecord.

    Detail fields win over list fields where both carry data (title,
    difficulty).  Tag ids from both sources are merged (order-preserving,
    de-duplicated) and resolved through ``tag_catalog`` into display names,
    normalized slug tags, and up to 8 knowledge points (catalog entries with
    ``type == 2``).  The LLM profile JSON embeds stats, tags, and a generic
    answer/explanation placeholder for this imported (not generated) problem.
    """
    pid = list_item.pid
    # Prefer detail payload; fall back to list data, then the pid itself.
    title = str(detail.get("title") or list_item.title or pid).strip()
    difficulty = int(detail.get("difficulty") or list_item.difficulty or 1)
    statement_url = f"{base_url}/problem/{pid}"
    statement_md = build_statement_md(base_url, pid, detail)
    samples = detail.get("samples") or []
    sample_input = ""
    sample_output = ""
    # Only the first sample pair is stored; each sample is an [input, output] list.
    if samples and isinstance(samples[0], list) and len(samples[0]) >= 2:
        sample_input = str(samples[0][0] or "")
        sample_output = str(samples[0][1] or "")
    detail_tag_ids = detail.get("tags") or []
    if not isinstance(detail_tag_ids, list):
        detail_tag_ids = []
    # dict.fromkeys keeps first-seen order while removing duplicates.
    tag_ids = list(dict.fromkeys([*list_item.tags, *detail_tag_ids]))
    tag_names: list[str] = []
    knowledge_points: list[str] = []
    # Every imported record is tagged with these two base slugs.
    normalized_tags: set[str] = {"luogu", "csp"}
    for tid in tag_ids:
        tag = tag_catalog.get(int(tid))
        if not tag:
            continue  # id not present in the catalog; skip silently
        name = str(tag.get("name") or "").strip()
        if not name:
            continue
        tag_names.append(name)
        normalized_tags.add(normalize_tag(name))
        ttype = int(tag.get("type") or 0)
        # Catalog type 2 is treated as an algorithm/knowledge tag; cap at 8.
        if ttype == 2 and len(knowledge_points) < 8:
            knowledge_points.append(name)
        upper_name = name.upper()
        # Add contest-level slugs derived from the tag's display name.
        if "CSP-J" in upper_name:
            normalized_tags.add("csp-j")
        if "CSP-S" in upper_name:
            normalized_tags.add("csp-s")
        if "NOIP 普及" in name:
            normalized_tags.add("noip-junior")
        if "NOIP 提高" in name:
            normalized_tags.add("noip-senior")
    # No type-2 tags found: fall back to the first few display names.
    if not knowledge_points:
        knowledge_points = tag_names[:6]
    answer = "See official solutions/discussions and verify with your own proof."
    explanation = (
        "This problem is imported from Luogu. The statement and examples are preserved; "
        "practice with your own derivation and compare with accepted solutions."
    )
    profile = {
        "schema_version": 1,
        "platform": "luogu",
        "pid": pid,
        "difficulty": difficulty,
        "tags": tag_names,
        "tag_ids": tag_ids,
        "knowledge_points": knowledge_points,
        "answer": answer,
        "explanation": explanation,
        "stats": {
            "total_submit": int(list_item.total_submit),
            "total_accepted": int(list_item.total_accepted),
        },
        "source": {
            "url": statement_url,
            "type": list_item.type,
        },
        "generated_at": now_sec(),
    }
    # Deterministic (sorted) slug list, capped at 30 entries.
    all_tags = sorted({t for t in normalized_tags if t})[:30]
    return UpsertRecord(
        slug=f"luogu-{pid.lower()}",
        title=f"{pid} {title}",
        statement_md=statement_md,
        difficulty=max(1, min(10, difficulty)),  # clamp into the 1..10 scale
        source=f"luogu:{pid}",
        statement_url=statement_url,
        llm_profile_json=json.dumps(profile, ensure_ascii=False),
        sample_input=sample_input,
        sample_output=sample_output,
        tags=all_tags,
    )
def build_fallback_record(
    base_url: str,
    list_item: LuoguListItem,
    tag_catalog: dict[int, dict[str, Any]],
    error_text: str,
) -> UpsertRecord:
    """Build a degraded import record after a failed detail fetch.

    Feeds ``build_record`` a stub detail whose description points readers at
    the original problem URL, then flags the profile JSON as a fallback
    import and stores a truncated failure reason.
    """
    stub_detail: dict[str, Any] = {
        "title": list_item.title,
        "difficulty": list_item.difficulty,
        "tags": list_item.tags,
        "samples": [],
        "content": {
            "description": (
                "题面抓取失败(已自动降级导入)。"
                f"请访问原题链接查看完整题面:{base_url}/problem/{list_item.pid}"
            )
        },
    }
    record = build_record(base_url, list_item, stub_detail, tag_catalog)
    profile_data = json.loads(record.llm_profile_json)
    profile_data["fallback_import"] = True
    profile_data["fallback_reason"] = error_text[:240]
    record.llm_profile_json = json.dumps(profile_data, ensure_ascii=False)
    return record
def fetch_tag_catalog(
    session: requests.Session,
    base_url: str,
    timeout: int,
    retries: int,
    sleep_sec: float,
) -> dict[int, dict[str, Any]]:
    """Download Luogu's tag dictionary and index it by integer tag id.

    Entries that are not dicts or lack an ``id`` key are ignored.
    """
    raw = requests_retry_text(
        session,
        f"{base_url}/_lfe/tags/zh-CN",
        timeout=timeout,
        retries=retries,
        sleep_sec=sleep_sec,
    )
    payload = json.loads(raw)
    return {
        int(entry["id"]): entry
        for entry in payload.get("tags") or []
        if isinstance(entry, dict) and "id" in entry
    }
def fetch_list_page(
    session: requests.Session,
    base_url: str,
    tags_csv: str,
    page: int,
    timeout: int,
    retries: int,
    sleep_sec: float,
) -> tuple[int, int, list[LuoguListItem]]:
    """Fetch one page of the Luogu problem list.

    Parses the page's embedded context JSON and returns
    ``(total_count, per_page, items)``; rows without a pid are dropped.
    """
    url = f"{base_url}/problem/list?type=all&tag={quote(tags_csv)}&page={page}"
    page_html = requests_retry_text(
        session, url, timeout=timeout, retries=retries, sleep_sec=sleep_sec
    )
    context = extract_context_json(page_html)
    listing = (context.get("data") or {}).get("problems") or {}
    total = int(listing.get("count") or 0)
    page_size = int(listing.get("perPage") or 50)
    items: list[LuoguListItem] = []
    for entry in listing.get("result") or []:
        if not isinstance(entry, dict):
            continue
        problem_id = str(entry.get("pid") or "").strip()
        if not problem_id:
            continue
        raw_tags = entry.get("tags")
        tag_ids = raw_tags if isinstance(raw_tags, list) else []
        items.append(
            LuoguListItem(
                pid=problem_id,
                title=str(entry.get("title") or "").strip(),
                difficulty=int(entry.get("difficulty") or 1),
                tags=[int(t) for t in tag_ids if isinstance(t, int)],
                total_submit=int(entry.get("totalSubmit") or 0),
                total_accepted=int(entry.get("totalAccepted") or 0),
                type=str(entry.get("type") or "").strip(),
            )
        )
    return total, page_size, items
def fetch_problem_detail(
    base_url: str,
    pid: str,
    timeout: int,
    retries: int,
    sleep_sec: float,
) -> dict[str, Any]:
    """Fetch a single Luogu problem page and return its embedded detail dict.

    A fresh Session (with browser-like headers) is created per call, so this
    function carries no shared state and can run from worker threads.
    Raises RuntimeError when the context payload has no problem dict.
    """
    session = requests.Session()
    session.headers.update(
        {
            "User-Agent": (
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
            ),
            "Referer": f"{base_url}/problem/list",
        }
    )
    page_html = requests_retry_text(
        session,
        f"{base_url}/problem/{pid}",
        timeout=timeout,
        retries=retries,
        sleep_sec=sleep_sec,
    )
    context = extract_context_json(page_html)
    problem = (context.get("data") or {}).get("problem") or {}
    if not isinstance(problem, dict):
        raise RuntimeError(f"problem detail invalid: {pid}")
    return problem
def main() -> int:
    """CLI entry point: import Luogu CSP-J/S problems into a SQLite database.

    Workflow: parse args -> fetch tag catalog -> walk every list page for the
    requested tag ids -> optionally clear prior imports -> fetch problem
    details concurrently and upsert each record (falling back to a degraded
    record when a detail fetch fails) -> track per-item/job progress in the
    import tables -> print a JSON summary.  Always returns 0.
    """
    parser = argparse.ArgumentParser(description="Import Luogu CSP-J/S problem set")
    parser.add_argument("--db-path", required=True, help="SQLite db path")
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    parser.add_argument(
        "--tag-ids",
        default=",".join(str(x) for x in DEFAULT_TAG_IDS),
        help="Comma separated Luogu tag IDs",
    )
    parser.add_argument("--workers", type=int, default=3)
    parser.add_argument("--max-problems", type=int, default=0)
    parser.add_argument("--timeout", type=int, default=25)
    parser.add_argument("--retries", type=int, default=5)
    parser.add_argument("--retry-sleep-sec", type=float, default=1.2)
    parser.add_argument("--clear-existing", action="store_true")
    parser.add_argument("--clear-all-problems", action="store_true")
    parser.add_argument("--job-trigger", default="manual")
    # NOTE(review): the following three flags are parsed but not referenced
    # anywhere in this function's body — confirm whether they are consumed by
    # a caller/wrapper or are vestigial.
    parser.add_argument("--clear-existing-source-prefix", default="")
    parser.add_argument("--skip-llm", action="store_true")
    parser.add_argument("--llm-limit", type=int, default=0)
    args = parser.parse_args()
    tag_ids = parse_tag_ids(args.tag_ids)
    tags_csv = ",".join(str(x) for x in tag_ids)
    # Shared session for the tag-catalog and list-page requests, with
    # browser-like headers.
    session = requests.Session()
    session.headers.update(
        {
            "User-Agent": (
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
            ),
            "Referer": f"{args.base_url}/problem/list",
        }
    )
    tag_catalog = fetch_tag_catalog(
        session,
        args.base_url,
        timeout=args.timeout,
        retries=args.retries,
        sleep_sec=args.retry_sleep_sec,
    )
    # Page 1 also tells us the total count and page size, from which the
    # remaining page numbers are derived.
    total_count, per_page, first_page_items = fetch_list_page(
        session,
        args.base_url,
        tags_csv,
        page=1,
        timeout=args.timeout,
        retries=args.retries,
        sleep_sec=args.retry_sleep_sec,
    )
    total_pages = max(1, math.ceil(max(1, total_count) / max(1, per_page)))
    # Keyed by pid so duplicates across pages collapse to the latest entry.
    all_items: dict[str, LuoguListItem] = {item.pid: item for item in first_page_items}
    for page in range(2, total_pages + 1):
        _, _, page_items = fetch_list_page(
            session,
            args.base_url,
            tags_csv,
            page=page,
            timeout=args.timeout,
            retries=args.retries,
            sleep_sec=args.retry_sleep_sec,
        )
        for item in page_items:
            all_items[item.pid] = item
    # Deterministic processing order; optional cap for partial imports.
    selected = sorted(all_items.values(), key=lambda x: x.pid)
    if args.max_problems > 0:
        selected = selected[: args.max_problems]
    conn = sqlite3.connect(args.db_path)
    conn.row_factory = sqlite3.Row
    conn.execute("PRAGMA foreign_keys=ON")
    conn.execute("PRAGMA busy_timeout=5000")
    ensure_core_tables(conn)
    ensure_problem_columns(conn)
    ensure_import_tables(conn)
    # Optional purge before importing: everything, or only Luogu imports.
    cleared_count = 0
    if args.clear_all_problems:
        cur = conn.execute("SELECT COUNT(1) FROM problems")
        cleared_count = int(cur.fetchone()[0] or 0)
        conn.execute("DELETE FROM problems")
        conn.commit()
    elif args.clear_existing:
        cur = conn.execute("SELECT COUNT(1) FROM problems WHERE source LIKE 'luogu:%'")
        cleared_count = int(cur.fetchone()[0] or 0)
        conn.execute("DELETE FROM problems WHERE source LIKE 'luogu:%'")
        conn.commit()
    inserted = 0
    updated = 0
    failed = 0
    fallback_used = 0
    total = len(selected)
    last_error = ""
    options_json = json.dumps(
        {
            "source": "luogu",
            "tag_ids": tag_ids,
            "workers": max(1, args.workers),
            "max_problems": args.max_problems,
            "clear_existing": bool(args.clear_existing),
            "clear_all_problems": bool(args.clear_all_problems),
        },
        ensure_ascii=False,
    )
    job_id = create_import_job(conn, args.job_trigger, total, options_json)
    seed_import_items(conn, job_id, selected)
    # Detail fetches run in worker threads; all DB writes happen on this
    # thread inside the as_completed loop.
    with ThreadPoolExecutor(max_workers=max(1, args.workers)) as executor:
        futures = {
            executor.submit(
                fetch_problem_detail,
                args.base_url,
                item.pid,
                args.timeout,
                args.retries,
                args.retry_sleep_sec,
            ): item
            for item in selected
        }
        done_count = 0
        for future in as_completed(futures):
            item = futures[future]
            done_count += 1
            try:
                detail = future.result()
                record = build_record(args.base_url, item, detail, tag_catalog)
                problem_id, is_insert = upsert_problem(conn, record)
                if is_insert:
                    inserted += 1
                else:
                    updated += 1
                update_import_item_success(
                    conn,
                    job_id,
                    item.pid,
                    record.title,
                    record.difficulty,
                    problem_id,
                )
                print(
                    f"[{done_count}/{total}] {item.pid} -> {record.title} "
                    f"(difficulty={record.difficulty})",
                    flush=True,
                )
            except Exception as exc:
                # Detail fetch/parse failed: try a degraded fallback record
                # that still registers the problem with a link-out statement.
                try:
                    record = build_fallback_record(
                        args.base_url, item, tag_catalog, str(exc)
                    )
                    problem_id, is_insert = upsert_problem(conn, record)
                    if is_insert:
                        inserted += 1
                    else:
                        updated += 1
                    fallback_used += 1
                    update_import_item_success(
                        conn,
                        job_id,
                        item.pid,
                        record.title,
                        record.difficulty,
                        problem_id,
                        note=f"fallback: {str(exc)[:300]}",
                    )
                    print(f"[fallback] {item.pid}: {exc}", flush=True)
                except Exception as inner_exc:
                    # Even the fallback failed: count it, remember the last
                    # error for the job record, and keep going.
                    failed += 1
                    last_error = str(inner_exc)
                    update_import_item_failed(
                        conn,
                        job_id,
                        item.pid,
                        f"{exc}; fallback failed: {inner_exc}",
                    )
                    print(f"[skip] {item.pid}: {exc}; fallback failed: {inner_exc}", flush=True)
            # Progress row is refreshed after every completed future,
            # success or failure.
            update_import_job_progress(
                conn,
                job_id,
                done_count,
                inserted + updated,
                failed,
                last_error,
            )
    finish_import_job(conn, job_id, inserted + updated, failed, last_error)
    conn.close()
    # Machine-readable run summary on stdout.
    print(
        json.dumps(
            {
                "db_path": args.db_path,
                "tags": tag_ids,
                "selected_count": total,
                "inserted": inserted,
                "updated": updated,
                "failed": failed,
                "fallback_used": fallback_used,
                "cleared_count": cleared_count,
                "job_id": job_id,
            },
            ensure_ascii=False,
            indent=2,
        )
    )
    return 0
# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

1302
scripts/import_winterant_oi.py 普通文件

文件差异内容过多而无法显示 加载差异