- Score max changed from 100 to 60, rating max from 10 to 6
- Note scoring now awards actual rating points (delta-based)
- Re-scoring only awards/deducts the difference
- Rating history shows note_score entries with problem link
- LLM prompt includes problem statement context for better evaluation
- LLM scoring dimensions: 题意理解/思路算法/代码记录/踩坑反思 (15 each)
- Minecraft-themed UI: 矿石鉴定, 探索笔记, 存入宝典, etc.
- Fallback scoring adjusted for 60-point scale
- Handle LLM markdown code fence wrapping in response

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
171 行
6.1 KiB
Python
可执行文件
171 行
6.1 KiB
Python
可执行文件
#!/usr/bin/env python3
|
||
"""Score a learning note (0-60) and map to rating (0-6) via LLM with fallback."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import os
|
||
import time
|
||
from typing import Any, Dict, Optional
|
||
|
||
import requests
|
||
|
||
|
||
def env(name: str, default: str = "") -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    An unset variable, or one whose value is empty or whitespace-only,
    yields *default* instead.
    """
    return os.getenv(name, "").strip() or default
|
||
|
||
|
||
def load_input(path: str) -> Dict[str, Any]:
    """Read the UTF-8 JSON file at *path* and return its top-level object.

    Raises:
        ValueError: if the document's top-level value is not a JSON object
            (also raised by the JSON decoder for malformed input).
    """
    with open(path, "r", encoding="utf-8") as handle:
        document = json.loads(handle.read())
    if isinstance(document, dict):
        return document
    raise ValueError("input json must be object")
|
||
|
||
|
||
def fallback(note: str) -> Dict[str, Any]:
    """Score a note with fixed keyword/length rules (used when the LLM call fails).

    The score starts at 20 and gains points for length (>= 200 and >= 500
    characters), a fenced code block, pitfall/debug keywords and
    summary keywords, capped at 60.

    Args:
        note: The raw note text; surrounding whitespace is ignored.

    Returns:
        A dict with ``score`` (0-60), ``rating`` (0-6), ``feedback_md``
        (Markdown feedback) and ``model_name`` (always ``"fallback-rules"``).
    """
    text = note.strip()
    score = 20
    if len(text) >= 200:
        score += 10
    if len(text) >= 500:
        score += 5
    if "```" in text:
        score += 10
    if "踩坑" in text or "错误" in text or "debug" in text.lower():
        score += 8
    if "总结" in text or "注意" in text:
        score += 7
    score = min(60, score)

    # Round half up with integer arithmetic. The built-in round() rounds
    # halves to the nearest even integer, so round(45 / 10) == 4 while
    # round(35 / 10) == 4 and round(55 / 10) == 6 -- a 45-point note would
    # earn no more than a 35-point one.
    rating = max(0, min(6, (score + 5) // 10))

    feedback_md = (
        "### ⛏️ 矿石鉴定报告(规则兜底)\n"
        f"- 品质:**{score}/60** ⚡ 经验值:**+{rating}**\n"
        "\n### 🏆 已获成就\n"
        "- 记录了探索过程(检测到一定的笔记内容)。\n"
        "\n### 📜 升级指南\n"
        "- 写清本次**探索目标**、**核心知识点**、**代码配方**。\n"
        "- 至少记录 1 个你遇到的陷阱(如格式、编译报错)以及修复方案。\n"
        "- 最后用 3-5 行做总结,铸造你的知识宝典。\n"
    )
    return {
        "score": score,
        "rating": rating,
        "feedback_md": feedback_md,
        "model_name": "fallback-rules",
    }
|
||
|
||
|
||
def _parse_llm_json(content: str) -> Any:
    """Decode the model's reply text as JSON, tolerating Markdown code fences."""
    text = content.strip()
    if text.startswith("```"):
        # Drop the opening fence line (e.g. "```json").
        text = text.split("\n", 1)[-1]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()
    try:
        return json.loads(text)
    except ValueError:
        # The model sometimes wraps the object in prose or a single-line
        # fence; retry on the outermost {...} span before giving up.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])


def _normalize_llm_result(parsed: Any, model: str) -> Dict[str, Any]:
    """Validate the decoded reply and clamp its fields to the 60/6 scale."""
    if not isinstance(parsed, dict):
        raise ValueError("model output not object")
    score = max(0, min(60, int(parsed.get("score", 0))))
    # Derive a missing rating by rounding half up; the built-in round()
    # rounds halves to even, which would map 45 to 4 but 55 to 6.
    rating = int(parsed.get("rating", (score + 5) // 10))
    rating = max(0, min(6, rating))
    feedback_md = (
        str(parsed.get("feedback_md", "")).strip()
        or "### ⛏️ 矿石鉴定报告\n- 请补充更多内容(探索目标/代码配方/总结)。"
    )
    model_name = str(parsed.get("model_name", model)).strip() or model
    return {
        "score": score,
        "rating": rating,
        "feedback_md": feedback_md,
        "model_name": model_name,
    }


def call_llm(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Ask the configured chat-completions endpoint to score a learning note.

    Configuration is read from the environment: ``OI_LLM_API_URL`` /
    ``OI_LLM_API_KEY`` (falling back to the ``CSP_``-prefixed variants) and
    ``OI_LLM_MODEL`` (default ``"qwen3-max"``).

    Args:
        payload: Input object; the keys read here are ``problem_id``,
            ``problem_title``, ``problem_statement`` and ``note``.

    Returns:
        A dict with ``score`` (0-60), ``rating`` (0-6), ``feedback_md`` and
        ``model_name``.

    Raises:
        RuntimeError: if no API URL is configured, or if no attempt produced
            a usable reply (the last underlying error is chained as the cause).
    """
    api_url = env("OI_LLM_API_URL") or env("CSP_LLM_API_URL")
    api_key = env("OI_LLM_API_KEY") or env("CSP_LLM_API_KEY")
    model = env("OI_LLM_MODEL", "qwen3-max")
    if not api_url:
        raise RuntimeError("missing OI_LLM_API_URL")

    problem_title = payload.get("problem_title", "")
    # A JSON null (or non-string) statement must not abort before the request
    # is even sent, so coerce it to text first.
    raw_statement = payload.get("problem_statement")
    problem_statement = "" if raw_statement is None else str(raw_statement)
    # Truncate long statements to save tokens
    if len(problem_statement) > 2000:
        problem_statement = problem_statement[:2000] + "\n...(truncated)"

    system = (
        "你是一位 Minecraft 风格的C++竞赛教练(矿石鉴定大师),请结合题目内容对学习笔记打分。\n"
        "评分满分60分,经验值(rating)=round(score/10),范围0-6。\n"
        "评分维度:\n"
        "- 题意理解 15分:是否正确理解题目要求\n"
        "- 思路与算法 15分:解题思路是否清晰、算法是否正确\n"
        "- 代码记录 15分:是否有代码片段/模板/关键实现\n"
        "- 踩坑反思 15分:是否记录了坑点、调试过程、经验教训\n"
        "请用 Minecraft 游戏风格的语言给出反馈,使用⛏️🏆📜💎⚡等图标。\n"
        "输出必须是纯JSON(不要markdown代码块),不要输出其他任何文字。"
    )
    user = {
        "task": "结合题目对学习笔记评分并给出改进建议",
        "problem": {
            "id": payload.get("problem_id"),
            "title": problem_title,
            "statement": problem_statement,
        },
        "note": payload.get("note", ""),
        "output_json_schema": {
            "score": "integer 0-60",
            "rating": "integer 0-6",
            "feedback_md": "markdown string, Minecraft style",
            "model_name": "string",
        },
    }

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    body = {
        "model": model,
        "stream": False,
        "temperature": 0.2,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": json.dumps(user, ensure_ascii=False)},
        ],
    }

    max_attempts = 4
    retryable_client_statuses = (408, 429)  # timeout / rate limit
    last: Optional[Exception] = None
    for attempt in range(max_attempts):
        status: Optional[int] = None
        try:
            resp = requests.post(api_url, headers=headers, json=body, timeout=50)
            status = resp.status_code
            if status >= 500:
                raise RuntimeError(f"HTTP {status}")
            resp.raise_for_status()
            content = resp.json()["choices"][0]["message"]["content"]
            return _normalize_llm_result(_parse_llm_json(content), model)
        except Exception as e:  # noqa: BLE001
            last = e
            # Client errors such as a bad key or malformed request will not
            # succeed on retry; give up immediately so the caller can fall back.
            if (
                status is not None
                and 400 <= status < 500
                and status not in retryable_client_statuses
            ):
                break
        # Linear back-off between attempts only; no pause after the last one.
        if attempt < max_attempts - 1:
            time.sleep(0.6 * (attempt + 1))

    raise RuntimeError(str(last) if last else "llm failed") from last
|
||
|
||
|
||
def main() -> int:
    """Command-line entry point: score the note in ``--input-file``.

    Prints exactly one JSON object to stdout: a zero-score placeholder for
    an empty note, otherwise the LLM result, or the rule-based fallback
    result if the LLM call raises. Always returns 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-file", required=True)
    cli = parser.parse_args()

    request = load_input(cli.input_file)
    note_text = str(request.get("note", ""))

    if note_text.strip():
        try:
            result = call_llm(request)
        except Exception:
            # Any LLM failure degrades to the local rule-based scorer.
            result = fallback(note_text)
    else:
        # Nothing to grade: report a zero score without contacting the LLM.
        result = {
            "score": 0,
            "rating": 0,
            "feedback_md": "### ⛏️ 空白卷轴\n请先写下你的探索笔记再进行鉴定。",
            "model_name": "validator",
        }

    print(json.dumps(result, ensure_ascii=False))
    return 0
|
||
|
||
|
||
# Script entry point: when executed directly, run main() and exit with its
# return value as the process status.
if __name__ == "__main__":
    raise SystemExit(main())
|