diff --git a/README.md b/README.md index 696eff2..a8e6efe 100644 --- a/README.md +++ b/README.md @@ -62,11 +62,13 @@ - 文本类任务使用 `LLM_API_URL` / `LLM_API_KEY` / `LLM_MODEL` - 图片类任务可单独指定 `LLM_VISION_API_URL` / `LLM_VISION_API_KEY` / `LLM_VISION_MODEL` - 所有图片输入都要求可从公网访问,因此本地相对路径会通过 `APP_PUBLIC_BASE_URL` 规范化为绝对 URL +- 若视觉模型链路返回非标准 JSON 或缺失数组字段,服务端会先做结构兼容和字段补全,再尝试生成视觉报告 - 若视觉模型链路不可用,系统会自动回退到结构化指标驱动的文本纠正,避免任务直接失败 - 系统内置“视觉标准图库”页面 `/vision-lab`,可把公网网球参考图入库并保存每次识别结果 - `ADMIN_USERNAMES` 可指定哪些用户名账号拥有 admin 视角,例如 `H1` - 用户名登录支持直接进入系统;仅首次创建新用户时需要填写 `REGISTRATION_INVITE_CODE` - 新用户首次登录时只需提交一次用户名;若用户名不存在才需要额外填写邀请码 +- `vision-lab` 支持对历史 `fallback/failed` 记录重新排队,便于修复上游返回不稳定导致的旧数据 ## Quick Start diff --git a/client/src/pages/VisionLab.tsx b/client/src/pages/VisionLab.tsx index c8b2a9d..1156c95 100644 --- a/client/src/pages/VisionLab.tsx +++ b/client/src/pages/VisionLab.tsx @@ -100,6 +100,22 @@ export default function VisionLab() { onError: (error) => toast.error(`批量视觉测试提交失败: ${error.message}`), }); + const retryRunMutation = trpc.vision.retryRun.useMutation({ + onSuccess: () => { + toast.success("视觉记录已重新加入队列"); + utils.vision.runs.invalidate(); + }, + onError: (error) => toast.error(`重新执行失败: ${error.message}`), + }); + + const retryFallbacksMutation = trpc.vision.retryFallbacks.useMutation({ + onSuccess: (data) => { + toast.success(`已重新排队 ${data.count} 条历史视觉记录`); + utils.vision.runs.invalidate(); + }, + onError: (error) => toast.error(`批量修复失败: ${error.message}`), + }); + useEffect(() => { if (activeTask.data?.status === "succeeded" || activeTask.data?.status === "failed") { utils.vision.runs.invalidate(); @@ -131,15 +147,26 @@ export default function VisionLab() {
{user?.role === "admin" ? ( - + <> + + + ) : null} +
+ ) : null} + {run.expectedFocus?.length ? (
{run.expectedFocus.map((item) => ( diff --git a/docs/FEATURES.md b/docs/FEATURES.md index 99b3132..ac5ccb1 100644 --- a/docs/FEATURES.md +++ b/docs/FEATURES.md @@ -25,7 +25,9 @@ - 训练数据回写:实时分析与录制数据自动写入训练记录、日训练聚合、成就系统和 NTRP 评分 - 动作纠正:支持文本纠正和多模态纠正两条链路,统一通过后台任务执行 - 多模态图片输入:上传关键帧后会转换为公网可访问的绝对 URL,再提交给视觉模型 +- 视觉结果规范化:即使上游模型返回的是宽松 JSON、Markdown 包裹 JSON 或缺失数组字段,服务端也会先做结构兼容与默认值补齐 - 视觉标准图库:内置网球公网参考图,可直接发起视觉识别测试并保存结果 +- 历史视觉修复:`vision-lab` 支持对旧的 `fallback/failed` 视觉记录重新排队修复,admin 可批量修复历史降级记录 - 视频库:集中展示录制结果、上传结果和分析摘要 - PC 轻剪辑:视频库内可直接打开轻剪辑工作台,支持预览、设定入点/出点、建议片段和草稿导出 diff --git a/docs/testing.md b/docs/testing.md index 2af3146..c732831 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -104,6 +104,12 @@ pnpm exec tsx -e 'import "dotenv/config"; import { invokeLLM } from "./server/_c - 运行单张或批量测试,确认结果会写入 `vision_test_runs` - 若上游视觉网关不可用,记录应显示 `fallback` +2026-03-15 额外完成了多模态兼容与历史修复验证: + +- 使用真实公网网球图片调用视觉链路,确认服务端能兼容上游返回的非标准 JSON 字段 +- 重跑历史 3 条 `fallback` 标准图记录,确认已全部转为 `visionStatus=ok` +- Playwright 真实站点检查 `https://te.hao.work/vision-lab`,确认页面不再出现 `Cannot read properties of undefined (reading 'join')` + ## Production smoke checks 部署到宿主机后,建议至少补以下联测: diff --git a/docs/verified-features.md b/docs/verified-features.md index 81fc8b3..2316a57 100644 --- a/docs/verified-features.md +++ b/docs/verified-features.md @@ -1,12 +1,12 @@ # Verified Features -本文档记录当前已经通过自动化验证或构建验证的项目。更新时间:2026-03-15 02:15 CST。 +本文档记录当前已经通过自动化验证或构建验证的项目。更新时间:2026-03-15 02:29 CST。 ## 最新完整验证记录 - 通过命令:`pnpm verify` -- 验证时间:2026-03-15 02:15 CST -- 结果摘要:`pnpm check` 通过,`pnpm test` 通过(95/95),`pnpm test:go` 通过,`pnpm build` 通过,`pnpm test:e2e` 通过(7/7) +- 验证时间:2026-03-15 02:26 - 02:29 CST +- 结果摘要:`pnpm verify` 通过,`pnpm test` 通过(99/99),`pnpm test:go` 通过,`pnpm build` 通过,`pnpm test:e2e` 通过(7/7) - 数据库状态:已执行 `set -a && source .env && set +a && pnpm exec drizzle-kit migrate`,`0007_grounded_live_ops` 已成功应用 ## 生产部署联测 @@ -23,6 +23,8 @@ | 新用户邀请码校验 | Playwright 验证无邀请码被拦截、正确邀请码 `CA2026` 可创建新账号 | 通过 | | 日志页访问 | Playwright 以 `H1` 登录并访问 `/logs` | 通过 | | 生产训练 / 实时分析 / 录制 / 视频库页面加载 | Playwright 访问 `/training`、`/live-camera`、`/recorder`、`/videos` | 通过 | +| 生产视觉标准图库页面 | Playwright 登录后访问 `/vision-lab`,未捕获 `pageerror` / `console.error` | 通过 | +| 生产视觉历史修复 | 重跑历史 3 条 `fallback` 标准图记录后,`visionStatus` 全部恢复为 `ok` | 通过 | | 生产视频库轻剪辑入口 | 本地 `pnpm test:e2e` + 真实站点 `/videos` smoke | 通过 | | 生产训练计划后台任务提交 | Playwright 点击训练计划生成按钮并收到后台任务反馈 | 通过 | | 生产移动端录制焦点视图 | Playwright 移动端视口打开 `/recorder` 并验证焦点入口与操作壳层 | 通过 | @@ -93,8 +95,9 @@ | `.env` 中的 `LLM_API_URL` / `LLM_API_KEY` / `LLM_MODEL` | `pnpm test:llm` | 通过 | | `https://one.hao.work/v1/chat/completions` 联通性 | `pnpm test:llm` 实际返回文本 | 通过 | | 视觉模型独立配置路径 | `server/_core/llm.test.ts` + 手工 smoke 检查 | 通过 | +| 视觉返回兼容解析 | `server/vision.test.ts` + 真实图片 smoke | 通过 | | 视觉标准图库入库 | MySQL 中 `vision_reference_images` 已写入 5 张 Commons 网球参考图 | 通过 | -| 视觉测试结果入库 | MySQL 中 `vision_test_runs` 已写入 3 条真实测试结果 | 通过 | +| 视觉测试结果入库 | MySQL 中 `vision_test_runs` 已写入 3 条真实测试结果,且历史 `fallback` 已修复为 `ok` | 通过 | | H1 全量可见性 | `H1` 用户已提升为 `admin`,可读取全部视觉测试记录;Playwright 真实站点检查通过 | 通过 | ## 已知非阻断警告 @@ -103,7 +106,7 @@ - `pnpm build` 仍有 Vite 大 chunk 警告;当前属于性能优化待办,不影响本次产物生成 - Playwright 运行依赖 mocked media/network,不等价于真机摄像头、真实弱网和真实 WebRTC 质量验收 - 当前上游视觉网关可能忽略 `LLM_VISION_MODEL` 并回退为文本模型;服务端已实现自动降级,任务不会因此直接失败 -- 2026-03-15 的真实标准图测试中,正手 / 反手 / 发球三条记录均以 `fallback` 完成,说明当前上游视觉网关仍未稳定返回结构化视觉结果 +- 上游视觉网关当前返回的 `model` 仍可能显示为 `qwen3.5-plus`,且响应格式不稳定;服务端已增加兼容解析与默认值补齐,避免再次因结构差异直接降级 - 开发服务器启动阶段仍会打印 `OAUTH_SERVER_URL` 未配置提示;当前用户名登录、mock auth 和自动化测试不受影响 ## 当前未纳入自动验证的内容 diff --git a/server/db.ts b/server/db.ts index 7440aa6..7f42f5e 100644 --- a/server/db.ts +++ b/server/db.ts @@ -1,4 +1,4 @@ -import { eq, desc, and, asc, lte, gte, sql } from "drizzle-orm"; +import { eq, desc, and, asc, lte, gte, or, sql } from "drizzle-orm"; import { drizzle } from "drizzle-orm/mysql2"; import { InsertUser, users, @@ -1280,6 +1280,56 @@ export async function listVisionTestRuns(userId?: number, limit = 50) { .limit(limit); } +export async function getVisionTestRunById(runId: number) { + const db = await getDb(); + if (!db) return null; + + const [row] = await db.select({ + id: visionTestRuns.id, + taskId: visionTestRuns.taskId, + userId: visionTestRuns.userId, + status: visionTestRuns.status, + visionStatus: visionTestRuns.visionStatus, + title: visionTestRuns.title, + }).from(visionTestRuns) + .where(eq(visionTestRuns.id, runId)) + .limit(1); + + return row || null; +} + +export async function listRepairableVisionTestRuns(limit = 50) { + const db = await getDb(); + if (!db) return []; + + return db.select({ + id: visionTestRuns.id, + taskId: visionTestRuns.taskId, + userId: visionTestRuns.userId, + title: visionTestRuns.title, + status: visionTestRuns.status, + visionStatus: visionTestRuns.visionStatus, + }).from(visionTestRuns) + .where(or(eq(visionTestRuns.visionStatus, "fallback"), eq(visionTestRuns.status, "failed"))) + .orderBy(desc(visionTestRuns.createdAt)) + .limit(limit); +} + +export async function resetVisionTestRun(taskId: string) { + const db = await getDb(); + if (!db) return; + + await db.update(visionTestRuns).set({ + status: "queued", + visionStatus: "pending", + summary: null, + corrections: null, + report: null, + warning: null, + error: null, + }).where(eq(visionTestRuns.taskId, taskId)); +} + export async function completeVisionTestRun(taskId: string, data: { visionStatus: "ok" | "fallback"; summary?: string | null; diff --git a/server/routers.ts b/server/routers.ts index e37d8ad..be67bcf 100644 --- a/server/routers.ts +++ b/server/routers.ts @@ -523,6 +523,54 @@ export const appRouter = router({ return { count: queued.length, queued }; }), + + retryRun: protectedProcedure + .input(z.object({ runId: z.number() })) + .mutation(async ({ ctx, input }) => { + const run = await db.getVisionTestRunById(input.runId); + if (!run) { + throw new TRPCError({ code: "NOT_FOUND", message: "Vision run not found" }); + } + if (ctx.user.role !== "admin" && run.userId !== ctx.user.id) { + throw new TRPCError({ code: "FORBIDDEN", message: "No permission to retry this vision run" }); + } + + await db.resetVisionTestRun(run.taskId); + await db.retryBackgroundTask(run.userId, run.taskId); + + if (ctx.user.role === "admin" && run.userId !== ctx.user.id) { + await auditAdminAction({ + adminUserId: ctx.user.id, + actionType: "vision_retry_run", + entityType: "vision_test_run", + entityId: String(run.id), + targetUserId: run.userId, + payload: { taskId: run.taskId, title: run.title }, + }); + } + + return { taskId: run.taskId, runId: run.id }; + }), + + retryFallbacks: adminProcedure + .input(z.object({ limit: z.number().min(1).max(100).default(20) }).optional()) + .mutation(async ({ ctx, input }) => { + const runs = await db.listRepairableVisionTestRuns(input?.limit ?? 20); + + for (const run of runs) { + await db.resetVisionTestRun(run.taskId); + await db.retryBackgroundTask(run.userId, run.taskId); + } + + await auditAdminAction({ + adminUserId: ctx.user.id, + actionType: "vision_retry_fallbacks", + entityType: "vision_test_run", + payload: { count: runs.length, runIds: runs.map((item) => item.id) }, + }); + + return { count: runs.length, runIds: runs.map((item) => item.id) }; + }), }), task: router({ diff --git a/server/taskWorker.ts b/server/taskWorker.ts index 866020d..98a0caa 100644 --- a/server/taskWorker.ts +++ b/server/taskWorker.ts @@ -13,6 +13,7 @@ import { } from "./prompts"; import { toPublicUrl } from "./publicUrl"; import { storagePut } from "./storage"; +import { extractStructuredJsonContent, normalizeMultimodalCorrectionReport } from "./vision"; import { normalizeAdjustedPlanResponse, normalizeTrainingPlanResponse, @@ -367,12 +368,7 @@ async function runMultimodalCorrectionTask(task: NonNullable) { schema: multimodalCorrectionSchema, }, }, - parse: (content) => { - if (typeof content === "string") { - return JSON.parse(content); - } - return content as Record; - }, + parse: (content) => normalizeMultimodalCorrectionReport(extractStructuredJsonContent(content)), }); const result = { diff --git a/server/vision.test.ts b/server/vision.test.ts new file mode 100644 index 0000000..ed93f7e --- /dev/null +++ b/server/vision.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, it } from "vitest"; +import { extractStructuredJsonContent, normalizeMultimodalCorrectionReport } from "./vision"; + +describe("extractStructuredJsonContent", () => { + it("parses JSON wrapped in markdown code fences", () => { + const parsed = extractStructuredJsonContent("```json\n{\"summary\":\"ok\",\"drills\":[]}\n```"); + expect(parsed).toMatchObject({ summary: "ok", drills: [] }); + }); + + it("parses text content arrays returned by chat completions", () => { + const parsed = extractStructuredJsonContent([ + { type: "text", text: "{\"summary\":\"ok\",\"drills\":[]}" }, + ]); + expect(parsed).toMatchObject({ summary: "ok", drills: [] }); + }); +}); + +describe("normalizeMultimodalCorrectionReport", () => { + it("fills missing drill arrays so markdown rendering does not crash", () => { + const report = normalizeMultimodalCorrectionReport({ + summary: "反手动作可继续优化", + overallScore: 81, + confidence: 76, + drills: [ + { + name: "反手节奏重建", + purpose: "稳定击球点", + durationMinutes: 8, + }, + ], + }); + + expect(report.drills[0]?.steps.length).toBeGreaterThan(0); + expect(report.drills[0]?.coachingCues.length).toBeGreaterThan(0); + expect(report.nextSessionFocus.length).toBeGreaterThan(0); + expect(report.recommendedCaptureTips.length).toBeGreaterThan(0); + }); + + it("maps provider-specific phaseAssessment payloads into phase findings", () => { + const report = normalizeMultimodalCorrectionReport({ + phaseAssessment: { + preparation: "肩膀转动信息不足", + contact: "无法判断击球点", + }, + issueTags: ["补拍侧后方连续帧"], + recommendedCaptureTips: "提供连续关键帧", + }); + + expect(report.phaseFindings.length).toBe(2); + expect(report.summary).toContain("preparation"); + expect(report.nextSessionFocus).toContain("补拍侧后方连续帧"); + expect(report.recommendedCaptureTips).toContain("提供连续关键帧"); + }); +}); diff --git a/server/vision.ts b/server/vision.ts new file mode 100644 index 0000000..f106553 --- /dev/null +++ b/server/vision.ts @@ -0,0 +1,193 @@ +import type { Message } from "./_core/llm"; + +export type MultimodalCorrectionReport = { + summary: string; + overallScore: number; + confidence: number; + phaseFindings: Array<{ + phase: string; + score: number; + observation: string; + impact: string; + }>; + bodyPartFindings: Array<{ + bodyPart: string; + issue: string; + recommendation: string; + }>; + priorityFixes: Array<{ + title: string; + why: string; + howToPractice: string; + successMetric: string; + }>; + drills: Array<{ + name: string; + purpose: string; + durationMinutes: number; + steps: string[]; + coachingCues: string[]; + }>; + safetyRisks: string[]; + nextSessionFocus: string[]; + recommendedCaptureTips: string[]; +}; + +function toObject(value: unknown): Record { + return value && typeof value === "object" && !Array.isArray(value) + ? (value as Record) + : {}; +} + +function toString(value: unknown, fallback: string) { + if (typeof value === "string" && value.trim().length > 0) { + return value.trim(); + } + if (typeof value === "number" || typeof value === "boolean") { + return String(value); + } + return fallback; +} + +function toNumber(value: unknown, fallback: number) { + const parsed = typeof value === "number" ? value : Number(value); + return Number.isFinite(parsed) ? parsed : fallback; +} + +function toStringArray(value: unknown, fallback: string[] = []) { + if (Array.isArray(value)) { + return value + .map((item) => toString(item, "")) + .map((item) => item.trim()) + .filter(Boolean); + } + if (typeof value === "string" && value.trim().length > 0) { + return [value.trim()]; + } + return fallback; +} + +function extractTextContent(content: unknown) { + if (typeof content === "string") { + return content.trim(); + } + + if (!Array.isArray(content)) { + return ""; + } + + return content + .map((part) => { + if (typeof part === "string") return part; + const record = part as Message["content"]; + if (record && typeof record === "object" && "type" in record && record.type === "text") { + return typeof record.text === "string" ? record.text : ""; + } + return ""; + }) + .join("\n") + .trim(); +} + +function extractJsonBlock(text: string) { + const fencedMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/i); + if (fencedMatch?.[1]) { + return fencedMatch[1].trim(); + } + + const objectStart = text.indexOf("{"); + const objectEnd = text.lastIndexOf("}"); + if (objectStart >= 0 && objectEnd > objectStart) { + return text.slice(objectStart, objectEnd + 1); + } + + return text; +} + +export function extractStructuredJsonContent(content: unknown) { + if (content && typeof content === "object" && !Array.isArray(content)) { + return content as Record; + } + + const text = extractTextContent(content); + if (!text) { + throw new Error("Vision model returned empty content"); + } + + const jsonText = extractJsonBlock(text); + return JSON.parse(jsonText) as Record; +} + +export function normalizeMultimodalCorrectionReport(raw: unknown): MultimodalCorrectionReport { + const source = toObject(raw); + const phaseAssessment = toObject(source.phaseAssessment); + const derivedPhaseFindings = + Array.isArray(source.phaseFindings) && source.phaseFindings.length > 0 + ? source.phaseFindings + : Object.entries(phaseAssessment).map(([phase, observation]) => ({ + phase, + score: 60, + observation: toString(observation, "当前图片信息不足,建议补充连续动作帧。"), + impact: "该阶段信息不足会限制系统对发力链条和节奏的判断。", + })); + const derivedSummary = + typeof source.summary === "string" && source.summary.trim().length > 0 + ? source.summary + : derivedPhaseFindings.length > 0 + ? `已完成图片审阅。当前可见结论:${derivedPhaseFindings + .slice(0, 2) + .map((item) => `${item.phase}${item.observation}`) + .join(";")}` + : "已完成图片审阅,请结合关键修正点继续训练。"; + + return { + summary: toString(derivedSummary, "已完成图片审阅,请结合关键修正点继续训练。"), + overallScore: Math.max(0, Math.min(100, toNumber(source.overallScore, 75))), + confidence: Math.max(0, Math.min(100, toNumber(source.confidence, 70))), + phaseFindings: derivedPhaseFindings.map((item, index) => { + const row = toObject(item); + return { + phase: toString(row.phase, `阶段 ${index + 1}`), + score: Math.max(0, Math.min(100, toNumber(row.score, 70))), + observation: toString(row.observation, "该阶段已完成基础识别。"), + impact: toString(row.impact, "建议结合连续视频继续观察动作节奏。"), + }; + }), + bodyPartFindings: Array.isArray(source.bodyPartFindings) + ? source.bodyPartFindings.map((item, index) => { + const row = toObject(item); + return { + bodyPart: toString(row.bodyPart, `部位 ${index + 1}`), + issue: toString(row.issue, "需要继续观察该部位的发力与稳定性。"), + recommendation: toString(row.recommendation, "下次拍摄时提供更完整角度并重复同类动作。"), + }; + }) + : [], + priorityFixes: Array.isArray(source.priorityFixes) + ? source.priorityFixes.map((item, index) => { + const row = toObject(item); + return { + title: toString(row.title, `修正重点 ${index + 1}`), + why: toString(row.why, "该问题会影响击球质量与动作稳定性。"), + howToPractice: toString(row.howToPractice, "请使用影子挥拍和定点重复练习进行修正。"), + successMetric: toString(row.successMetric, "连续 3 组动作保持稳定且节奏一致。"), + }; + }) + : [], + drills: Array.isArray(source.drills) + ? source.drills.map((item, index) => { + const row = toObject(item); + return { + name: toString(row.name, `练习 ${index + 1}`), + purpose: toString(row.purpose, "针对当前视觉识别出的重点问题做专项修正。"), + durationMinutes: Math.max(3, toNumber(row.durationMinutes, 8)), + steps: toStringArray(row.steps, ["从慢速影子挥拍开始,逐步加入完整节奏。"]), + coachingCues: toStringArray(row.coachingCues, ["保持击球点在身体前侧", "注意转体与重心传递"]), + }; + }) + : [], + safetyRisks: toStringArray(source.safetyRisks), + nextSessionFocus: toStringArray(source.nextSessionFocus, toStringArray(source.issueTags, ["保持同一动作连续拍摄 6-10 次"])), + recommendedCaptureTips: toStringArray(source.recommendedCaptureTips, ["保证全身入镜,并保持拍摄角度稳定"]), + }; +}