Repair multimodal vision parsing and rerun fallback history
这个提交包含在:
52
server/db.ts
52
server/db.ts
@@ -1,4 +1,4 @@
|
||||
import { eq, desc, and, asc, lte, gte, sql } from "drizzle-orm";
|
||||
import { eq, desc, and, asc, lte, gte, or, sql } from "drizzle-orm";
|
||||
import { drizzle } from "drizzle-orm/mysql2";
|
||||
import {
|
||||
InsertUser, users,
|
||||
@@ -1280,6 +1280,56 @@ export async function listVisionTestRuns(userId?: number, limit = 50) {
|
||||
.limit(limit);
|
||||
}
|
||||
|
||||
/**
 * Fetches one vision test run by primary key.
 *
 * Only a narrow projection is selected: id, taskId, userId, status,
 * visionStatus and title.
 *
 * @param runId - Primary key of the vision test run to load.
 * @returns The matching row, or `null` when `getDb()` yields no handle or no
 *   row has that id.
 */
export async function getVisionTestRunById(runId: number) {
  const database = await getDb();
  if (!database) {
    return null;
  }

  const projection = {
    id: visionTestRuns.id,
    taskId: visionTestRuns.taskId,
    userId: visionTestRuns.userId,
    status: visionTestRuns.status,
    visionStatus: visionTestRuns.visionStatus,
    title: visionTestRuns.title,
  };

  // The id filter plus LIMIT 1 means at most one row comes back.
  const matches = await database
    .select(projection)
    .from(visionTestRuns)
    .where(eq(visionTestRuns.id, runId))
    .limit(1);

  return matches[0] || null;
}
|
||||
|
||||
/**
 * Lists vision test runs that are candidates for a re-run: rows whose
 * `visionStatus` is "fallback" or whose `status` is "failed", newest first
 * (ordered by `createdAt` descending).
 *
 * @param limit - Maximum number of rows to return (defaults to 50).
 * @returns The matching rows, or an empty array when `getDb()` yields no handle.
 */
export async function listRepairableVisionTestRuns(limit = 50) {
  const database = await getDb();
  if (!database) {
    return [];
  }

  // A run is repairable if either condition holds.
  const needsRepair = or(
    eq(visionTestRuns.visionStatus, "fallback"),
    eq(visionTestRuns.status, "failed"),
  );

  return database
    .select({
      id: visionTestRuns.id,
      taskId: visionTestRuns.taskId,
      userId: visionTestRuns.userId,
      title: visionTestRuns.title,
      status: visionTestRuns.status,
      visionStatus: visionTestRuns.visionStatus,
    })
    .from(visionTestRuns)
    .where(needsRepair)
    .orderBy(desc(visionTestRuns.createdAt))
    .limit(limit);
}
|
||||
|
||||
/**
 * Puts the vision test run(s) for a task back into their initial state:
 * `status` becomes "queued", `visionStatus` becomes "pending", and the
 * result columns (summary, corrections, report, warning, error) are cleared.
 *
 * Does nothing when `getDb()` yields no handle.
 *
 * @param taskId - Task identifier used to select the rows to reset.
 */
export async function resetVisionTestRun(taskId: string) {
  const database = await getDb();
  if (!database) {
    return;
  }

  await database
    .update(visionTestRuns)
    .set({
      status: "queued",
      visionStatus: "pending",
      // Clear everything a previous attempt may have written.
      summary: null,
      corrections: null,
      report: null,
      warning: null,
      error: null,
    })
    .where(eq(visionTestRuns.taskId, taskId));
}
|
||||
|
||||
export async function completeVisionTestRun(taskId: string, data: {
|
||||
visionStatus: "ok" | "fallback";
|
||||
summary?: string | null;
|
||||
|
||||
@@ -523,6 +523,54 @@ export const appRouter = router({
|
||||
|
||||
return { count: queued.length, queued };
|
||||
}),
|
||||
|
||||
// Re-queues a single vision run. The caller must own the run or have the
// "admin" role; an admin retrying someone else's run is recorded via
// auditAdminAction. Returns the task id and run id that were re-queued.
retryRun: protectedProcedure
  .input(z.object({ runId: z.number() }))
  .mutation(async ({ ctx, input }) => {
    const run = await db.getVisionTestRunById(input.runId);
    if (!run) {
      throw new TRPCError({ code: "NOT_FOUND", message: "Vision run not found" });
    }

    const isAdmin = ctx.user.role === "admin";
    const isOwner = run.userId === ctx.user.id;
    if (!isAdmin && !isOwner) {
      throw new TRPCError({ code: "FORBIDDEN", message: "No permission to retry this vision run" });
    }

    // Clear the stored result first, then hand the task back to the worker queue.
    await db.resetVisionTestRun(run.taskId);
    await db.retryBackgroundTask(run.userId, run.taskId);

    // Only cross-user retries by an admin are audited.
    if (isAdmin && !isOwner) {
      await auditAdminAction({
        adminUserId: ctx.user.id,
        actionType: "vision_retry_run",
        entityType: "vision_test_run",
        entityId: String(run.id),
        targetUserId: run.userId,
        payload: { taskId: run.taskId, title: run.title },
      });
    }

    return { taskId: run.taskId, runId: run.id };
  }),
|
||||
|
||||
// Bulk re-queue built on adminProcedure: loads up to `limit` repairable runs
// (default 20, accepted range 1-100), resets and re-queues them one at a time
// in listing order, then writes a single audit entry covering the batch.
retryFallbacks: adminProcedure
  .input(z.object({ limit: z.number().min(1).max(100).default(20) }).optional())
  .mutation(async ({ ctx, input }) => {
    const batchSize = input?.limit ?? 20;
    const runs = await db.listRepairableVisionTestRuns(batchSize);

    // Sequential on purpose: each run is fully reset before it is re-queued.
    for (const { taskId, userId } of runs) {
      await db.resetVisionTestRun(taskId);
      await db.retryBackgroundTask(userId, taskId);
    }

    const count = runs.length;
    const runIds = runs.map((item) => item.id);

    await auditAdminAction({
      adminUserId: ctx.user.id,
      actionType: "vision_retry_fallbacks",
      entityType: "vision_test_run",
      payload: { count, runIds },
    });

    return { count, runIds };
  }),
|
||||
}),
|
||||
|
||||
task: router({
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
} from "./prompts";
|
||||
import { toPublicUrl } from "./publicUrl";
|
||||
import { storagePut } from "./storage";
|
||||
import { extractStructuredJsonContent, normalizeMultimodalCorrectionReport } from "./vision";
|
||||
import {
|
||||
normalizeAdjustedPlanResponse,
|
||||
normalizeTrainingPlanResponse,
|
||||
@@ -367,12 +368,7 @@ async function runMultimodalCorrectionTask(task: NonNullable<TaskRow>) {
|
||||
schema: multimodalCorrectionSchema,
|
||||
},
|
||||
},
|
||||
parse: (content) => {
|
||||
if (typeof content === "string") {
|
||||
return JSON.parse(content);
|
||||
}
|
||||
return content as Record<string, unknown>;
|
||||
},
|
||||
parse: (content) => normalizeMultimodalCorrectionReport(extractStructuredJsonContent(content)),
|
||||
});
|
||||
|
||||
const result = {
|
||||
|
||||
54
server/vision.test.ts
普通文件
54
server/vision.test.ts
普通文件
@@ -0,0 +1,54 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { extractStructuredJsonContent, normalizeMultimodalCorrectionReport } from "./vision";
|
||||
|
||||
describe("extractStructuredJsonContent", () => {
  // Both fixtures below encode the same object; this is what each must decode to.
  const expectedPayload = { summary: "ok", drills: [] };

  it("parses JSON wrapped in markdown code fences", () => {
    const fencedReply = "```json\n{\"summary\":\"ok\",\"drills\":[]}\n```";

    expect(extractStructuredJsonContent(fencedReply)).toMatchObject(expectedPayload);
  });

  it("parses text content arrays returned by chat completions", () => {
    const contentParts = [
      { type: "text", text: "{\"summary\":\"ok\",\"drills\":[]}" },
    ];

    expect(extractStructuredJsonContent(contentParts)).toMatchObject(expectedPayload);
  });
});
|
||||
|
||||
describe("normalizeMultimodalCorrectionReport", () => {
  it("fills missing drill arrays so markdown rendering does not crash", () => {
    // The drill deliberately omits `steps` and `coachingCues`; the report also
    // omits `nextSessionFocus` and `recommendedCaptureTips`.
    const partialDrill = {
      name: "反手节奏重建",
      purpose: "稳定击球点",
      durationMinutes: 8,
    };
    const normalized = normalizeMultimodalCorrectionReport({
      summary: "反手动作可继续优化",
      overallScore: 81,
      confidence: 76,
      drills: [partialDrill],
    });

    const [firstDrill] = normalized.drills;
    expect(firstDrill?.steps.length).toBeGreaterThan(0);
    expect(firstDrill?.coachingCues.length).toBeGreaterThan(0);
    expect(normalized.nextSessionFocus.length).toBeGreaterThan(0);
    expect(normalized.recommendedCaptureTips.length).toBeGreaterThan(0);
  });

  it("maps provider-specific phaseAssessment payloads into phase findings", () => {
    // Provider variant: a phaseAssessment map, issueTags, and a plain-string
    // capture tip instead of the canonical field shapes.
    const providerPayload = {
      phaseAssessment: {
        preparation: "肩膀转动信息不足",
        contact: "无法判断击球点",
      },
      issueTags: ["补拍侧后方连续帧"],
      recommendedCaptureTips: "提供连续关键帧",
    };

    const normalized = normalizeMultimodalCorrectionReport(providerPayload);

    expect(normalized.phaseFindings.length).toBe(2);
    expect(normalized.summary).toContain("preparation");
    expect(normalized.nextSessionFocus).toContain("补拍侧后方连续帧");
    expect(normalized.recommendedCaptureTips).toContain("提供连续关键帧");
  });
});
|
||||
193
server/vision.ts
普通文件
193
server/vision.ts
普通文件
@@ -0,0 +1,193 @@
|
||||
import type { Message } from "./_core/llm";
|
||||
|
||||
/** One entry of `MultimodalCorrectionReport.phaseFindings`. */
type PhaseFinding = {
  phase: string;
  score: number;
  observation: string;
  impact: string;
};

/** One entry of `MultimodalCorrectionReport.bodyPartFindings`. */
type BodyPartFinding = {
  bodyPart: string;
  issue: string;
  recommendation: string;
};

/** One entry of `MultimodalCorrectionReport.priorityFixes`. */
type PriorityFix = {
  title: string;
  why: string;
  howToPractice: string;
  successMetric: string;
};

/** One entry of `MultimodalCorrectionReport.drills`. */
type CorrectionDrill = {
  name: string;
  purpose: string;
  durationMinutes: number;
  steps: string[];
  coachingCues: string[];
};

/**
 * Canonical shape of a multimodal correction report: every field is present
 * and every list is a real array, regardless of what the provider returned.
 */
export type MultimodalCorrectionReport = {
  summary: string;
  overallScore: number;
  confidence: number;
  phaseFindings: PhaseFinding[];
  bodyPartFindings: BodyPartFinding[];
  priorityFixes: PriorityFix[];
  drills: CorrectionDrill[];
  safetyRisks: string[];
  nextSessionFocus: string[];
  recommendedCaptureTips: string[];
};
|
||||
|
||||
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @param value - Anything, typically a piece of provider JSON.
 * @returns `value` itself when it is a non-null, non-array object; otherwise
 *   a fresh empty object.
 */
function toObject(value: unknown): Record<string, unknown> {
  const isRecord = typeof value === "object" && value !== null && !Array.isArray(value);
  if (isRecord) {
    return value as Record<string, unknown>;
  }
  return {};
}
|
||||
|
||||
/**
 * Coerces a loosely typed value to display text.
 *
 * @param value - Candidate value.
 * @param fallback - Returned when `value` carries no usable text.
 * @returns The trimmed string when `value` is a non-blank string, the
 *   `String(...)` form when it is a number or boolean, otherwise `fallback`.
 */
function toString(value: unknown, fallback: string) {
  switch (typeof value) {
    case "string": {
      const trimmed = value.trim();
      return trimmed.length > 0 ? trimmed : fallback;
    }
    case "number":
    case "boolean":
      return String(value);
    default:
      return fallback;
  }
}
|
||||
|
||||
/**
 * Coerces a loosely typed value to a finite number.
 *
 * Only real numbers and non-blank numeric strings are accepted. Everything
 * else yields `fallback`, including `null`, `""`, booleans and arrays, which
 * `Number(...)` would otherwise silently turn into 0 or 1. This keeps a
 * missing score from being reported as a real 0.
 *
 * @param value - Candidate value.
 * @param fallback - Returned when `value` is not a usable number.
 * @returns The finite numeric value, or `fallback`.
 */
function toNumber(value: unknown, fallback: number) {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : fallback;
  }

  if (typeof value === "string") {
    const trimmed = value.trim();
    // Number("") is 0, so blank strings must be rejected explicitly.
    if (trimmed.length === 0) {
      return fallback;
    }
    const parsed = Number(trimmed);
    return Number.isFinite(parsed) ? parsed : fallback;
  }

  return fallback;
}
|
||||
|
||||
/**
 * Coerces a loosely typed value to a list of non-empty strings.
 *
 * - Array: each entry goes through `toString(entry, "")` and entries that
 *   yield no text are dropped. If nothing usable remains, `fallback` is
 *   returned, so an empty or all-blank array is treated like a blank string.
 * - Non-blank string: wrapped as a single-element list (trimmed).
 * - Anything else: `fallback`.
 *
 * @param value - Candidate value.
 * @param fallback - Returned when `value` holds no usable text (defaults to `[]`).
 * @returns The normalized list, or `fallback`.
 */
function toStringArray(value: unknown, fallback: string[] = []) {
  if (Array.isArray(value)) {
    // toString already trims, so no second trim pass is needed.
    const entries = value.map((item) => toString(item, "")).filter(Boolean);
    return entries.length > 0 ? entries : fallback;
  }

  if (typeof value === "string") {
    const trimmed = value.trim();
    if (trimmed.length > 0) {
      return [trimmed];
    }
  }

  return fallback;
}
|
||||
|
||||
/**
 * Flattens model message content into a single text string.
 *
 * @param content - A string, or an array of content parts; each part may be a
 *   plain string or an object with `type === "text"` and a string `text`.
 * @returns The trimmed string for string input; for arrays, the text of every
 *   part joined with newlines and trimmed (parts that are not text contribute
 *   an empty segment); `""` for any other input.
 */
function extractTextContent(content: unknown) {
  if (typeof content === "string") {
    return content.trim();
  }
  if (!Array.isArray(content)) {
    return "";
  }

  const segments: string[] = [];
  for (const part of content) {
    if (typeof part === "string") {
      segments.push(part);
      continue;
    }

    const record = part as Message["content"];
    if (record && typeof record === "object" && "type" in record && record.type === "text") {
      segments.push(typeof record.text === "string" ? record.text : "");
      continue;
    }

    // Non-text parts still occupy a slot so the join output is unchanged.
    segments.push("");
  }

  return segments.join("\n").trim();
}
|
||||
|
||||
/**
 * Picks the most likely JSON payload out of free-form model text.
 *
 * Order of preference:
 * 1. the trimmed body of the first markdown code fence (optionally tagged
 *    `json`), when that body is non-empty;
 * 2. the span from the first `{` to the last `}`;
 * 3. the text unchanged.
 *
 * @param text - Raw text returned by the model.
 * @returns The candidate JSON text (not yet parsed).
 */
function extractJsonBlock(text: string) {
  const fence = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
  const fenceBody = fence?.[1];
  if (fenceBody) {
    return fenceBody.trim();
  }

  const firstBrace = text.indexOf("{");
  const lastBrace = text.lastIndexOf("}");
  const hasObjectSpan = firstBrace >= 0 && lastBrace > firstBrace;

  return hasObjectSpan ? text.slice(firstBrace, lastBrace + 1) : text;
}
|
||||
|
||||
/**
 * Turns a model response into a parsed JSON value.
 *
 * A non-null, non-array object is returned as-is. Otherwise the content is
 * flattened to text via `extractTextContent`, the JSON portion is isolated
 * via `extractJsonBlock`, and the result is passed to `JSON.parse`.
 *
 * @param content - Model response content: an object, a string, or an array
 *   of content parts.
 * @returns The parsed value, typed as a string-keyed record. The parsed value
 *   is not validated to actually be an object.
 * @throws Error when no text can be extracted from `content`.
 * @throws SyntaxError when the extracted text is not valid JSON.
 */
export function extractStructuredJsonContent(content: unknown) {
  const isAlreadyObject = typeof content === "object" && content !== null && !Array.isArray(content);
  if (isAlreadyObject) {
    return content as Record<string, unknown>;
  }

  const text = extractTextContent(content);
  if (text.length === 0) {
    throw new Error("Vision model returned empty content");
  }

  return JSON.parse(extractJsonBlock(text)) as Record<string, unknown>;
}
|
||||
|
||||
/** Summary used when neither the provider nor the phase findings supply one. */
const DEFAULT_REPORT_SUMMARY = "已完成图片审阅,请结合关键修正点继续训练。";

/** Coerces a score-like value to a number and clamps it into the 0-100 range. */
function clampScore(value: unknown, fallback: number): number {
  return Math.max(0, Math.min(100, toNumber(value, fallback)));
}

/**
 * Converts an arbitrary provider payload into a complete
 * `MultimodalCorrectionReport`.
 *
 * Every field is coerced and given a default, so the result always has the
 * full shape even for partial or malformed input. Provider variants handled:
 * - `phaseAssessment` (a map of phase name to observation) is converted into
 *   phase findings when `phaseFindings` is missing or empty;
 * - `issueTags` is used for `nextSessionFocus` when that field is unusable;
 * - list fields may arrive as a single string.
 *
 * The derived summary is built from the already-normalized phase findings,
 * so malformed entries (e.g. `null`, missing `phase` / `observation`) can
 * neither throw nor leak "undefined" into the text.
 *
 * @param raw - Parsed provider output; non-object input is treated as `{}`.
 * @returns A fully populated report.
 */
export function normalizeMultimodalCorrectionReport(raw: unknown): MultimodalCorrectionReport {
  const source = toObject(raw);

  // Prefer explicit findings; otherwise synthesize them from phaseAssessment.
  const rawPhaseFindings: unknown[] =
    Array.isArray(source.phaseFindings) && source.phaseFindings.length > 0
      ? source.phaseFindings
      : Object.entries(toObject(source.phaseAssessment)).map(([phase, observation]) => ({
          phase,
          score: 60,
          observation: toString(observation, "当前图片信息不足,建议补充连续动作帧。"),
          impact: "该阶段信息不足会限制系统对发力链条和节奏的判断。",
        }));

  const phaseFindings = rawPhaseFindings.map((item, index) => {
    const row = toObject(item);
    return {
      phase: toString(row.phase, `阶段 ${index + 1}`),
      score: clampScore(row.score, 70),
      observation: toString(row.observation, "该阶段已完成基础识别。"),
      impact: toString(row.impact, "建议结合连续视频继续观察动作节奏。"),
    };
  });

  // Only a non-blank string counts as a provider-supplied summary.
  const providedSummary = typeof source.summary === "string" ? source.summary.trim() : "";
  const derivedSummary =
    phaseFindings.length > 0
      ? `已完成图片审阅。当前可见结论:${phaseFindings
          .slice(0, 2)
          .map((item) => `${item.phase}${item.observation}`)
          .join(";")}`
      : DEFAULT_REPORT_SUMMARY;

  const bodyPartFindings = Array.isArray(source.bodyPartFindings)
    ? source.bodyPartFindings.map((item: unknown, index: number) => {
        const row = toObject(item);
        return {
          bodyPart: toString(row.bodyPart, `部位 ${index + 1}`),
          issue: toString(row.issue, "需要继续观察该部位的发力与稳定性。"),
          recommendation: toString(row.recommendation, "下次拍摄时提供更完整角度并重复同类动作。"),
        };
      })
    : [];

  const priorityFixes = Array.isArray(source.priorityFixes)
    ? source.priorityFixes.map((item: unknown, index: number) => {
        const row = toObject(item);
        return {
          title: toString(row.title, `修正重点 ${index + 1}`),
          why: toString(row.why, "该问题会影响击球质量与动作稳定性。"),
          howToPractice: toString(row.howToPractice, "请使用影子挥拍和定点重复练习进行修正。"),
          successMetric: toString(row.successMetric, "连续 3 组动作保持稳定且节奏一致。"),
        };
      })
    : [];

  const drills = Array.isArray(source.drills)
    ? source.drills.map((item: unknown, index: number) => {
        const row = toObject(item);
        return {
          name: toString(row.name, `练习 ${index + 1}`),
          purpose: toString(row.purpose, "针对当前视觉识别出的重点问题做专项修正。"),
          // Drills shorter than 3 minutes are raised to 3.
          durationMinutes: Math.max(3, toNumber(row.durationMinutes, 8)),
          steps: toStringArray(row.steps, ["从慢速影子挥拍开始,逐步加入完整节奏。"]),
          coachingCues: toStringArray(row.coachingCues, ["保持击球点在身体前侧", "注意转体与重心传递"]),
        };
      })
    : [];

  return {
    summary: providedSummary.length > 0 ? providedSummary : derivedSummary,
    overallScore: clampScore(source.overallScore, 75),
    confidence: clampScore(source.confidence, 70),
    phaseFindings,
    bodyPartFindings,
    priorityFixes,
    drills,
    safetyRisks: toStringArray(source.safetyRisks),
    nextSessionFocus: toStringArray(
      source.nextSessionFocus,
      toStringArray(source.issueTags, ["保持同一动作连续拍摄 6-10 次"]),
    ),
    recommendedCaptureTips: toStringArray(source.recommendedCaptureTips, ["保证全身入镜,并保持拍摄角度稳定"]),
  };
}
|
||||
在新工单中引用
屏蔽一个用户