Repair multimodal vision parsing and rerun fallback history

2026-03-15 02:31:44 +08:00
--- a/server/vision.ts
+++ b/server/vision.ts
@@ -0,0 +1,193 @@
+import type { Message } from "./_core/llm";
+
+/**
+ * Normalized coaching report derived from a vision model's review of submitted images.
+ *
+ * `normalizeMultimodalCorrectionReport` produces values of this shape and fills in a
+ * default for every field the raw model output leaves out.
+ */
+export type MultimodalCorrectionReport = {
+  /** Short overall conclusion of the review. */
+  summary: string;
+  /** Overall rating; the normalizer clamps it to the 0–100 range. */
+  overallScore: number;
+  /** Confidence value; the normalizer clamps it to the 0–100 range. */
+  confidence: number;
+  /** One entry per reviewed phase of the movement. */
+  phaseFindings: {
+    phase: string;
+    /** Phase rating; the normalizer clamps it to the 0–100 range. */
+    score: number;
+    observation: string;
+    impact: string;
+  }[];
+  /** Issues attributed to a specific body part, each with a recommendation. */
+  bodyPartFindings: {
+    bodyPart: string;
+    issue: string;
+    recommendation: string;
+  }[];
+  /** Corrections to work on first, each with a practice method and a success criterion. */
+  priorityFixes: {
+    title: string;
+    why: string;
+    howToPractice: string;
+    successMetric: string;
+  }[];
+  /** Practice drills suggested for the findings above. */
+  drills: {
+    name: string;
+    purpose: string;
+    /** Drill length in minutes; the normalizer enforces a minimum of 3. */
+    durationMinutes: number;
+    steps: string[];
+    coachingCues: string[];
+  }[];
+  safetyRisks: string[];
+  nextSessionFocus: string[];
+  recommendedCaptureTips: string[];
+};
+
+/**
+ * Views `value` as a string-keyed record.
+ *
+ * @param value Anything, typically a fragment of parsed model output.
+ * @returns `value` itself when it is a non-null, non-array object; otherwise a new
+ *   empty object, so callers can read properties without further checks.
+ */
+function toObject(value: unknown): Record<string, unknown> {
+  if (!value || typeof value !== "object" || Array.isArray(value)) {
+    return {};
+  }
+  return value as Record<string, unknown>;
+}
+
+/**
+ * Coerces a loosely typed value to display text.
+ *
+ * @param value Candidate value.
+ * @param fallback Text returned when `value` cannot be used.
+ * @returns The trimmed string when `value` is a non-blank string, `String(value)` for
+ *   numbers and booleans, and `fallback` for everything else (including blank strings).
+ */
+function toString(value: unknown, fallback: string) {
+  switch (typeof value) {
+    case "string": {
+      const trimmed = value.trim();
+      return trimmed.length > 0 ? trimmed : fallback;
+    }
+    case "number":
+    case "boolean":
+      return String(value);
+    default:
+      return fallback;
+  }
+}
+
+/**
+ * Coerces a loosely typed value to a finite number.
+ *
+ * Only real numbers and non-blank numeric strings are accepted. Passing arbitrary values
+ * through `Number()` would turn `null`, `""`, whitespace, `false` and `[]` into `0` (and
+ * `true` into `1`), so a model reply such as `"overallScore": null` would be reported as
+ * a score of 0 instead of the caller's fallback.
+ *
+ * @param value Candidate value.
+ * @param fallback Number returned when `value` is missing or not numeric.
+ * @returns The finite numeric value of `value`, or `fallback`.
+ */
+function toNumber(value: unknown, fallback: number) {
+  if (typeof value === "number") {
+    return Number.isFinite(value) ? value : fallback;
+  }
+  if (typeof value === "string") {
+    const text = value.trim();
+    if (text.length === 0) {
+      return fallback;
+    }
+    const parsed = Number(text);
+    return Number.isFinite(parsed) ? parsed : fallback;
+  }
+  return fallback;
+}
+
+/**
+ * Coerces a loosely typed value to a list of non-empty strings.
+ *
+ * @param value Candidate value: an array of items, or a single string.
+ * @param fallback List returned when `value` is neither an array nor a non-blank string.
+ * @returns For an array, its items converted with `toString` and with empty results
+ *   dropped (this may be an empty list; `fallback` is not applied in that case). For a
+ *   non-blank string, a one-element list holding the trimmed string. Otherwise `fallback`.
+ */
+function toStringArray(value: unknown, fallback: string[] = []) {
+  if (Array.isArray(value)) {
+    const cleaned: string[] = [];
+    for (const item of value) {
+      const text = toString(item, "").trim();
+      if (text) {
+        cleaned.push(text);
+      }
+    }
+    return cleaned;
+  }
+
+  if (typeof value !== "string") {
+    return fallback;
+  }
+  const single = value.trim();
+  return single.length > 0 ? [single] : fallback;
+}
+
+/**
+ * Collects the plain text carried by a model message's content.
+ *
+ * @param content Either a string or an array of content parts.
+ * @returns The trimmed string when `content` is a string. For an array, one piece per
+ *   element — the element itself when it is a string, the `text` of a `{ type: "text" }`
+ *   part, and an empty piece for anything else — joined with newlines and trimmed.
+ *   Any other input yields `""`.
+ */
+function extractTextContent(content: unknown) {
+  if (typeof content === "string") {
+    return content.trim();
+  }
+  if (!Array.isArray(content)) {
+    return "";
+  }
+
+  const pieces: string[] = [];
+  for (const part of content) {
+    if (typeof part === "string") {
+      pieces.push(part);
+      continue;
+    }
+    const record = part as Message["content"];
+    if (record && typeof record === "object" && "type" in record && record.type === "text") {
+      pieces.push(typeof record.text === "string" ? record.text : "");
+    } else {
+      // Non-text parts still contribute an empty piece, so they leave a blank line in the join.
+      pieces.push("");
+    }
+  }
+  return pieces.join("\n").trim();
+}
+
+/**
+ * Returns the span from the first `{` to the last `}` in `text`, or `undefined` when
+ * no such span exists.
+ */
+function sliceOutermostObject(text: string): string | undefined {
+  const start = text.indexOf("{");
+  const end = text.lastIndexOf("}");
+  return start >= 0 && end > start ? text.slice(start, end + 1) : undefined;
+}
+
+/**
+ * Picks the most likely JSON payload out of free-form model text.
+ *
+ * Order of preference:
+ * 1. The body of the first fenced code block. If that body does not already start with
+ *    `{` or `[` — which happens when the fence carries an info string other than `json`
+ *    (for example "javascript" or "jsonc"), because the tag ends up inside the capture —
+ *    it is narrowed to its outermost `{…}` span.
+ * 2. The outermost `{…}` span of the whole text.
+ * 3. The text unchanged.
+ *
+ * @param text Raw text returned by the model.
+ * @returns The candidate JSON text; it is not validated here.
+ */
+function extractJsonBlock(text: string) {
+  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1]?.trim();
+  if (fenced) {
+    if (fenced.startsWith("{") || fenced.startsWith("[")) {
+      return fenced;
+    }
+    return sliceOutermostObject(fenced) ?? fenced;
+  }
+
+  return sliceOutermostObject(text) ?? text;
+}
+
+/**
+ * Turns a vision model's message content into a plain JSON object.
+ *
+ * @param content Message content: an already-structured object, a string, or an array
+ *   of content parts.
+ * @returns `content` itself when it is already a non-array object; otherwise the object
+ *   parsed from the JSON found in its text.
+ * @throws {Error} When the content carries no text, or when the parsed JSON is not an
+ *   object (an array, `null` or a scalar would otherwise be passed on under a type that
+ *   says it is a record).
+ * @throws {SyntaxError} When the extracted text is not valid JSON.
+ */
+export function extractStructuredJsonContent(content: unknown) {
+  if (content && typeof content === "object" && !Array.isArray(content)) {
+    return content as Record<string, unknown>;
+  }
+
+  const text = extractTextContent(content);
+  if (!text) {
+    throw new Error("Vision model returned empty content");
+  }
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(extractJsonBlock(text));
+  } catch (error) {
+    // Keep the SyntaxError type callers may already handle, but say where it came from.
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new SyntaxError(`Vision model returned content that is not valid JSON: ${reason}`);
+  }
+
+  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw new Error("Vision model returned JSON that is not an object");
+  }
+  return parsed as Record<string, unknown>;
+}
+
+/**
+ * Builds a complete `MultimodalCorrectionReport` from raw, untrusted model output.
+ *
+ * Every field is coerced to its expected type and given a default when missing. Scores
+ * and confidence are clamped to 0–100, and drill durations are raised to at least 3.
+ * When `phaseFindings` is absent or empty, findings are derived from the entries of
+ * `phaseAssessment` with a fixed score of 60.
+ *
+ * The summary fallback is built from the already-normalized phase findings. Building it
+ * from the raw items would throw on a `null` entry and would print "undefined" for a
+ * missing `phase` or `observation`.
+ *
+ * @param raw Parsed model output; anything that is not a plain object is treated as empty.
+ * @returns A report in which every field is populated.
+ */
+export function normalizeMultimodalCorrectionReport(raw: unknown): MultimodalCorrectionReport {
+  const source = toObject(raw);
+  const defaultSummary = "已完成图片审阅，请结合关键修正点继续训练。";
+  const clampScore = (value: unknown, fallback: number) =>
+    Math.max(0, Math.min(100, toNumber(value, fallback)));
+
+  // Prefer explicit findings; otherwise derive one finding per phaseAssessment entry.
+  const rawPhaseFindings: unknown[] =
+    Array.isArray(source.phaseFindings) && source.phaseFindings.length > 0
+      ? source.phaseFindings
+      : Object.entries(toObject(source.phaseAssessment)).map(([phase, observation]) => ({
+          phase,
+          score: 60,
+          observation: toString(observation, "当前图片信息不足，建议补充连续动作帧。"),
+          impact: "该阶段信息不足会限制系统对发力链条和节奏的判断。",
+        }));
+
+  const phaseFindings = rawPhaseFindings.map((item, index) => {
+    const row = toObject(item);
+    return {
+      phase: toString(row.phase, `阶段 ${index + 1}`),
+      score: clampScore(row.score, 70),
+      observation: toString(row.observation, "该阶段已完成基础识别。"),
+      impact: toString(row.impact, "建议结合连续视频继续观察动作节奏。"),
+    };
+  });
+
+  // Only a non-blank string counts as a model-provided summary.
+  const providedSummary =
+    typeof source.summary === "string" && source.summary.trim().length > 0
+      ? source.summary.trim()
+      : undefined;
+  const derivedSummary =
+    phaseFindings.length > 0
+      ? `已完成图片审阅。当前可见结论：${phaseFindings
+          .slice(0, 2)
+          .map((item) => `${item.phase}${item.observation}`)
+          .join("；")}`
+      : defaultSummary;
+
+  return {
+    summary: providedSummary ?? derivedSummary,
+    overallScore: clampScore(source.overallScore, 75),
+    confidence: clampScore(source.confidence, 70),
+    phaseFindings,
+    bodyPartFindings: Array.isArray(source.bodyPartFindings)
+      ? source.bodyPartFindings.map((item, index) => {
+          const row = toObject(item);
+          return {
+            bodyPart: toString(row.bodyPart, `部位 ${index + 1}`),
+            issue: toString(row.issue, "需要继续观察该部位的发力与稳定性。"),
+            recommendation: toString(row.recommendation, "下次拍摄时提供更完整角度并重复同类动作。"),
+          };
+        })
+      : [],
+    priorityFixes: Array.isArray(source.priorityFixes)
+      ? source.priorityFixes.map((item, index) => {
+          const row = toObject(item);
+          return {
+            title: toString(row.title, `修正重点 ${index + 1}`),
+            why: toString(row.why, "该问题会影响击球质量与动作稳定性。"),
+            howToPractice: toString(row.howToPractice, "请使用影子挥拍和定点重复练习进行修正。"),
+            successMetric: toString(row.successMetric, "连续 3 组动作保持稳定且节奏一致。"),
+          };
+        })
+      : [],
+    drills: Array.isArray(source.drills)
+      ? source.drills.map((item, index) => {
+          const row = toObject(item);
+          return {
+            name: toString(row.name, `练习 ${index + 1}`),
+            purpose: toString(row.purpose, "针对当前视觉识别出的重点问题做专项修正。"),
+            durationMinutes: Math.max(3, toNumber(row.durationMinutes, 8)),
+            steps: toStringArray(row.steps, ["从慢速影子挥拍开始，逐步加入完整节奏。"]),
+            coachingCues: toStringArray(row.coachingCues, ["保持击球点在身体前侧", "注意转体与重心传递"]),
+          };
+        })
+      : [],
+    safetyRisks: toStringArray(source.safetyRisks),
+    // Fall back to issueTags before resorting to the generic capture advice.
+    nextSessionFocus: toStringArray(source.nextSessionFocus, toStringArray(source.issueTags, ["保持同一动作连续拍摄 6-10 次"])),
+    recommendedCaptureTips: toStringArray(source.recommendedCaptureTips, ["保证全身入镜，并保持拍摄角度稳定"]),
+  };
+}