Repair multimodal vision parsing and rerun fallback history

这个提交包含在:
cryptocommuniums-afk
2026-03-15 02:31:44 +08:00
父节点 f4f425de42
当前提交 ae93269c62
修改 10 个文件,包含 417 行新增和 21 行删除

193
server/vision.ts 普通文件
查看文件

@@ -0,0 +1,193 @@
import type { Message } from "./_core/llm";
/**
 * Shape of the coaching report returned by
 * `normalizeMultimodalCorrectionReport` after cleaning up a vision-model reply.
 */
export type MultimodalCorrectionReport = {
  /** Short overall conclusion text. */
  summary: string;
  /** Overall score; the normalizer in this file clamps it to 0–100. */
  overallScore: number;
  /** Confidence value; the normalizer in this file clamps it to 0–100. */
  confidence: number;
  /** One entry per assessed phase. */
  phaseFindings: {
    phase: string;
    /** Per-phase score; clamped to 0–100 by the normalizer. */
    score: number;
    observation: string;
    impact: string;
  }[];
  /** One entry per body part with an issue and a recommendation. */
  bodyPartFindings: {
    bodyPart: string;
    issue: string;
    recommendation: string;
  }[];
  /** Corrections to work on, each with rationale, practice method and success metric. */
  priorityFixes: {
    title: string;
    why: string;
    howToPractice: string;
    successMetric: string;
  }[];
  /** Suggested practice drills. */
  drills: {
    name: string;
    purpose: string;
    /** Drill length in minutes; the normalizer enforces a minimum of 3. */
    durationMinutes: number;
    steps: string[];
    coachingCues: string[];
  }[];
  safetyRisks: string[];
  nextSessionFocus: string[];
  recommendedCaptureTips: string[];
};
/**
 * Coerces an arbitrary value into a key/value record.
 *
 * @param value - Any value, typically a fragment of parsed JSON.
 * @returns The same reference when `value` is a non-null, non-array object;
 *          otherwise a new empty object.
 */
function toObject(value: unknown): Record<string, unknown> {
  const isRecord = typeof value === "object" && value !== null && !Array.isArray(value);
  if (!isRecord) {
    return {};
  }
  return value as Record<string, unknown>;
}
/**
 * Converts a loosely typed value into display text.
 *
 * @param value - Candidate value.
 * @param fallback - Text returned when `value` is unusable.
 * @returns The trimmed string when `value` is a non-blank string, the
 *          `String(...)` form of a number or boolean, and `fallback` for
 *          everything else (blank strings, objects, null, undefined, ...).
 */
function toString(value: unknown, fallback: string): string {
  switch (typeof value) {
    case "string": {
      const trimmed = value.trim();
      return trimmed.length > 0 ? trimmed : fallback;
    }
    case "number":
    case "boolean":
      return String(value);
    default:
      return fallback;
  }
}
/**
 * Reads a finite number from a loosely typed value.
 *
 * Only real numbers and non-blank numeric strings are accepted. Everything
 * else returns `fallback`; a bare `Number(value)` would instead turn `null`,
 * `""`, whitespace-only strings and `[]` into `0` (and `true` into `1`),
 * overriding the caller's default with a bogus value.
 *
 * @param value - Candidate value, typically a field of parsed JSON.
 * @param fallback - Number returned when `value` carries no usable number.
 * @returns The finite numeric value, or `fallback`.
 */
function toNumber(value: unknown, fallback: number): number {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : fallback;
  }
  if (typeof value === "string" && value.trim().length > 0) {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : fallback;
  }
  return fallback;
}
/**
 * Normalizes a value into a list of non-empty strings.
 *
 * @param value - An array of loosely typed items, or a single string.
 * @param fallback - List returned when `value` is neither an array nor a
 *                   non-blank string. Defaults to an empty list.
 * @returns For an array: each item converted with `toString`, with items that
 *          convert to empty text dropped (the result may be empty). For a
 *          non-blank string: a one-element list holding the trimmed string.
 *          Otherwise `fallback`.
 */
function toStringArray(value: unknown, fallback: string[] = []): string[] {
  if (typeof value === "string") {
    const single = value.trim();
    return single.length > 0 ? [single] : fallback;
  }
  if (!Array.isArray(value)) {
    return fallback;
  }
  const cleaned: string[] = [];
  for (const entry of value) {
    // Unusable entries (objects, null, blank strings) convert to "" and are skipped.
    const text = toString(entry, "").trim();
    if (text) {
      cleaned.push(text);
    }
  }
  return cleaned;
}
/**
 * Flattens model reply content into a single text string.
 *
 * @param content - A plain string, or an array whose elements are strings
 *                  and/or objects of the form `{ type: "text", text: string }`.
 * @returns The trimmed string for string input; for array input, the text of
 *          every string element and every `type: "text"` part joined with
 *          newlines and trimmed (other elements contribute an empty line);
 *          `""` for any other input.
 */
function extractTextContent(content: unknown): string {
  if (typeof content === "string") {
    return content.trim();
  }
  if (!Array.isArray(content)) {
    return "";
  }
  const pieces: string[] = [];
  for (const part of content) {
    let text = "";
    if (typeof part === "string") {
      text = part;
    } else {
      const record = part as Message["content"];
      if (
        record &&
        typeof record === "object" &&
        "type" in record &&
        record.type === "text" &&
        typeof record.text === "string"
      ) {
        text = record.text;
      }
    }
    pieces.push(text);
  }
  return pieces.join("\n").trim();
}
/**
 * Picks the most likely JSON payload out of free-form model text.
 *
 * Tried in order:
 * 1. the trimmed body of the first Markdown code fence (optionally tagged
 *    `json`, case-insensitive), when that body is non-empty;
 * 2. the span from the first `{` to the last `}`;
 * 3. the input text unchanged.
 *
 * @param text - Raw text returned by the model.
 * @returns The candidate JSON text; it is not parsed or validated here.
 */
function extractJsonBlock(text: string): string {
  const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
  const fencedBody = fenced?.[1];
  if (fencedBody) {
    return fencedBody.trim();
  }
  const first = text.indexOf("{");
  const last = text.lastIndexOf("}");
  const hasObjectSpan = first >= 0 && last > first;
  return hasObjectSpan ? text.slice(first, last + 1) : text;
}
/**
 * Extracts a JSON object from a vision-model reply.
 *
 * @param content - Either an already-structured object (returned as-is), a
 *                  text reply, or an array of content parts that contains text.
 * @returns The structured object.
 * @throws Error when the reply contains no text, or when the parsed JSON is
 *         not a plain object (array, number, string, boolean or null).
 * @throws SyntaxError (from `JSON.parse`) when the extracted text is not valid JSON.
 */
export function extractStructuredJsonContent(content: unknown): Record<string, unknown> {
  if (content && typeof content === "object" && !Array.isArray(content)) {
    return content as Record<string, unknown>;
  }

  const text = extractTextContent(content);
  if (!text) {
    throw new Error("Vision model returned empty content");
  }

  const parsed: unknown = JSON.parse(extractJsonBlock(text));
  // JSON.parse can legally yield arrays and primitives; returning those under a
  // Record type would let a malformed reply pass as an (empty) report downstream.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new Error("Vision model returned JSON that is not an object");
  }
  return parsed as Record<string, unknown>;
}
/**
 * Converts a loosely structured model payload into a complete
 * `MultimodalCorrectionReport`, filling every missing or malformed field with
 * a default.
 *
 * Phase findings come from `phaseFindings` when it is a non-empty array;
 * otherwise each entry of the `phaseAssessment` object becomes a finding with
 * a score of 60. When `summary` is not a non-blank string, a summary is built
 * from the first two *normalized* phase findings, so malformed raw entries
 * (for example `null`) can neither throw nor leak "undefined" into the text.
 *
 * @param raw - Parsed model output; non-object input is treated as `{}`.
 * @returns A fully populated report with scores clamped to 0–100 and drill
 *          durations of at least 3 minutes.
 */
export function normalizeMultimodalCorrectionReport(raw: unknown): MultimodalCorrectionReport {
  const source = toObject(raw);
  const fallbackSummary = "已完成图片审阅,请结合关键修正点继续训练。";

  // Small local helpers shared by the field mappings below.
  const clampPercent = (value: unknown, fallback: number): number =>
    Math.max(0, Math.min(100, toNumber(value, fallback)));
  const rowsOf = (value: unknown): unknown[] => (Array.isArray(value) ? value : []);

  const rawPhaseFindings: unknown[] =
    Array.isArray(source.phaseFindings) && source.phaseFindings.length > 0
      ? source.phaseFindings
      : Object.entries(toObject(source.phaseAssessment)).map(([phase, observation]) => ({
          phase,
          score: 60,
          observation: toString(observation, "当前图片信息不足,建议补充连续动作帧。"),
          impact: "该阶段信息不足会限制系统对发力链条和节奏的判断。",
        }));

  // Normalize before deriving the summary so the summary only ever sees clean rows.
  const phaseFindings = rawPhaseFindings.map((item, index) => {
    const row = toObject(item);
    return {
      phase: toString(row.phase, `阶段 ${index + 1}`),
      score: clampPercent(row.score, 70),
      observation: toString(row.observation, "该阶段已完成基础识别。"),
      impact: toString(row.impact, "建议结合连续视频继续观察动作节奏。"),
    };
  });

  const hasOwnSummary = typeof source.summary === "string" && source.summary.trim().length > 0;
  const derivedSummary = hasOwnSummary
    ? source.summary
    : phaseFindings.length > 0
      ? `已完成图片审阅。当前可见结论:${phaseFindings
          .slice(0, 2)
          .map((item) => `${item.phase}${item.observation}`)
          .join(";")}`
      : fallbackSummary;

  return {
    summary: toString(derivedSummary, fallbackSummary),
    overallScore: clampPercent(source.overallScore, 75),
    confidence: clampPercent(source.confidence, 70),
    phaseFindings,
    bodyPartFindings: rowsOf(source.bodyPartFindings).map((item, index) => {
      const row = toObject(item);
      return {
        bodyPart: toString(row.bodyPart, `部位 ${index + 1}`),
        issue: toString(row.issue, "需要继续观察该部位的发力与稳定性。"),
        recommendation: toString(row.recommendation, "下次拍摄时提供更完整角度并重复同类动作。"),
      };
    }),
    priorityFixes: rowsOf(source.priorityFixes).map((item, index) => {
      const row = toObject(item);
      return {
        title: toString(row.title, `修正重点 ${index + 1}`),
        why: toString(row.why, "该问题会影响击球质量与动作稳定性。"),
        howToPractice: toString(row.howToPractice, "请使用影子挥拍和定点重复练习进行修正。"),
        successMetric: toString(row.successMetric, "连续 3 组动作保持稳定且节奏一致。"),
      };
    }),
    drills: rowsOf(source.drills).map((item, index) => {
      const row = toObject(item);
      return {
        name: toString(row.name, `练习 ${index + 1}`),
        purpose: toString(row.purpose, "针对当前视觉识别出的重点问题做专项修正。"),
        durationMinutes: Math.max(3, toNumber(row.durationMinutes, 8)),
        steps: toStringArray(row.steps, ["从慢速影子挥拍开始,逐步加入完整节奏。"]),
        coachingCues: toStringArray(row.coachingCues, ["保持击球点在身体前侧", "注意转体与重心传递"]),
      };
    }),
    safetyRisks: toStringArray(source.safetyRisks),
    // When the model gives no explicit focus list, fall back to its issue tags, then to a generic tip.
    nextSessionFocus: toStringArray(
      source.nextSessionFocus,
      toStringArray(source.issueTags, ["保持同一动作连续拍摄 6-10 次"]),
    ),
    recommendedCaptureTips: toStringArray(source.recommendedCaptureTips, ["保证全身入镜,并保持拍摄角度稳定"]),
  };
}