Improve live analysis stability and video clip drafting

这个提交包含在:
cryptocommuniums-afk
2026-03-15 02:11:34 +08:00
父节点 edc66ea5bc
当前提交 815f96d4e8
修改 9 个文件,包含 570 行新增、56 行删除

查看文件

@@ -76,6 +76,11 @@ type AnalyzedFrame = {
feedback: string[];
};
// A single per-frame classification sample: the detected action label plus its
// confidence score. Samples are accumulated into a short rolling window by
// stabilizeAnalyzedFrame to smooth out frame-to-frame flicker.
type ActionObservation = {
action: ActionType;
confidence: number;
};
const ACTION_META: Record<ActionType, { label: string; tone: string; accent: string }> = {
forehand: { label: "正手挥拍", tone: "bg-emerald-500/10 text-emerald-700", accent: "bg-emerald-500" },
backhand: { label: "反手挥拍", tone: "bg-sky-500/10 text-sky-700", accent: "bg-sky-500" },
@@ -184,6 +189,55 @@ function createSegment(action: ActionType, elapsedMs: number, frame: AnalyzedFra
};
}
/**
 * Smooths a freshly analyzed frame against a short rolling window of recent
 * observations so the reported action does not flicker between frames.
 *
 * Side effect: `history` is rewritten in place to hold the latest window
 * (capped at 6 samples), keeping the caller's mutable ref up to date.
 *
 * @param frame   The raw per-frame analysis result to stabilize.
 * @param history Mutable rolling window of prior observations (updated in place).
 * @returns A copy of `frame` with action/confidence/feedback stabilized.
 */
function stabilizeAnalyzedFrame(frame: AnalyzedFrame, history: ActionObservation[]): AnalyzedFrame {
  // Append the newest observation, then trim to the 6 most recent samples.
  const window = history.concat({ action: frame.action, confidence: frame.confidence }).slice(-6);
  history.splice(0, history.length, ...window);

  // Recency-weighted vote: sample at index i contributes confidence * (i + 1),
  // so newer samples count for more. Seed every action so ranking order is stable.
  const scores: Record<ActionType, number> = {
    forehand: 0,
    backhand: 0,
    serve: 0,
    volley: 0,
    overhead: 0,
    slice: 0,
    lob: 0,
    unknown: 0,
  };
  window.forEach((sample, index) => {
    scores[sample.action] += sample.confidence * (index + 1);
  });

  const ranked = (Object.entries(scores) as Array<[ActionType, number]>).sort(
    (a, b) => b[1] - a[1],
  );
  const [winner = "unknown", winnerScore = 0] = ranked[0] || [];
  const [, runnerScore = 0] = ranked[1] || [];

  // Average the raw confidences of the samples that voted for the winner;
  // fall back to the current frame's confidence when none did.
  const winnerSamples = window.filter((sample) => sample.action === winner);
  let averageConfidence = frame.confidence;
  if (winnerSamples.length > 0) {
    let total = 0;
    for (const sample of winnerSamples) total += sample.confidence;
    averageConfidence = total / winnerSamples.length;
  }

  // Prefer the live frame's own label when the window is inconclusive:
  // either the window voted "unknown" but the frame is reasonably confident,
  // or the top two scores are too close to call and the frame is confident.
  let stableAction: ActionType;
  if (winner === "unknown" && frame.action !== "unknown" && frame.confidence >= 0.52) {
    stableAction = frame.action;
  } else if (winnerScore - runnerScore < 0.2 && frame.confidence >= 0.65) {
    stableAction = frame.action;
  } else {
    stableAction = winner;
  }

  // When the stabilized label agrees with the frame, never lower its confidence.
  const stableConfidence =
    stableAction === frame.action
      ? Math.max(frame.confidence, averageConfidence)
      : averageConfidence;

  return {
    ...frame,
    action: stableAction,
    confidence: clamp(stableConfidence, 0, 1),
    feedback:
      stableAction === "unknown"
        ? ["系统正在继续观察,当前窗口内未形成稳定动作特征。", ...frame.feedback].slice(0, 3)
        : frame.feedback,
  };
}
function analyzePoseFrame(landmarks: Point[], tracking: TrackingState, timestamp: number): AnalyzedFrame {
const nose = landmarks[0];
const leftShoulder = landmarks[11];
@@ -428,6 +482,7 @@ export default function LiveCamera() {
const animationRef = useRef<number>(0);
const sessionStartedAtRef = useRef<number>(0);
const trackingRef = useRef<TrackingState>({});
const actionHistoryRef = useRef<ActionObservation[]>([]);
const currentSegmentRef = useRef<ActionSegment | null>(null);
const segmentsRef = useRef<ActionSegment[]>([]);
const frameSamplesRef = useRef<PoseScore[]>([]);
@@ -746,6 +801,7 @@ export default function LiveCamera() {
segmentsRef.current = [];
currentSegmentRef.current = null;
trackingRef.current = {};
actionHistoryRef.current = [];
frameSamplesRef.current = [];
sessionStartedAtRef.current = Date.now();
setDurationMs(0);
@@ -785,7 +841,10 @@ export default function LiveCamera() {
drawOverlay(canvas, results.poseLandmarks);
if (!results.poseLandmarks) return;
const analyzed = analyzePoseFrame(results.poseLandmarks, trackingRef.current, performance.now());
const analyzed = stabilizeAnalyzedFrame(
analyzePoseFrame(results.poseLandmarks, trackingRef.current, performance.now()),
actionHistoryRef.current,
);
const elapsedMs = Date.now() - sessionStartedAtRef.current;
appendFrameToSegment(analyzed, elapsedMs);
frameSamplesRef.current.push(analyzed.score);