From 139dc61b6158403d2a4b9d430c234734e38a8041 Mon Sep 17 00:00:00 2001 From: cryptocommuniums-afk Date: Sun, 15 Mar 2026 21:03:06 +0800 Subject: [PATCH] Fix live camera gorilla avatar preset --- client/src/lib/liveCamera.test.ts | 115 +++++++ client/src/lib/liveCamera.ts | 514 ++++++++++++++++++++++++++++++ client/src/pages/LiveCamera.tsx | 366 ++++++++++++++------- docs/CHANGELOG.md | 22 ++ tests/e2e/app.spec.ts | 7 +- 5 files changed, 907 insertions(+), 117 deletions(-) create mode 100644 client/src/lib/liveCamera.test.ts create mode 100644 client/src/lib/liveCamera.ts diff --git a/client/src/lib/liveCamera.test.ts b/client/src/lib/liveCamera.test.ts new file mode 100644 index 0000000..67bbf0d --- /dev/null +++ b/client/src/lib/liveCamera.test.ts @@ -0,0 +1,115 @@ +import { describe, expect, it } from "vitest"; +import { + ACTION_WINDOW_FRAMES, + createStableActionState, + getAvatarAnchors, + resolveAvatarKeyFromPrompt, + stabilizeActionStream, + type FrameActionSample, +} from "./liveCamera"; + +function feedSamples(samples: Array>, intervalMs = 33) { + const history: FrameActionSample[] = []; + const state = createStableActionState(); + let lastResult = null as ReturnType | null; + + samples.forEach((sample, index) => { + lastResult = stabilizeActionStream( + { + ...sample, + timestamp: index * intervalMs, + }, + history, + state, + ); + }); + + return { history, state, lastResult }; +} + +describe("live camera action stabilizer", () => { + it("locks a dominant action after a full temporal window", () => { + const samples = Array.from({ length: ACTION_WINDOW_FRAMES * 2 }, () => ({ + action: "forehand" as const, + confidence: 0.84, + })); + const { lastResult } = feedSamples(samples); + + expect(lastResult?.stableAction).toBe("forehand"); + expect(lastResult?.windowAction).toBe("forehand"); + expect(lastResult?.pending).toBe(false); + expect(lastResult?.windowShare).toBeGreaterThan(0.9); + }); + + it("ignores brief action spikes and keeps the stable action", () => { + const stableFrames = Array.from({ length: ACTION_WINDOW_FRAMES * 2 }, () => ({ + action: "forehand" as const, + confidence: 0.82, + })); + const noisyFrames = Array.from({ length: 5 }, () => ({ + action: "backhand" as const, + confidence: 0.88, + })); + const { lastResult } = feedSamples([...stableFrames, ...noisyFrames]); + + expect(lastResult?.stableAction).toBe("forehand"); + expect(lastResult?.pending).toBe(false); + }); + + it("switches only after the next action persists long enough", () => { + const forehandFrames = Array.from({ length: ACTION_WINDOW_FRAMES * 2 }, () => ({ + action: "forehand" as const, + confidence: 0.8, + })); + const backhandFrames = Array.from({ length: ACTION_WINDOW_FRAMES * 2 }, () => ({ + action: "backhand" as const, + confidence: 0.85, + })); + const { lastResult, state } = feedSamples([...forehandFrames, ...backhandFrames]); + + expect(lastResult?.stableAction).toBe("backhand"); + expect(state.switchCount).toBeGreaterThanOrEqual(2); + }); + + it("requires a longer delay before falling back to unknown", () => { + const forehandFrames = Array.from({ length: ACTION_WINDOW_FRAMES * 2 }, () => ({ + action: "forehand" as const, + confidence: 0.83, + })); + const unknownFrames = Array.from({ length: 10 }, () => ({ + action: "unknown" as const, + confidence: 0.4, + })); + const { lastResult } = feedSamples([...forehandFrames, ...unknownFrames]); + + expect(lastResult?.stableAction).toBe("forehand"); + }); +}); + +describe("live camera avatar helpers", () => { + it("maps prompt keywords into avatar presets", () => { + expect(resolveAvatarKeyFromPrompt("切换成猩猩形象", "gorilla")).toBe("gorilla"); + expect(resolveAvatarKeyFromPrompt("dog mascot", "gorilla")).toBe("dog"); + expect(resolveAvatarKeyFromPrompt("", "pig")).toBe("pig"); + }); + + it("builds avatar anchors from pose landmarks", () => { + const landmarks = Array.from({ length: 33 }, () => ({ x: 0.5, y: 0.5, visibility: 0.95 })); + landmarks[0] = { x: 0.5, y: 0.16, visibility: 0.99 }; + landmarks[11] = { x: 0.4, y: 0.3, visibility: 0.99 }; + landmarks[12] = { x: 0.6, y: 0.3, visibility: 0.99 }; + landmarks[15] = { x: 0.28, y: 0.44, visibility: 0.99 }; + landmarks[16] = { x: 0.72, y: 0.44, visibility: 0.99 }; + landmarks[23] = { x: 0.44, y: 0.58, visibility: 0.99 }; + landmarks[24] = { x: 0.56, y: 0.58, visibility: 0.99 }; + landmarks[27] = { x: 0.43, y: 0.92, visibility: 0.99 }; + landmarks[28] = { x: 0.57, y: 0.92, visibility: 0.99 }; + + const anchors = getAvatarAnchors(landmarks, 1280, 720); + + expect(anchors).not.toBeNull(); + expect(anchors?.headRadius).toBeGreaterThan(30); + expect(anchors?.bodyHeight).toBeGreaterThan(120); + expect(anchors?.rightHandX).toBeGreaterThan(anchors?.leftHandX || 0); + }); +}); diff --git a/client/src/lib/liveCamera.ts b/client/src/lib/liveCamera.ts new file mode 100644 index 0000000..2e9902f --- /dev/null +++ b/client/src/lib/liveCamera.ts @@ -0,0 +1,514 @@ +export type LiveActionType = "forehand" | "backhand" | "serve" | "volley" | "overhead" | "slice" | "lob" | "unknown"; + +export type PosePoint = { + x: number; + y: number; + visibility?: number; +}; + +export type AvatarKey = "gorilla" | "monkey" | "pig" | "dog"; + +export type AvatarRenderState = { + enabled: boolean; + avatarKey: AvatarKey; + customLabel?: string; +}; + +export type FrameActionSample = { + action: LiveActionType; + confidence: number; + timestamp: number; +}; + +export type StableActionState = { + current: LiveActionType; + currentSince: number | null; + candidate: LiveActionType | null; + candidateSince: number | null; + candidateWindows: number; + switchCount: number; +}; + +export type StabilizedActionMeta = { + stableAction: LiveActionType; + stableConfidence: number; + windowAction: LiveActionType; + windowConfidence: number; + windowShare: number; + windowFrames: number; + windowProgress: number; + pending: boolean; + pendingAction: LiveActionType | null; + stableMs: number; + candidateMs: number; + rawVolatility: number; + switchCount: number; +}; + +type ActionStat = { + count: number; + totalConfidence: number; + share: number; + averageConfidence: number; + strength: number; +}; + +type AvatarAnchors = { + headX: number; + headY: number; + headRadius: number; + bodyX: number; + bodyY: number; + bodyWidth: number; + bodyHeight: number; + shoulderY: number; + footY: number; + leftHandX: number; + leftHandY: number; + rightHandX: number; + rightHandY: number; +}; + +const ACTIONS: LiveActionType[] = ["forehand", "backhand", "serve", "volley", "overhead", "slice", "lob", "unknown"]; + +export const ACTION_WINDOW_FRAMES = 24; +const ACTION_WINDOW_MIN_SHARE = 0.6; +const ACTION_WINDOW_MIN_CONFIDENCE = 0.58; +const ACTION_SWITCH_MIN_MS = 700; +const ACTION_UNKNOWN_MIN_MS = 900; +const ACTION_LOCK_IN_WINDOWS = 2; +const ACTION_SWITCH_DELTA = 0.12; + +export const AVATAR_PRESETS: Array<{ key: AvatarKey; label: string; keywords: string[] }> = [ + { key: "gorilla", label: "猩猩", keywords: ["gorilla", "ape", "猩猩", "猩", "大猩猩"] }, + { key: "monkey", label: "猴子", keywords: ["monkey", "ape", "猴", "猴子"] }, + { key: "pig", label: "猪", keywords: ["pig", "猪", "小猪"] }, + { key: "dog", label: "狗", keywords: ["dog", "puppy", "犬", "狗", "小狗"] }, +]; + +function clamp(value: number, min: number, max: number) { + return Math.max(min, Math.min(max, value)); +} + +function getActionStat(samples: FrameActionSample[], action: LiveActionType): ActionStat { + const matches = samples.filter((sample) => sample.action === action); + const count = matches.length; + const totalConfidence = matches.reduce((sum, sample) => sum + sample.confidence, 0); + const share = samples.length > 0 ? count / samples.length : 0; + const averageConfidence = count > 0 ? totalConfidence / count : 0; + + return { + count, + totalConfidence, + share, + averageConfidence, + strength: share * 0.7 + averageConfidence * 0.3, + }; +} + +function getWindowAction(samples: FrameActionSample[]) { + const stats = new Map(); + ACTIONS.forEach((action) => { + stats.set(action, getActionStat(samples, action)); + }); + + const ranked = ACTIONS + .map((action) => ({ action, stats: stats.get(action)! })) + .sort((a, b) => { + if (b.stats.strength !== a.stats.strength) { + return b.stats.strength - a.stats.strength; + } + return b.stats.totalConfidence - a.stats.totalConfidence; + }); + + const winner = ranked[0] ?? { action: "unknown" as LiveActionType, stats: stats.get("unknown")! }; + const qualifies = + winner.stats.share >= ACTION_WINDOW_MIN_SHARE && + winner.stats.averageConfidence >= ACTION_WINDOW_MIN_CONFIDENCE; + + return { + action: qualifies ? winner.action : "unknown", + stats, + winnerStats: winner.stats, + }; +} + +function getRawVolatility(samples: FrameActionSample[]) { + if (samples.length <= 1) return 0; + let switches = 0; + for (let index = 1; index < samples.length; index += 1) { + if (samples[index]?.action !== samples[index - 1]?.action) { + switches += 1; + } + } + return switches / (samples.length - 1); +} + +export function createStableActionState(initial: LiveActionType = "unknown"): StableActionState { + return { + current: initial, + currentSince: null, + candidate: null, + candidateSince: null, + candidateWindows: 0, + switchCount: 0, + }; +} + +export function createEmptyStabilizedActionMeta(): StabilizedActionMeta { + return { + stableAction: "unknown", + stableConfidence: 0, + windowAction: "unknown", + windowConfidence: 0, + windowShare: 0, + windowFrames: 0, + windowProgress: 0, + pending: false, + pendingAction: null, + stableMs: 0, + candidateMs: 0, + rawVolatility: 0, + switchCount: 0, + }; +} + +export function stabilizeActionStream( + sample: FrameActionSample, + history: FrameActionSample[], + state: StableActionState, +) { + history.push(sample); + if (history.length > ACTION_WINDOW_FRAMES) { + history.splice(0, history.length - ACTION_WINDOW_FRAMES); + } + + const { action: windowAction, stats } = getWindowAction(history); + const windowStats = stats.get(windowAction) ?? getActionStat(history, "unknown"); + const currentStats = stats.get(state.current) ?? getActionStat(history, state.current); + const pendingMinMs = windowAction === "unknown" ? ACTION_UNKNOWN_MIN_MS : ACTION_SWITCH_MIN_MS; + const windowProgress = clamp(history.length / ACTION_WINDOW_FRAMES, 0, 1); + + if (state.currentSince == null) { + state.currentSince = sample.timestamp; + } + + if (windowAction === state.current) { + state.candidate = null; + state.candidateSince = null; + state.candidateWindows = 0; + } else if (windowProgress >= 0.7) { + if (state.candidate !== windowAction) { + state.candidate = windowAction; + state.candidateSince = sample.timestamp; + state.candidateWindows = 1; + } else { + state.candidateWindows += 1; + } + + const candidateStats = stats.get(windowAction) ?? getActionStat(history, windowAction); + const currentStrength = state.current === "unknown" ? currentStats.strength * 0.55 : currentStats.strength; + const candidateDuration = state.candidateSince == null ? 0 : sample.timestamp - state.candidateSince; + const canSwitch = + state.candidateWindows >= ACTION_LOCK_IN_WINDOWS && + candidateDuration >= pendingMinMs && + candidateStats.strength >= currentStrength + ACTION_SWITCH_DELTA; + + if (canSwitch) { + state.current = windowAction; + state.currentSince = sample.timestamp; + state.candidate = null; + state.candidateSince = null; + state.candidateWindows = 0; + state.switchCount += 1; + } + } + + const stableStats = stats.get(state.current) ?? getActionStat(history, state.current); + const stableConfidence = state.current === "unknown" + ? Math.max(sample.confidence * 0.45, stableStats.averageConfidence) + : Math.max(stableStats.averageConfidence, windowStats.averageConfidence * 0.88); + + return { + stableAction: state.current, + stableConfidence: clamp(stableConfidence, 0, 1), + windowAction, + windowConfidence: clamp(windowStats.averageConfidence, 0, 1), + windowShare: clamp(windowStats.share, 0, 1), + windowFrames: history.length, + windowProgress, + pending: Boolean(state.candidate), + pendingAction: state.candidate, + stableMs: state.currentSince == null ? 0 : sample.timestamp - state.currentSince, + candidateMs: state.candidateSince == null ? 0 : sample.timestamp - state.candidateSince, + rawVolatility: getRawVolatility(history), + switchCount: state.switchCount, + } satisfies StabilizedActionMeta; +} + +export function resolveAvatarKeyFromPrompt(prompt: string, fallback: AvatarKey): AvatarKey { + const normalized = prompt.trim().toLowerCase(); + if (!normalized) return fallback; + const matched = AVATAR_PRESETS.find((preset) => preset.keywords.some((keyword) => normalized.includes(keyword))); + return matched?.key ?? fallback; +} + +function averagePoint(a: PosePoint | undefined, b: PosePoint | undefined, defaultX: number, defaultY: number) { + return { + x: ((a?.x ?? defaultX) + (b?.x ?? defaultX)) / 2, + y: ((a?.y ?? defaultY) + (b?.y ?? defaultY)) / 2, + }; +} + +export function getAvatarAnchors(landmarks: PosePoint[], width: number, height: number): AvatarAnchors | null { + const nose = landmarks[0]; + const leftShoulder = landmarks[11]; + const rightShoulder = landmarks[12]; + const leftHip = landmarks[23]; + const rightHip = landmarks[24]; + const leftWrist = landmarks[15]; + const rightWrist = landmarks[16]; + const leftAnkle = landmarks[27]; + const rightAnkle = landmarks[28]; + const leftEar = landmarks[7]; + const rightEar = landmarks[8]; + + if (!nose || !leftShoulder || !rightShoulder || !leftHip || !rightHip) { + return null; + } + + const shoulderCenter = averagePoint(leftShoulder, rightShoulder, 0.5, 0.32); + const hipCenter = averagePoint(leftHip, rightHip, 0.5, 0.62); + const ankleCenter = averagePoint(leftAnkle, rightAnkle, hipCenter.x, 0.92); + const shoulderSpan = Math.abs(rightShoulder.x - leftShoulder.x) * width; + const torsoHeight = Math.max((hipCenter.y - shoulderCenter.y) * height, shoulderSpan * 0.8); + const headRadius = Math.max( + shoulderSpan * 0.28, + Math.abs((leftEar?.x ?? nose.x - 0.04) - (rightEar?.x ?? nose.x + 0.04)) * width * 0.45, + 34, + ); + const bodyWidth = Math.max(shoulderSpan * 1.05, headRadius * 1.8); + const bodyHeight = Math.max(torsoHeight * 1.1, headRadius * 2.2); + + return { + headX: nose.x * width, + headY: Math.min(nose.y * height, shoulderCenter.y * height - headRadius * 0.2), + headRadius, + bodyX: shoulderCenter.x * width, + bodyY: shoulderCenter.y * height + bodyHeight * 0.48, + bodyWidth, + bodyHeight, + shoulderY: shoulderCenter.y * height, + footY: Math.max(ankleCenter.y * height, hipCenter.y * height + bodyHeight * 1.35), + leftHandX: (leftWrist?.x ?? leftShoulder.x - 0.08) * width, + leftHandY: (leftWrist?.y ?? shoulderCenter.y + 0.1) * height, + rightHandX: (rightWrist?.x ?? rightShoulder.x + 0.08) * width, + rightHandY: (rightWrist?.y ?? shoulderCenter.y + 0.1) * height, + }; +} + +function drawRoundedBody(ctx: CanvasRenderingContext2D, anchors: AvatarAnchors, fill: string) { + const radius = Math.min(anchors.bodyWidth, anchors.bodyHeight) * 0.18; + const left = anchors.bodyX - anchors.bodyWidth / 2; + const top = anchors.bodyY - anchors.bodyHeight / 2; + const right = left + anchors.bodyWidth; + const bottom = top + anchors.bodyHeight; + + ctx.beginPath(); + ctx.moveTo(left + radius, top); + ctx.lineTo(right - radius, top); + ctx.quadraticCurveTo(right, top, right, top + radius); + ctx.lineTo(right, bottom - radius); + ctx.quadraticCurveTo(right, bottom, right - radius, bottom); + ctx.lineTo(left + radius, bottom); + ctx.quadraticCurveTo(left, bottom, left, bottom - radius); + ctx.lineTo(left, top + radius); + ctx.quadraticCurveTo(left, top, left + radius, top); + ctx.closePath(); + ctx.fillStyle = fill; + ctx.fill(); +} + +function drawLimbs(ctx: CanvasRenderingContext2D, anchors: AvatarAnchors, stroke: string) { + ctx.strokeStyle = stroke; + ctx.lineWidth = Math.max(anchors.headRadius * 0.22, 10); + ctx.lineCap = "round"; + ctx.beginPath(); + ctx.moveTo(anchors.bodyX - anchors.bodyWidth * 0.24, anchors.shoulderY + anchors.headRadius * 0.65); + ctx.lineTo(anchors.leftHandX, anchors.leftHandY); + ctx.moveTo(anchors.bodyX + anchors.bodyWidth * 0.24, anchors.shoulderY + anchors.headRadius * 0.65); + ctx.lineTo(anchors.rightHandX, anchors.rightHandY); + ctx.moveTo(anchors.bodyX - anchors.bodyWidth * 0.14, anchors.bodyY + anchors.bodyHeight * 0.42); + ctx.lineTo(anchors.bodyX - anchors.bodyWidth * 0.18, anchors.footY); + ctx.moveTo(anchors.bodyX + anchors.bodyWidth * 0.14, anchors.bodyY + anchors.bodyHeight * 0.42); + ctx.lineTo(anchors.bodyX + anchors.bodyWidth * 0.18, anchors.footY); + ctx.stroke(); +} + +function drawGorillaAvatar(ctx: CanvasRenderingContext2D, anchors: AvatarAnchors) { + ctx.fillStyle = "#3f3f46"; + ctx.beginPath(); + ctx.arc(anchors.headX, anchors.headY, anchors.headRadius, 0, Math.PI * 2); + ctx.fill(); + + ctx.beginPath(); + ctx.arc(anchors.headX - anchors.headRadius * 0.78, anchors.headY - anchors.headRadius * 0.1, anchors.headRadius * 0.28, 0, Math.PI * 2); + ctx.arc(anchors.headX + anchors.headRadius * 0.78, anchors.headY - anchors.headRadius * 0.1, anchors.headRadius * 0.28, 0, Math.PI * 2); + ctx.fill(); + + ctx.fillStyle = "#d6d3d1"; + ctx.beginPath(); + ctx.ellipse(anchors.headX, anchors.headY + anchors.headRadius * 0.16, anchors.headRadius * 0.54, anchors.headRadius * 0.46, 0, 0, Math.PI * 2); + ctx.fill(); + + ctx.fillStyle = "#111827"; + ctx.beginPath(); + ctx.arc(anchors.headX - anchors.headRadius * 0.24, anchors.headY - anchors.headRadius * 0.12, anchors.headRadius * 0.08, 0, Math.PI * 2); + ctx.arc(anchors.headX + anchors.headRadius * 0.24, anchors.headY - anchors.headRadius * 0.12, anchors.headRadius * 0.08, 0, Math.PI * 2); + ctx.fill(); + ctx.beginPath(); + ctx.arc(anchors.headX, anchors.headY + anchors.headRadius * 0.06, anchors.headRadius * 0.08, 0, Math.PI * 2); + ctx.fill(); + + drawRoundedBody(ctx, anchors, "rgba(39,39,42,0.95)"); + drawLimbs(ctx, anchors, "rgba(63,63,70,0.92)"); +} + +function drawMonkeyAvatar(ctx: CanvasRenderingContext2D, anchors: AvatarAnchors) { + ctx.fillStyle = "#8b5a3c"; + ctx.beginPath(); + ctx.arc(anchors.headX, anchors.headY, anchors.headRadius, 0, Math.PI * 2); + ctx.fill(); + ctx.beginPath(); + ctx.arc(anchors.headX - anchors.headRadius * 0.82, anchors.headY - anchors.headRadius * 0.16, anchors.headRadius * 0.34, 0, Math.PI * 2); + ctx.arc(anchors.headX + anchors.headRadius * 0.82, anchors.headY - anchors.headRadius * 0.16, anchors.headRadius * 0.34, 0, Math.PI * 2); + ctx.fill(); + + ctx.fillStyle = "#f3d7bf"; + ctx.beginPath(); + ctx.ellipse(anchors.headX, anchors.headY + anchors.headRadius * 0.14, anchors.headRadius * 0.56, anchors.headRadius * 0.5, 0, 0, Math.PI * 2); + ctx.fill(); + + drawRoundedBody(ctx, anchors, "rgba(120,53,15,0.95)"); + drawLimbs(ctx, anchors, "rgba(146,64,14,0.9)"); +} + +function drawPigAvatar(ctx: CanvasRenderingContext2D, anchors: AvatarAnchors) { + ctx.fillStyle = "#f9a8d4"; + ctx.beginPath(); + ctx.arc(anchors.headX, anchors.headY, anchors.headRadius, 0, Math.PI * 2); + ctx.fill(); + ctx.beginPath(); + ctx.moveTo(anchors.headX - anchors.headRadius * 0.62, anchors.headY - anchors.headRadius * 0.42); + ctx.lineTo(anchors.headX - anchors.headRadius * 0.18, anchors.headY - anchors.headRadius * 1.06); + ctx.lineTo(anchors.headX - anchors.headRadius * 0.02, anchors.headY - anchors.headRadius * 0.32); + ctx.closePath(); + ctx.moveTo(anchors.headX + anchors.headRadius * 0.62, anchors.headY - anchors.headRadius * 0.42); + ctx.lineTo(anchors.headX + anchors.headRadius * 0.18, anchors.headY - anchors.headRadius * 1.06); + ctx.lineTo(anchors.headX + anchors.headRadius * 0.02, anchors.headY - anchors.headRadius * 0.32); + ctx.closePath(); + ctx.fill(); + + ctx.fillStyle = "#fbcfe8"; + ctx.beginPath(); + ctx.ellipse(anchors.headX, anchors.headY + anchors.headRadius * 0.18, anchors.headRadius * 0.44, anchors.headRadius * 0.28, 0, 0, Math.PI * 2); + ctx.fill(); + ctx.fillStyle = "#be185d"; + ctx.beginPath(); + ctx.arc(anchors.headX - anchors.headRadius * 0.14, anchors.headY + anchors.headRadius * 0.18, anchors.headRadius * 0.06, 0, Math.PI * 2); + ctx.arc(anchors.headX + anchors.headRadius * 0.14, anchors.headY + anchors.headRadius * 0.18, anchors.headRadius * 0.06, 0, Math.PI * 2); + ctx.fill(); + + drawRoundedBody(ctx, anchors, "rgba(244,114,182,0.92)"); + drawLimbs(ctx, anchors, "rgba(244,114,182,0.86)"); +} + +function drawDogAvatar(ctx: CanvasRenderingContext2D, anchors: AvatarAnchors) { + ctx.fillStyle = "#d4a373"; + ctx.beginPath(); + ctx.arc(anchors.headX, anchors.headY, anchors.headRadius, 0, Math.PI * 2); + ctx.fill(); + ctx.beginPath(); + ctx.ellipse(anchors.headX - anchors.headRadius * 0.72, anchors.headY - anchors.headRadius * 0.28, anchors.headRadius * 0.22, anchors.headRadius * 0.46, Math.PI / 4, 0, Math.PI * 2); + ctx.ellipse(anchors.headX + anchors.headRadius * 0.72, anchors.headY - anchors.headRadius * 0.28, anchors.headRadius * 0.22, anchors.headRadius * 0.46, -Math.PI / 4, 0, Math.PI * 2); + ctx.fill(); + + ctx.fillStyle = "#f5e6d3"; + ctx.beginPath(); + ctx.ellipse(anchors.headX, anchors.headY + anchors.headRadius * 0.16, anchors.headRadius * 0.5, anchors.headRadius * 0.38, 0, 0, Math.PI * 2); + ctx.fill(); + ctx.fillStyle = "#111827"; + ctx.beginPath(); + ctx.arc(anchors.headX, anchors.headY + anchors.headRadius * 0.04, anchors.headRadius * 0.09, 0, Math.PI * 2); + ctx.fill(); + + drawRoundedBody(ctx, anchors, "rgba(180,83,9,0.93)"); + drawLimbs(ctx, anchors, "rgba(180,83,9,0.88)"); +} + +export function drawLiveCameraOverlay( + canvas: HTMLCanvasElement | null, + landmarks: PosePoint[] | undefined, + avatarState?: AvatarRenderState, +) { + const ctx = canvas?.getContext("2d"); + if (!canvas || !ctx) return; + ctx.clearRect(0, 0, canvas.width, canvas.height); + if (!landmarks) return; + + if (avatarState?.enabled) { + const anchors = getAvatarAnchors(landmarks, canvas.width, canvas.height); + if (anchors) { + ctx.save(); + ctx.globalAlpha = 0.95; + if (avatarState.avatarKey === "monkey") { + drawMonkeyAvatar(ctx, anchors); + } else if (avatarState.avatarKey === "pig") { + drawPigAvatar(ctx, anchors); + } else if (avatarState.avatarKey === "dog") { + drawDogAvatar(ctx, anchors); + } else { + drawGorillaAvatar(ctx, anchors); + } + ctx.restore(); + + ctx.save(); + ctx.strokeStyle = "rgba(255,255,255,0.16)"; + ctx.lineWidth = 2; + ctx.setLineDash([8, 10]); + ctx.beginPath(); + ctx.moveTo(anchors.bodyX, anchors.shoulderY - anchors.headRadius * 1.25); + ctx.lineTo(anchors.bodyX, anchors.footY); + ctx.stroke(); + ctx.restore(); + return; + } + } + + const poseConnections: Array<[number, number]> = [ + [11, 12], [11, 13], [13, 15], [12, 14], [14, 16], + [11, 23], [12, 24], [23, 24], [23, 25], [24, 26], + [25, 27], [26, 28], [15, 17], [16, 18], [15, 19], + [16, 20], [17, 19], [18, 20], + ]; + + ctx.strokeStyle = "rgba(25, 211, 155, 0.9)"; + ctx.lineWidth = 3; + poseConnections.forEach(([from, to]) => { + const start = landmarks[from]; + const end = landmarks[to]; + if (!start || !end || (start.visibility ?? 1) < 0.25 || (end.visibility ?? 1) < 0.25) return; + ctx.beginPath(); + ctx.moveTo(start.x * canvas.width, start.y * canvas.height); + ctx.lineTo(end.x * canvas.width, end.y * canvas.height); + ctx.stroke(); + }); + + landmarks.forEach((point, index) => { + if ((point.visibility ?? 1) < 0.25) return; + ctx.fillStyle = index >= 11 && index <= 16 ? "rgba(253, 224, 71, 0.95)" : "rgba(255,255,255,0.88)"; + ctx.beginPath(); + ctx.arc(point.x * canvas.width, point.y * canvas.height, index >= 11 && index <= 16 ? 5 : 4, 0, Math.PI * 2); + ctx.fill(); + }); +} diff --git a/client/src/pages/LiveCamera.tsx b/client/src/pages/LiveCamera.tsx index dcb637f..cff5921 100644 --- a/client/src/pages/LiveCamera.tsx +++ b/client/src/pages/LiveCamera.tsx @@ -5,12 +5,28 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; import { Progress } from "@/components/ui/progress"; import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; import { Slider } from "@/components/ui/slider"; +import { Switch } from "@/components/ui/switch"; import { formatDateTimeShanghai } from "@/lib/time"; import { toast } from "sonner"; import { applyTrackZoom, type CameraQualityPreset, getCameraVideoConstraints, getLiveAnalysisBitrate, readTrackZoomState } from "@/lib/camera"; +import { + ACTION_WINDOW_FRAMES, + AVATAR_PRESETS, + createEmptyStabilizedActionMeta, + createStableActionState, + drawLiveCameraOverlay, + resolveAvatarKeyFromPrompt, + stabilizeActionStream, + type AvatarKey, + type AvatarRenderState, + type FrameActionSample, + type LiveActionType, + type StabilizedActionMeta, +} from "@/lib/liveCamera"; import { Activity, Camera, @@ -34,7 +50,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react"; type CameraFacing = "user" | "environment"; type SessionMode = "practice" | "pk"; -type ActionType = "forehand" | "backhand" | "serve" | "volley" | "overhead" | "slice" | "lob" | "unknown"; +type ActionType = LiveActionType; type PoseScore = { overall: number; @@ -82,11 +98,6 @@ type AnalyzedFrame = { feedback: string[]; }; -type ActionObservation = { - action: ActionType; - confidence: number; -}; - const ACTION_META: Record = { forehand: { label: "正手挥拍", tone: "bg-emerald-500/10 text-emerald-700", accent: "bg-emerald-500" }, backhand: { label: "反手挥拍", tone: "bg-sky-500/10 text-sky-700", accent: "bg-sky-500" }, @@ -98,23 +109,16 @@ const ACTION_META: Record = [ - [11, 12], [11, 13], [13, 15], [12, 14], [14, 16], - [11, 23], [12, 24], [23, 24], [23, 25], [24, 26], - [25, 27], [26, 28], [15, 17], [16, 18], [15, 19], - [16, 20], [17, 19], [18, 20], -]; - const SETUP_STEPS = [ { title: "固定设备", desc: "手机或平板保持稳定,避免分析阶段发生晃动", icon: }, { title: "保留全身", desc: "画面尽量覆盖从头到脚,便于识别重心和脚步", icon: }, { title: "确认视角", desc: "后置摄像头优先,横屏更适合完整挥拍追踪", icon: }, - { title: "开始分析", desc: "动作会按连续区间自动聚合,最长单段不超过 10 秒", icon: }, + { title: "开始分析", desc: "动作会先经过 24 帧稳定窗口确认,再按连续区间聚合保存", icon: }, ]; const SEGMENT_MAX_MS = 10_000; -const MERGE_GAP_MS = 500; -const MIN_SEGMENT_MS = 250; +const MERGE_GAP_MS = 900; +const MIN_SEGMENT_MS = 1_200; const CAMERA_QUALITY_PRESETS: Record = { economy: { label: "节省流量", @@ -212,55 +216,6 @@ function createSegment(action: ActionType, elapsedMs: number, frame: AnalyzedFra }; } -function stabilizeAnalyzedFrame(frame: AnalyzedFrame, history: ActionObservation[]): AnalyzedFrame { - const nextHistory = [...history, { action: frame.action, confidence: frame.confidence }].slice(-6); - history.splice(0, history.length, ...nextHistory); - - const weights = nextHistory.map((_, index) => index + 1); - const actionScores = nextHistory.reduce>((acc, sample, index) => { - const weighted = sample.confidence * weights[index]; - acc[sample.action] = (acc[sample.action] || 0) + weighted; - return acc; - }, { - forehand: 0, - backhand: 0, - serve: 0, - volley: 0, - overhead: 0, - slice: 0, - lob: 0, - unknown: 0, - }); - - const ranked = Object.entries(actionScores).sort((a, b) => b[1] - a[1]) as Array<[ActionType, number]>; - const [winner = "unknown", winnerScore = 0] = ranked[0] || []; - const [, runnerScore = 0] = ranked[1] || []; - const winnerSamples = nextHistory.filter((sample) => sample.action === winner); - const averageConfidence = winnerSamples.length > 0 - ? winnerSamples.reduce((sum, sample) => sum + sample.confidence, 0) / winnerSamples.length - : frame.confidence; - - const stableAction = - winner === "unknown" && frame.action !== "unknown" && frame.confidence >= 0.52 - ? frame.action - : winnerScore - runnerScore < 0.2 && frame.confidence >= 0.65 - ? frame.action - : winner; - - const stableConfidence = stableAction === frame.action - ? Math.max(frame.confidence, averageConfidence) - : averageConfidence; - - return { - ...frame, - action: stableAction, - confidence: clamp(stableConfidence, 0, 1), - feedback: stableAction === "unknown" - ? ["系统正在继续观察,当前窗口内未形成稳定动作特征。", ...frame.feedback].slice(0, 3) - : frame.feedback, - }; -} - function analyzePoseFrame(landmarks: Point[], tracking: TrackingState, timestamp: number): AnalyzedFrame { const nose = landmarks[0]; const leftShoulder = landmarks[11]; @@ -488,33 +443,6 @@ function analyzePoseFrame(landmarks: Point[], tracking: TrackingState, timestamp }; } -function drawOverlay(canvas: HTMLCanvasElement | null, landmarks: Point[] | undefined) { - const ctx = canvas?.getContext("2d"); - if (!canvas || !ctx) return; - ctx.clearRect(0, 0, canvas.width, canvas.height); - if (!landmarks) return; - - ctx.strokeStyle = "rgba(25, 211, 155, 0.9)"; - ctx.lineWidth = 3; - for (const [from, to] of POSE_CONNECTIONS) { - const a = landmarks[from]; - const b = landmarks[to]; - if (!a || !b || (a.visibility ?? 1) < 0.25 || (b.visibility ?? 1) < 0.25) continue; - ctx.beginPath(); - ctx.moveTo(a.x * canvas.width, a.y * canvas.height); - ctx.lineTo(b.x * canvas.width, b.y * canvas.height); - ctx.stroke(); - } - - landmarks.forEach((point, index) => { - if ((point.visibility ?? 1) < 0.25) return; - ctx.fillStyle = index >= 11 && index <= 16 ? "rgba(253, 224, 71, 0.95)" : "rgba(255,255,255,0.88)"; - ctx.beginPath(); - ctx.arc(point.x * canvas.width, point.y * canvas.height, index >= 11 && index <= 16 ? 5 : 4, 0, Math.PI * 2); - ctx.fill(); - }); -} - function ScoreBar({ label, value, accent }: { label: string; value: number; accent?: string }) { return (
@@ -559,11 +487,17 @@ export default function LiveCamera() { const animationRef = useRef(0); const sessionStartedAtRef = useRef(0); const trackingRef = useRef({}); - const actionHistoryRef = useRef([]); + const actionHistoryRef = useRef([]); + const stableActionStateRef = useRef(createStableActionState()); const currentSegmentRef = useRef(null); const segmentsRef = useRef([]); const frameSamplesRef = useRef([]); + const volatilitySamplesRef = useRef([]); const zoomTargetRef = useRef(1); + const avatarRenderRef = useRef({ + enabled: false, + avatarKey: "gorilla", + }); const [cameraActive, setCameraActive] = useState(false); const [facing, setFacing] = useState("environment"); @@ -577,12 +511,22 @@ export default function LiveCamera() { const [immersivePreview, setImmersivePreview] = useState(false); const [liveScore, setLiveScore] = useState(null); const [currentAction, setCurrentAction] = useState("unknown"); + const [rawAction, setRawAction] = useState("unknown"); const [feedback, setFeedback] = useState([]); const [segments, setSegments] = useState([]); const [durationMs, setDurationMs] = useState(0); const [segmentFilter, setSegmentFilter] = useState("all"); const [qualityPreset, setQualityPreset] = useState("economy"); const [zoomState, setZoomState] = useState(() => readTrackZoomState(null)); + const [stabilityMeta, setStabilityMeta] = useState(() => createEmptyStabilizedActionMeta()); + const [avatarEnabled, setAvatarEnabled] = useState(false); + const [avatarKey, setAvatarKey] = useState("gorilla"); + const [avatarPrompt, setAvatarPrompt] = useState(""); + + const resolvedAvatarKey = useMemo( + () => resolveAvatarKeyFromPrompt(avatarPrompt, avatarKey), + [avatarKey, avatarPrompt], + ); const uploadMutation = trpc.video.upload.useMutation(); const saveLiveSessionMutation = trpc.analysis.liveSessionSave.useMutation({ @@ -597,6 +541,14 @@ export default function LiveCamera() { }); const liveSessionsQuery = trpc.analysis.liveSessionList.useQuery({ limit: 8 }); + useEffect(() => { + avatarRenderRef.current = { + enabled: avatarEnabled, + avatarKey: resolvedAvatarKey, + customLabel: avatarPrompt.trim() || undefined, + }; + }, [avatarEnabled, avatarPrompt, resolvedAvatarKey]); + const visibleSegments = useMemo( () => segments.filter((segment) => !segment.isUnknown).sort((a, b) => b.startMs - a.startMs), [segments], @@ -697,6 +649,12 @@ export default function LiveCamera() { if (videoRef.current) { videoRef.current.srcObject = null; } + actionHistoryRef.current = []; + stableActionStateRef.current = createStableActionState(); + volatilitySamplesRef.current = []; + setCurrentAction("unknown"); + setRawAction("unknown"); + setStabilityMeta(createEmptyStabilizedActionMeta()); setZoomState(readTrackZoomState(null)); setCameraActive(false); }, [stopSessionRecorder]); @@ -906,6 +864,10 @@ export default function LiveCamera() { const averageFootwork = scoreSamples.length > 0 ? scoreSamples.reduce((sum, item) => sum + item.footwork, 0) / scoreSamples.length : liveScore?.footwork || 0; const averageConsistency = scoreSamples.length > 0 ? scoreSamples.reduce((sum, item) => sum + item.consistency, 0) / scoreSamples.length : liveScore?.consistency || 0; const sessionFeedback = Array.from(new Set(finalSegments.flatMap((segment) => segment.issueSummary))).slice(0, 5); + const averageRawVolatility = volatilitySamplesRef.current.length > 0 + ? volatilitySamplesRef.current.reduce((sum, value) => sum + value, 0) / volatilitySamplesRef.current.length + : 0; + const avatarState = avatarRenderRef.current; let uploadedVideo: { videoId: number; url: string } | null = null; const recordedBlob = await stopSessionRecorder(); @@ -948,8 +910,14 @@ export default function LiveCamera() { feedback: sessionFeedback, metrics: { actionDurations: segmentDurations, + stabilizedActionDurations: segmentDurations, averageConfidence: Math.round((scoreSamples.reduce((sum, item) => sum + item.confidence, 0) / Math.max(1, scoreSamples.length)) * 10) / 10, sampleCount: scoreSamples.length, + stableWindowFrames: ACTION_WINDOW_FRAMES, + actionSwitchCount: stableActionStateRef.current.switchCount, + rawActionVolatility: Number(averageRawVolatility.toFixed(4)), + avatarEnabled: avatarState.enabled, + avatarKey: avatarState.enabled ? avatarState.avatarKey : null, mobile, }, segments: finalSegments.map((segment) => ({ @@ -987,8 +955,15 @@ export default function LiveCamera() { currentSegmentRef.current = null; trackingRef.current = {}; actionHistoryRef.current = []; + stableActionStateRef.current = createStableActionState(); frameSamplesRef.current = []; + volatilitySamplesRef.current = []; sessionStartedAtRef.current = Date.now(); + setCurrentAction("unknown"); + setRawAction("unknown"); + setLiveScore(null); + setFeedback([]); + setStabilityMeta(createEmptyStabilizedActionMeta()); setDurationMs(0); startSessionRecorder(streamRef.current); @@ -1023,19 +998,48 @@ export default function LiveCamera() { canvas.height = video.videoHeight; } - drawOverlay(canvas, results.poseLandmarks); + drawLiveCameraOverlay(canvas, results.poseLandmarks, avatarRenderRef.current); if (!results.poseLandmarks) return; - const analyzed = stabilizeAnalyzedFrame( - analyzePoseFrame(results.poseLandmarks, trackingRef.current, performance.now()), + const frameTimestamp = performance.now(); + const analyzed = analyzePoseFrame(results.poseLandmarks, trackingRef.current, frameTimestamp); + const nextStabilityMeta = stabilizeActionStream( + { + action: analyzed.action, + confidence: analyzed.confidence, + timestamp: frameTimestamp, + }, actionHistoryRef.current, + stableActionStateRef.current, ); const elapsedMs = Date.now() - sessionStartedAtRef.current; - appendFrameToSegment(analyzed, elapsedMs); - frameSamplesRef.current.push(analyzed.score); - setLiveScore(analyzed.score); - setCurrentAction(analyzed.action); - setFeedback(analyzed.feedback); + const stabilityLabel = nextStabilityMeta.pendingAction ?? nextStabilityMeta.windowAction; + const stabilityFeedback = nextStabilityMeta.pending && stabilityLabel !== "unknown" + ? [`正在确认 ${ACTION_META[stabilityLabel].label},需要持续约 0.7 秒后再切换。`, ...analyzed.feedback] + : nextStabilityMeta.stableAction === "unknown" + ? ["系统正在积累 24 帧动作窗口,当前先作为观察片段处理。", ...analyzed.feedback] + : analyzed.action !== nextStabilityMeta.stableAction + ? [`原始候选为 ${ACTION_META[analyzed.action].label},当前保持 ${ACTION_META[nextStabilityMeta.stableAction].label}。`, ...analyzed.feedback] + : analyzed.feedback; + const displayedScore: PoseScore = { + ...analyzed.score, + confidence: Math.round(nextStabilityMeta.stableConfidence * 100), + }; + const stabilizedFrame: AnalyzedFrame = { + ...analyzed, + action: nextStabilityMeta.stableAction, + confidence: nextStabilityMeta.stableConfidence, + score: displayedScore, + feedback: stabilityFeedback.slice(0, 3), + }; + appendFrameToSegment(stabilizedFrame, elapsedMs); + frameSamplesRef.current.push(displayedScore); + volatilitySamplesRef.current.push(nextStabilityMeta.rawVolatility); + setLiveScore(displayedScore); + setCurrentAction(nextStabilityMeta.stableAction); + setRawAction(analyzed.action); + setStabilityMeta(nextStabilityMeta); + setFeedback(stabilizedFrame.feedback); setDurationMs(elapsedMs); }); @@ -1108,7 +1112,16 @@ export default function LiveCamera() { }, [facing, qualityPreset, startCamera]); const heroAction = ACTION_META[currentAction]; - const previewTitle = analyzing ? `${heroAction.label} 识别中` : cameraActive ? "准备开始实时分析" : "摄像头待启动"; + const rawActionMeta = ACTION_META[rawAction]; + const pendingActionMeta = stabilityMeta.pendingAction ? ACTION_META[stabilityMeta.pendingAction] : null; + const resolvedAvatarLabel = AVATAR_PRESETS.find((preset) => preset.key === resolvedAvatarKey)?.label || "猩猩"; + const previewTitle = analyzing + ? stabilityMeta.pending && pendingActionMeta + ? `${pendingActionMeta.label} 切换确认中` + : `${heroAction.label} 识别中` + : cameraActive + ? "准备开始实时分析" + : "摄像头待启动"; const renderPrimaryActions = (rail = false) => { const buttonClass = rail @@ -1285,12 +1298,16 @@ export default function LiveCamera() {
- 自动动作识别 + 24 帧稳定识别 + + + {avatarEnabled ? `虚拟形象 ${resolvedAvatarLabel}` : "骨架叠加"} + {sessionMode === "practice" ? "练习会话" : "训练 PK"} @@ -1303,23 +1320,27 @@ export default function LiveCamera() {

实时分析中枢

- 摄像头启动后默认自动识别正手、反手、发球、截击、高压、切削、挑高球与未知动作,连续片段会自动聚合,并回写训练记录、成就进度和综合评分。 + 摄像头启动后会持续识别正手、反手、发球、截击、高压、切削、挑高球与未知动作。系统会用 24 帧时间窗口统一动作,再把稳定动作写入片段、训练记录与评分;开启虚拟形象后,画面中的人体会被猩猩或其他卡通形象覆盖显示。

-
+
-
当前动作
+
稳定动作
{heroAction.label}
+
+
原始候选
+
{rawActionMeta.label}
+
识别时长
{formatDuration(durationMs)}
-
已聚合片段
-
{segments.length}
+
稳定窗口
+
{stabilityMeta.windowFrames}/{ACTION_WINDOW_FRAMES}
@@ -1359,12 +1380,18 @@ export default function LiveCamera() {
- {previewTitle} + {previewTitle} 非未知片段 {visibleSegments.length} + {avatarEnabled ? ( + + + 虚拟形象 {resolvedAvatarLabel} + + ) : null}
{mobile ? ( @@ -1381,9 +1408,28 @@ export default function LiveCamera() { {cameraActive && zoomState.supported ? renderZoomOverlay() : null} - {(analyzing || saving) ? ( -
- {saving ? "正在保存会话..." : `识别中 · ${formatDuration(durationMs)}`} + {(cameraActive || saving) ? ( +
+
+
+
稳定动作
+
{heroAction.label}
+
原始候选 {rawActionMeta.label}
+
+
+
稳定窗口
+
+ {stabilityMeta.windowFrames}/{ACTION_WINDOW_FRAMES} · {Math.round(stabilityMeta.windowShare * 100)}% +
+
+ {saving + ? "正在保存会话..." + : stabilityMeta.pending && pendingActionMeta + ? `切换确认中 · ${pendingActionMeta.label} · ${Math.max(0, stabilityMeta.candidateMs / 1000).toFixed(1)}s` + : `已稳定 ${Math.max(0, stabilityMeta.stableMs / 1000).toFixed(1)}s · 波动 ${Math.round(stabilityMeta.rawVolatility * 100)}%`} +
+
+
) : null}
@@ -1403,6 +1449,50 @@ export default function LiveCamera() { {renderPrimaryActions()}
+
+
+
+
+
虚拟形象替换
+
+ 开启后实时画面会用卡通形象覆盖主体,仅影响前端叠加显示,不改变动作识别与原视频归档。 +
+
+ +
+
+ 当前映射:{resolvedAvatarLabel} + {avatarPrompt.trim() ? ` · 输入 ${avatarPrompt.trim()}` : " · 可输入别名自动映射到内置形象"} +
+
+
+
形象预设
+ +
+
+
扩展别名
+ setAvatarPrompt(event.target.value)} + placeholder="例如 猴子 / dog mascot" + className="h-12 rounded-2xl border-border/60" + /> +
+
@@ -1410,7 +1500,7 @@ export default function LiveCamera() { 拍摄与流量设置 - 默认使用节省流量模式,必要时再切到更高画质。 + 默认使用节省流量模式;动作切换会经过 24 帧稳定窗口确认后再入库。
@@ -1481,7 +1571,7 @@ export default function LiveCamera() { 连续动作区间 - 自动保留非未知动作区间,单段最长 10 秒,方便后续查看和回放。 + 只保留通过稳定窗口确认后的动作区间,单段最长 10 秒,方便后续查看和回放。 @@ -1618,6 +1708,25 @@ export default function LiveCamera() { 实时反馈 +
+
+ 稳定动作 + {heroAction.label} +
+
+
原始候选 {rawActionMeta.label}
+
窗口 {stabilityMeta.windowFrames}/{ACTION_WINDOW_FRAMES}
+
占比 {Math.round(stabilityMeta.windowShare * 100)}%
+
动作切换 {stabilityMeta.switchCount} 次
+
+ +
+ {stabilityMeta.pending && pendingActionMeta + ? `当前正在确认 ${pendingActionMeta.label},确认后才会切段入库。` + : "当前区间只会按稳定动作聚合,短时抖动不会直接切换动作。"} +
+
+ {feedback.length > 0 ? feedback.map((item) => (
{item} @@ -1714,12 +1823,41 @@ export default function LiveCamera() { {heroAction.label} + {avatarEnabled ? ( + + + {resolvedAvatarLabel} + + ) : null} 核心操作在右侧
+
+
+
+
稳定动作
+
{heroAction.label}
+
+
+
原始候选
+
{rawActionMeta.label}
+
+
+
稳定窗口
+
{stabilityMeta.windowFrames}/{ACTION_WINDOW_FRAMES}
+
+
+
当前状态
+
+ {stabilityMeta.pending && pendingActionMeta ? `确认 ${pendingActionMeta.label}` : "稳定跟踪中"} +
+
+
+
+