Initial project bootstrap

2026-03-14 07:18:53 -04:00
--- a/server/_core/voiceTranscription.ts
+++ b/server/_core/voiceTranscription.ts
@@ -0,0 +1,284 @@
+/**
+ * Voice transcription helper using internal Speech-to-Text service
+ *
+ * Frontend implementation guide:
+ * 1. Capture audio using MediaRecorder API
+ * 2. Upload audio to storage (e.g., S3) to get URL
+ * 3. Call transcription with the URL
+ * 
+ * Example usage:
+ * ```tsx
+ * // Frontend component
+ * const transcribeMutation = trpc.voice.transcribe.useMutation({
+ *   onSuccess: (data) => {
+ *     console.log(data.text); // Full transcription
+ *     console.log(data.language); // Detected language
+ *     console.log(data.segments); // Timestamped segments
+ *   }
+ * });
+ * 
+ * // After uploading audio to storage
+ * transcribeMutation.mutate({
+ *   audioUrl: uploadedAudioUrl,
+ *   language: 'en', // optional
+ *   prompt: 'Transcribe the meeting' // optional
+ * });
+ * ```
+ */
+import { ENV } from "./env";
+
+export type TranscribeOptions = {
+  audioUrl: string; // URL to the audio file (e.g., S3 URL)
+  language?: string; // Optional: specify language code (e.g., "en", "es", "zh")
+  prompt?: string; // Optional: custom prompt for the transcription
+};
+
+// Native Whisper API segment format
+export type WhisperSegment = {
+  id: number;
+  seek: number;
+  start: number;
+  end: number;
+  text: string;
+  tokens: number[];
+  temperature: number;
+  avg_logprob: number;
+  compression_ratio: number;
+  no_speech_prob: number;
+};
+
+// Native Whisper API response format
+export type WhisperResponse = {
+  task: "transcribe";
+  language: string;
+  duration: number;
+  text: string;
+  segments: WhisperSegment[];
+};
+
+export type TranscriptionResponse = WhisperResponse; // Return native Whisper API response directly
+
+export type TranscriptionError = {
+  error: string;
+  code: "FILE_TOO_LARGE" | "INVALID_FORMAT" | "TRANSCRIPTION_FAILED" | "UPLOAD_FAILED" | "SERVICE_ERROR";
+  details?: string;
+};
+
+/**
+ * Transcribe audio to text using the internal Speech-to-Text service
+ * 
+ * @param options - Audio data and metadata
+ * @returns Transcription result or error
+ */
+export async function transcribeAudio(
+  options: TranscribeOptions
+): Promise<TranscriptionResponse | TranscriptionError> {
+  try {
+    // Step 1: Validate environment configuration
+    if (!ENV.forgeApiUrl) {
+      return {
+        error: "Voice transcription service is not configured",
+        code: "SERVICE_ERROR",
+        details: "BUILT_IN_FORGE_API_URL is not set"
+      };
+    }
+    if (!ENV.forgeApiKey) {
+      return {
+        error: "Voice transcription service authentication is missing",
+        code: "SERVICE_ERROR",
+        details: "BUILT_IN_FORGE_API_KEY is not set"
+      };
+    }
+
+    // Step 2: Download audio from URL
+    let audioBuffer: Buffer;
+    let mimeType: string;
+    try {
+      const response = await fetch(options.audioUrl);
+      if (!response.ok) {
+        return {
+          error: "Failed to download audio file",
+          code: "INVALID_FORMAT",
+          details: `HTTP ${response.status}: ${response.statusText}`
+        };
+      }
+      
+      audioBuffer = Buffer.from(await response.arrayBuffer());
+      mimeType = response.headers.get('content-type') || 'audio/mpeg';
+      
+      // Check file size (16MB limit)
+      const sizeMB = audioBuffer.length / (1024 * 1024);
+      if (sizeMB > 16) {
+        return {
+          error: "Audio file exceeds maximum size limit",
+          code: "FILE_TOO_LARGE",
+          details: `File size is ${sizeMB.toFixed(2)}MB, maximum allowed is 16MB`
+        };
+      }
+    } catch (error) {
+      return {
+        error: "Failed to fetch audio file",
+        code: "SERVICE_ERROR",
+        details: error instanceof Error ? error.message : "Unknown error"
+      };
+    }
+
+    // Step 3: Create FormData for multipart upload to Whisper API
+    const formData = new FormData();
+    
+    // Create a Blob from the buffer and append to form
+    const filename = `audio.${getFileExtension(mimeType)}`;
+    const audioBlob = new Blob([new Uint8Array(audioBuffer)], { type: mimeType });
+    formData.append("file", audioBlob, filename);
+    
+    formData.append("model", "whisper-1");
+    formData.append("response_format", "verbose_json");
+    
+    // Add prompt - use custom prompt if provided, otherwise generate based on language
+    const prompt = options.prompt || (
+      options.language 
+        ? `Transcribe the user's voice to text, the user's working language is ${getLanguageName(options.language)}`
+        : "Transcribe the user's voice to text"
+    );
+    formData.append("prompt", prompt);
+
+    // Step 4: Call the transcription service
+    const baseUrl = ENV.forgeApiUrl.endsWith("/")
+      ? ENV.forgeApiUrl
+      : `${ENV.forgeApiUrl}/`;
+    
+    const fullUrl = new URL(
+      "v1/audio/transcriptions",
+      baseUrl
+    ).toString();
+
+    const response = await fetch(fullUrl, {
+      method: "POST",
+      headers: {
+        authorization: `Bearer ${ENV.forgeApiKey}`,
+        "Accept-Encoding": "identity",
+      },
+      body: formData,
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text().catch(() => "");
+      return {
+        error: "Transcription service request failed",
+        code: "TRANSCRIPTION_FAILED",
+        details: `${response.status} ${response.statusText}${errorText ? `: ${errorText}` : ""}`
+      };
+    }
+
+    // Step 5: Parse and return the transcription result
+    const whisperResponse = await response.json() as WhisperResponse;
+    
+    // Validate response structure
+    if (!whisperResponse.text || typeof whisperResponse.text !== 'string') {
+      return {
+        error: "Invalid transcription response",
+        code: "SERVICE_ERROR",
+        details: "Transcription service returned an invalid response format"
+      };
+    }
+
+    return whisperResponse; // Return native Whisper API response directly
+
+  } catch (error) {
+    // Handle unexpected errors
+    return {
+      error: "Voice transcription failed",
+      code: "SERVICE_ERROR",
+      details: error instanceof Error ? error.message : "An unexpected error occurred"
+    };
+  }
+}
+
+/**
+ * Helper function to get file extension from MIME type
+ */
+function getFileExtension(mimeType: string): string {
+  const mimeToExt: Record<string, string> = {
+    'audio/webm': 'webm',
+    'audio/mp3': 'mp3',
+    'audio/mpeg': 'mp3',
+    'audio/wav': 'wav',
+    'audio/wave': 'wav',
+    'audio/ogg': 'ogg',
+    'audio/m4a': 'm4a',
+    'audio/mp4': 'm4a',
+  };
+  
+  return mimeToExt[mimeType] || 'audio';
+}
+
+/**
+ * Helper function to get full language name from ISO code
+ */
+function getLanguageName(langCode: string): string {
+  const langMap: Record<string, string> = {
+    'en': 'English',
+    'es': 'Spanish',
+    'fr': 'French',
+    'de': 'German',
+    'it': 'Italian',
+    'pt': 'Portuguese',
+    'ru': 'Russian',
+    'ja': 'Japanese',
+    'ko': 'Korean',
+    'zh': 'Chinese',
+    'ar': 'Arabic',
+    'hi': 'Hindi',
+    'nl': 'Dutch',
+    'pl': 'Polish',
+    'tr': 'Turkish',
+    'sv': 'Swedish',
+    'da': 'Danish',
+    'no': 'Norwegian',
+    'fi': 'Finnish',
+  };
+  
+  return langMap[langCode] || langCode;
+}
+
+/**
+ * Example tRPC procedure implementation:
+ * 
+ * ```ts
+ * // In server/routers.ts
+ * import { transcribeAudio } from "./_core/voiceTranscription";
+ * 
+ * export const voiceRouter = router({
+ *   transcribe: protectedProcedure
+ *     .input(z.object({
+ *       audioUrl: z.string(),
+ *       language: z.string().optional(),
+ *       prompt: z.string().optional(),
+ *     }))
+ *     .mutation(async ({ input, ctx }) => {
+ *       const result = await transcribeAudio(input);
+ *       
+ *       // Check if it's an error
+ *       if ('error' in result) {
+ *         throw new TRPCError({
+ *           code: 'BAD_REQUEST',
+ *           message: result.error,
+ *           cause: result,
+ *         });
+ *       }
+ *       
+ *       // Optionally save transcription to database
+ *       await db.insert(transcriptions).values({
+ *         userId: ctx.user.id,
+ *         text: result.text,
+ *         duration: result.duration,
+ *         language: result.language,
+ *         audioUrl: input.audioUrl,
+ *         createdAt: new Date(),
+ *       });
+ *       
+ *       return result;
+ *     }),
+ * });
+ * ```
+ */