// helix-engage-server/src/recordings/recordings.service.ts

import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { generateObject } from 'ai';
import { z } from 'zod';
import { createAiModel } from '../ai/ai-provider';
import type { LanguageModel } from 'ai';
import { AiConfigService } from '../config/ai-config.service';

// Base URL of the Deepgram endpoint that RecordingsService POSTs recordings to;
// per-request options are appended as a query string by the caller.
const DEEPGRAM_API = 'https://api.deepgram.com/v1/listen';

/**
 * A single transcribed word with its timing, speaker label and confidence.
 *
 * NOTE(review): this type is not referenced anywhere else in this file, so the
 * units of `start`/`end` (presumably seconds from the start of the audio) and
 * the range of `confidence` should be confirmed against its consumers.
 */
export type TranscriptWord = {
    word: string;
    start: number;
    end: number;
    speaker: number;
    confidence: number;
};

/**
 * One utterance of the transcript, as returned in `CallAnalysis.transcript`.
 */
export type TranscriptUtterance = {
    // Speaker label. analyzeRecording() rewrites it to 0 for the agent and
    // 1 for the customer once a role has been assigned to the speaker.
    speaker: number;
    // Start/end times copied from the Deepgram utterance (0 when absent);
    // presumably seconds from the start of the recording — confirm.
    start: number;
    end: number;
    // Transcribed text of the utterance ('' when Deepgram returned none).
    text: string;
};

/**
 * Result of RecordingsService.analyzeRecording(): the transcript plus
 * sentiment and AI-generated insights for one call recording.
 */
export type CallAnalysis = {
    // Utterances in the order Deepgram returned them.
    transcript: TranscriptUtterance[];
    // Short summary taken from the Deepgram response, or null when it has none.
    summary: string | null;
    // Overall label derived from counting positive/negative sentiment segments.
    sentiment: 'positive' | 'neutral' | 'negative' | 'mixed';
    // (positive segments - negative segments) / total segments; 0 when there are no segments.
    sentimentScore: number;
    insights: {
        // Main topics discussed (at most 5 on the fallback path, which uses Deepgram topics).
        keyTopics: string[];
        // Follow-up actions needed.
        actionItems: string[];
        // Agent performance observations.
        coachingNotes: string[];
        // Compliance concerns raised by the analysis.
        complianceFlags: string[];
        // One-line free-text assessment; 'Unknown' or 'Analysis unavailable' when the AI step did not run or failed.
        patientSatisfaction: string;
        // One-line free-text outcome; same fallback values as patientSatisfaction.
        callOutcome: string;
    };
    // Call length in seconds, rounded to the nearest integer; derived from the
    // end time of the last transcribed word (0 when nothing was transcribed).
    durationSec: number;
};

/** Subset of a Deepgram utterance object that this service reads. */
type DeepgramUtterance = {
    channel?: number;
    speaker?: number;
    start?: number;
    end?: number;
    transcript?: string;
};

/** Subset of the Deepgram pre-recorded response that this service reads. */
type DeepgramResponse = {
    results?: {
        channels?: Array<{ alternatives?: Array<{ words?: Array<{ end?: number }> }> }>;
        utterances?: DeepgramUtterance[];
        summary?: { short?: string | null };
        sentiments?: { segments?: Array<{ sentiment?: string }> };
        topics?: { segments?: Array<{ topics?: Array<{ topic?: string }> }> };
    };
};

/**
 * Transcribes call recordings with Deepgram and enriches the transcript with
 * speaker roles, an aggregate sentiment and AI-generated insights.
 *
 * The AI steps are optional: when no model could be created (or a call to it
 * fails) the service falls back to default speaker roles and placeholder
 * insights instead of failing the whole analysis.
 */
@Injectable()
export class RecordingsService {
    private readonly logger = new Logger(RecordingsService.name);
    private readonly deepgramApiKey: string;
    private readonly aiModel: LanguageModel | null;

    constructor(
        private config: ConfigService,
        private aiConfig: AiConfigService,
    ) {
        this.deepgramApiKey = process.env.DEEPGRAM_API_KEY ?? '';
        const cfg = aiConfig.getConfig();
        this.aiModel = createAiModel({
            provider: cfg.provider,
            model: cfg.model,
            anthropicApiKey: config.get<string>('ai.anthropicApiKey'),
            openaiApiKey: config.get<string>('ai.openaiApiKey'),
        });
    }

    /**
     * Transcribes and analyses one recording.
     *
     * @param recordingUrl URL of the audio file; it is handed to Deepgram, which fetches it.
     * @returns Transcript (speaker 0 = agent, 1 = customer where a role was assigned),
     *          summary, sentiment, insights and rounded duration.
     * @throws Error when DEEPGRAM_API_KEY is not configured or Deepgram responds with a non-2xx status.
     */
    async analyzeRecording(recordingUrl: string): Promise<CallAnalysis> {
        if (!this.deepgramApiKey) throw new Error('DEEPGRAM_API_KEY not configured');

        this.logger.log(`[RECORDING] Analyzing: ${recordingUrl}`);

        // Step 1: Send to Deepgram pre-recorded API with diarization + sentiment
        const dgData = await this.transcribe(recordingUrl);
        const results = dgData.results;

        // Utterances carry a `channel` index even for mono audio, so the channel is
        // only a meaningful speaker label when the recording really has several
        // channels; otherwise the diarized `speaker` id is the one that varies.
        const isMultichannel = (results?.channels?.length ?? 0) > 1;
        const utterances: TranscriptUtterance[] = (results?.utterances ?? []).map((u) => ({
            speaker: (isMultichannel ? (u.channel ?? u.speaker) : (u.speaker ?? u.channel)) ?? 0,
            start: u.start ?? 0,
            end: u.end ?? 0,
            text: u.transcript ?? '',
        }));

        // NOTE(review): the request does not set `summarize`, so this is expected to be
        // null unless Deepgram includes a summary by default — confirm before relying on it.
        const summary = results?.summary?.short ?? null;

        const avgSentiment = this.computeAverageSentiment(results?.sentiments?.segments ?? []);

        // The same topic can be reported for several segments; keep each one once so
        // that the "first five" used downstream are five distinct topics.
        const topics = Array.from(
            new Set(
                (results?.topics?.segments ?? []).flatMap((segment) =>
                    (segment.topics ?? []).map((t) => t.topic),
                ),
            ),
        ).filter((topic): topic is string => typeof topic === 'string' && topic.length > 0);

        const duration = this.computeDuration(dgData, utterances);

        // Step 2: Build raw transcript with channel labels for AI to identify roles
        const rawTranscript = utterances
            .map((u) => `Channel ${u.speaker}: ${u.text}`)
            .join('\n');

        this.logger.log(`[RECORDING] Transcribed: ${utterances.length} utterances, ${Math.round(duration)}s`);

        // Step 3: Ask AI to identify agent vs customer, then generate insights
        const speakerMap = await this.identifySpeakers(rawTranscript);
        const fullTranscript = utterances
            .map((u) => `${speakerMap[u.speaker] ?? `Speaker ${u.speaker}`}: ${u.text}`)
            .join('\n');

        // Relabel for the frontend: 0 = agent, 1 = customer. Speakers without an
        // assigned role keep their original label.
        const labelled = utterances.map((u) => {
            const role = speakerMap[u.speaker];
            if (role === 'Agent') return { ...u, speaker: 0 };
            if (role === 'Customer') return { ...u, speaker: 1 };
            return u;
        });

        const insights = await this.generateInsights(fullTranscript, summary, topics);

        return {
            transcript: labelled,
            summary,
            sentiment: avgSentiment.label,
            sentimentScore: avgSentiment.score,
            insights,
            durationSec: Math.round(duration),
        };
    }

    /**
     * POSTs the recording URL to Deepgram and returns the parsed response body.
     *
     * @throws Error when Deepgram answers with a non-2xx status (the response body is logged).
     */
    private async transcribe(recordingUrl: string): Promise<DeepgramResponse> {
        const query = new URLSearchParams({
            model: 'nova-2',
            language: 'multi',
            smart_format: 'true',
            diarize: 'true',
            multichannel: 'true',
            topics: 'true',
            sentiment: 'true',
            utterances: 'true',
        });

        const dgResponse = await fetch(DEEPGRAM_API + '?' + query, {
            method: 'POST',
            headers: {
                'Authorization': `Token ${this.deepgramApiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ url: recordingUrl }),
        });

        if (!dgResponse.ok) {
            const err = await dgResponse.text();
            this.logger.error(`[RECORDING] Deepgram failed: ${dgResponse.status} ${err}`);
            throw new Error(`Deepgram transcription failed: ${dgResponse.status}`);
        }

        // The body is external JSON; every field of DeepgramResponse is optional and
        // is read defensively by the callers.
        return (await dgResponse.json()) as DeepgramResponse;
    }

    /**
     * Returns the end time of the last transcribed word across ALL channels.
     *
     * Looking at channel 0 only would report 0 (or too short a duration) whenever
     * that channel is silent or stops speaking before the other one. Utterance end
     * times are used as a fallback, and non-numeric values are ignored so the
     * result is never NaN.
     */
    private computeDuration(data: DeepgramResponse, utterances: readonly TranscriptUtterance[]): number {
        let latest = 0;

        for (const channel of data.results?.channels ?? []) {
            const words = channel.alternatives?.[0]?.words ?? [];
            const end = words[words.length - 1]?.end;
            if (typeof end === 'number' && Number.isFinite(end) && end > latest) latest = end;
        }

        for (const u of utterances) {
            if (Number.isFinite(u.end) && u.end > latest) latest = u.end;
        }

        return latest;
    }

    /** Role assignment used when the AI model is unavailable or its answer is unusable. */
    private defaultSpeakerRoles(): Record<number, string> {
        return { 0: 'Customer', 1: 'Agent' };
    }

    /**
     * Asks the AI model which of the two labels (0 or 1) is the call center agent.
     *
     * @param rawTranscript Transcript whose lines are prefixed with `Channel N:`.
     * @returns Map from speaker label to 'Agent' / 'Customer'. Falls back to
     *          `{ 0: 'Customer', 1: 'Agent' }` when there is no model, the transcript
     *          is blank, the call fails, or the model answers with anything but 0 or 1.
     */
    private async identifySpeakers(rawTranscript: string): Promise<Record<number, string>> {
        if (!this.aiModel || !rawTranscript.trim()) {
            return this.defaultSpeakerRoles();
        }

        try {
            const { object } = await generateObject({
                model: this.aiModel,
                schema: z.object({
                    agentChannel: z.number().describe('The channel number (0 or 1) that is the call center agent'),
                    reasoning: z.string().describe('Brief explanation of how you identified the agent'),
                }),
                system: `You are analyzing a hospital call center recording transcript.
Each line is labeled with a channel number. One channel is the call center agent, the other is the customer/patient.

The AGENT typically:
- Greets professionally ("Hello, Global Hospital", "How can I help you?")
- Asks for patient details (name, phone, department)
- Provides information about doctors, schedules, services
- Navigates systems, puts on hold, transfers calls

The CUSTOMER typically:
- Asks questions about appointments, doctors, services
- Provides personal details when asked
- Describes symptoms or reasons for calling`,
                prompt: rawTranscript,
                maxOutputTokens: 100,
            });

            const agentCh = object.agentChannel;
            // The schema only guarantees "a number"; anything other than 0 or 1 would
            // produce a role map that matches no (or the wrong) speaker.
            if (agentCh !== 0 && agentCh !== 1) {
                this.logger.warn(`[RECORDING] Speaker identification returned invalid channel ${agentCh}; using default roles`);
                return this.defaultSpeakerRoles();
            }

            const customerCh = agentCh === 0 ? 1 : 0;
            this.logger.log(`[RECORDING] Speaker ID: agent=Ch${agentCh}, customer=Ch${customerCh} (${object.reasoning})`);
            return { [agentCh]: 'Agent', [customerCh]: 'Customer' };
        } catch (err) {
            this.logger.warn(`[RECORDING] Speaker identification failed: ${err}`);
            return this.defaultSpeakerRoles();
        }
    }

    /**
     * Aggregates per-segment sentiment labels into one label and score.
     *
     * Score is (positive - negative) / total segments. The label is 'positive' or
     * 'negative' when one side outnumbers the other by more than 2:1, 'mixed' when
     * both occur without such a majority, and 'neutral' otherwise (including when
     * there are no segments). Segments without a label count as neutral.
     */
    private computeAverageSentiment(
        segments: ReadonlyArray<{ sentiment?: string }>,
    ): { label: 'positive' | 'neutral' | 'negative' | 'mixed'; score: number } {
        if (!segments?.length) return { label: 'neutral', score: 0 };

        let positive = 0;
        let negative = 0;
        for (const seg of segments) {
            const s = seg.sentiment ?? 'neutral';
            if (s === 'positive') positive++;
            else if (s === 'negative') negative++;
        }

        const score = (positive - negative) / segments.length;

        if (positive > negative * 2) return { label: 'positive', score };
        if (negative > positive * 2) return { label: 'negative', score };
        if (positive > 0 && negative > 0) return { label: 'mixed', score };
        return { label: 'neutral', score };
    }

    /** Placeholder insights built from Deepgram topics only; `status` fills both one-line fields. */
    private fallbackInsights(topics: readonly string[], status: string): CallAnalysis['insights'] {
        return {
            keyTopics: topics.slice(0, 5),
            actionItems: [],
            coachingNotes: [],
            complianceFlags: [],
            patientSatisfaction: status,
            callOutcome: status,
        };
    }

    /**
     * Asks the AI model for structured insights about the call.
     *
     * @param transcript Role-labelled transcript used as the prompt.
     * @param summary Optional summary injected into the system prompt.
     * @param topics Detected topics injected into the system prompt and used for the fallback.
     * @returns The model's insights with `keyTopics` capped at 5, or placeholder
     *          insights ('Unknown' without a model or transcript, 'Analysis unavailable'
     *          when the call fails).
     */
    private async generateInsights(
        transcript: string,
        summary: string | null,
        topics: string[],
    ): Promise<CallAnalysis['insights']> {
        if (!this.aiModel || !transcript.trim()) {
            return this.fallbackInsights(topics, 'Unknown');
        }

        try {
            const { object } = await generateObject({
                model: this.aiModel,
                schema: z.object({
                    keyTopics: z.array(z.string()).describe('Main topics discussed (max 5)'),
                    actionItems: z.array(z.string()).describe('Follow-up actions needed'),
                    coachingNotes: z.array(z.string()).describe('Agent performance observations — what went well and what could improve'),
                    complianceFlags: z.array(z.string()).describe('Any compliance concerns (HIPAA, patient safety, misinformation)'),
                    patientSatisfaction: z.string().describe('One-line assessment of patient satisfaction'),
                    callOutcome: z.string().describe('One-line summary of what was accomplished'),
                }),
                system: this.aiConfig.renderPrompt('recordingAnalysis', {
                    hospitalName: process.env.HOSPITAL_NAME ?? 'the hospital',
                    summaryBlock: summary ? `\nCall summary: ${summary}` : '',
                    topicsBlock: topics.length > 0 ? `\nDetected topics: ${topics.join(', ')}` : '',
                }),
                prompt: transcript,
                maxOutputTokens: 500,
            });

            // "max 5" is only a hint to the model; enforce it like the fallback path does.
            return { ...object, keyTopics: object.keyTopics.slice(0, 5) };
        } catch (err) {
            this.logger.error(`[RECORDING] AI insights failed: ${err}`);
            return this.fallbackInsights(topics, 'Analysis unavailable');
        }
    }
}