feat: transcription fix + SLA write-back + real-time supervisor events

- Deepgram: multichannel=true + language=multi (captures both speakers, multilingual)
- LLM speaker identification (agent vs customer from conversational cues)
- Removed summarize=v2 (incompatible with multilingual)
- SLA computation on call creation (lead.createdAt → call.startedAt elapsed %)
- WebSocket: supervisor room + call:created broadcast for real-time updates
- Maint: clear-analysis-cache endpoint + scanKeys/deleteCache on SessionService
- AI chat: rules-engine context routing with dedicated system prompt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-01 16:59:23 +05:30
parent b8556cf440
commit 5e3ccbd040
8 changed files with 461 additions and 33 deletions

View File

@@ -57,10 +57,10 @@ export class RecordingsService {
// Step 1: Send to Deepgram pre-recorded API with diarization + sentiment
const dgResponse = await fetch(DEEPGRAM_API + '?' + new URLSearchParams({
model: 'nova-2',
language: 'en',
language: 'multi',
smart_format: 'true',
diarize: 'true',
summarize: 'v2',
multichannel: 'true',
topics: 'true',
sentiment: 'true',
utterances: 'true',
@@ -82,9 +82,9 @@ export class RecordingsService {
const dgData = await dgResponse.json();
const results = dgData.results;
// Extract utterances (speaker-labeled segments)
// Extract utterances (channel-labeled for multichannel, speaker-labeled otherwise)
const utterances: TranscriptUtterance[] = (results?.utterances ?? []).map((u: any) => ({
speaker: u.speaker ?? 0,
speaker: u.channel ?? u.speaker ?? 0,
start: u.start ?? 0,
end: u.end ?? 0,
text: u.transcript ?? '',
@@ -106,14 +106,27 @@ export class RecordingsService {
? results.channels[0].alternatives[0].words.slice(-1)[0].end
: 0;
// Step 2: Full transcript text for AI analysis
const fullTranscript = utterances.map(u =>
`Speaker ${u.speaker === 0 ? 'Agent' : 'Customer'}: ${u.text}`,
// Step 2: Build raw transcript with channel labels for AI to identify roles
const rawTranscript = utterances.map(u =>
`Channel ${u.speaker}: ${u.text}`,
).join('\n');
this.logger.log(`[RECORDING] Transcribed: ${utterances.length} utterances, ${Math.round(duration)}s`);
// Step 3: AI insights
// Step 3: Ask AI to identify agent vs customer, then generate insights
const speakerMap = await this.identifySpeakers(rawTranscript);
const fullTranscript = utterances.map(u =>
`${speakerMap[u.speaker] ?? `Speaker ${u.speaker}`}: ${u.text}`,
).join('\n');
// Remap utterance speaker labels for the frontend
for (const u of utterances) {
// 0 = agent, 1 = customer in the returned data
const role = speakerMap[u.speaker];
if (role === 'Agent') u.speaker = 0;
else if (role === 'Customer') u.speaker = 1;
}
const insights = await this.generateInsights(fullTranscript, summary, topics);
return {
@@ -126,6 +139,45 @@ export class RecordingsService {
};
}
/**
 * Asks the AI model which transcript channel belongs to the call center agent
 * and returns a channel-number → role-label map.
 *
 * @param rawTranscript Channel-labeled transcript text that is passed to the
 *   model verbatim as the prompt.
 * @returns A map whose values are `'Agent'` or `'Customer'`. The default
 *   mapping `{ 0: 'Customer', 1: 'Agent' }` is returned when no AI model is
 *   configured, the transcript is blank, the model call throws, or the model
 *   answers with a channel other than 0 or 1. This method never rejects.
 */
private async identifySpeakers(rawTranscript: string): Promise<Record<number, string>> {
  // Built per call so a caller mutating the result cannot affect later calls.
  const fallback: Record<number, string> = { 0: 'Customer', 1: 'Agent' };

  if (!this.aiModel || !rawTranscript.trim()) {
    return fallback;
  }

  try {
    const { object } = await generateObject({
      model: this.aiModel,
      schema: z.object({
        agentChannel: z.number().describe('The channel number (0 or 1) that is the call center agent'),
        reasoning: z.string().describe('Brief explanation of how you identified the agent'),
      }),
      system: `You are analyzing a hospital call center recording transcript.
Each line is labeled with a channel number. One channel is the call center agent, the other is the customer/patient.
The AGENT typically:
- Greets professionally ("Hello, Global Hospital", "How can I help you?")
- Asks for patient details (name, phone, department)
- Provides information about doctors, schedules, services
- Navigates systems, puts on hold, transfers calls
The CUSTOMER typically:
- Asks questions about appointments, doctors, services
- Provides personal details when asked
- Describes symptoms or reasons for calling`,
      prompt: rawTranscript,
      maxOutputTokens: 100,
    });

    const agentCh = object.agentChannel;

    // The schema only guarantees "a number"; the model may still answer with
    // 2, -1 or 0.5. Such a value would leave one real channel unlabeled, so
    // treat it like a failed identification.
    if (agentCh !== 0 && agentCh !== 1) {
      this.logger.warn(`[RECORDING] Speaker identification returned invalid channel ${agentCh}; using default mapping`);
      return fallback;
    }

    const customerCh = agentCh === 0 ? 1 : 0;
    this.logger.log(`[RECORDING] Speaker ID: agent=Ch${agentCh}, customer=Ch${customerCh} (${object.reasoning})`);
    return { [agentCh]: 'Agent', [customerCh]: 'Customer' };
  } catch (err) {
    // Best-effort: speaker identification must not abort the analysis.
    this.logger.warn(`[RECORDING] Speaker identification failed: ${err}`);
    return fallback;
  }
}
private computeAverageSentiment(segments: any[]): { label: 'positive' | 'neutral' | 'negative' | 'mixed'; score: number } {
if (!segments?.length) return { label: 'neutral', score: 0 };