/**
* Transcript export formatters.
* Each function accepts a Transcript and a speakers dict (id → Speaker) and returns
* a plain-text string ready to be written to a file or rendered in a preview.
*/
import { SCRIPT_SPEAKER_INDENT, SCRIPT_DIALOGUE_INDENT } from "./constants.js"
// ── Private helpers ───────────────────────────────────────────────────────────
/**
 * Converts a duration in seconds to a zero-padded `HH:MM:SS` string.
 * Fractional seconds are floored away before the value is split into fields.
 *
 * @param {number} totalSeconds - Duration in seconds.
 * @returns {string} Hours, minutes and seconds joined by colons, each padded to at least two digits.
 */
function fmtHMS(totalSeconds) {
  const whole = Math.floor(totalSeconds);
  const pad = (value) => `${value}`.padStart(2, '0');
  const hours = Math.floor(whole / 3600);
  const minutes = Math.floor((whole % 3600) / 60);
  const seconds = whole % 60;
  return `${pad(hours)}:${pad(minutes)}:${pad(seconds)}`;
}
/**
 * Resolves a speaker id to the name shown in exports.
 * When the map has no entry for the id, or the entry's `name` is null/undefined,
 * the raw id is returned unchanged.
 *
 * @param {string} speakerId - The speaker identifier to look up.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @returns {string} The speaker's `name`, or `speakerId` as a fallback.
 */
function displayName(speakerId, speakers) {
  const entry = speakers[speakerId];
  if (entry === null || entry === undefined) {
    return speakerId;
  }
  const label = entry.name;
  return label === null || label === undefined ? speakerId : label;
}
/**
 * Lists the display name of every speaker in the transcript, ordered by the
 * position of the first segment attributed to each speaker id. Each id
 * contributes exactly one entry.
 *
 * @param {Transcript} transcript - The transcript whose `segments` are scanned.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @returns {string[]} Display names in first-appearance order.
 */
function speakersInOrder(transcript, speakers) {
  // A Map keeps insertion order, so it doubles as the "seen" set and the result list.
  const firstSeen = new Map();
  for (const { speaker } of transcript.segments) {
    if (firstSeen.has(speaker)) continue;
    firstSeen.set(speaker, displayName(speaker, speakers));
  }
  return [...firstSeen.values()];
}
// ── Formatters ────────────────────────────────────────────────────────────────
/**
 * Formats a transcript in screenplay / dialogue-list style.
 *
 * Layout:
 *  - optional header (uppercased title, description, dates) closed by a rule;
 *  - optional speaker list closed by a rule;
 *  - one entry per paragraph: an indented, uppercased speaker name (suffixed
 *    with ` (CONT'D)` when the previous paragraph had the same speaker)
 *    followed by the dialogue on a single line. Wrapping is left to the
 *    renderer; this function never breaks dialogue lines itself.
 *
 * When timestamps are enabled a `(HH:MM:SS)` timecode is inserted whenever a
 * paragraph starts in a different minute than the previous one. Entries are
 * separated by one blank line, and the output never starts with a blank line.
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object} [options] - Formatting options.
 * @param {boolean} [options.includeSpeakers=false] - Whether to prepend a speaker list.
 * @param {boolean} [options.includeTimestamps=true] - Whether to insert minute-boundary timecodes.
 * @param {boolean} [options.displayRecordingDate=false] - Whether to show the recording date in the header.
 * @param {boolean} [options.displayExportDate=false] - Whether to show today's date in the header.
 * @param {string} [options.recordingDate=''] - Recording date string shown when displayRecordingDate is true.
 * @param {string} [options.title=''] - Document title shown (uppercased) in the header.
 * @param {string} [options.description=''] - Optional description shown beneath the title.
 * @returns {string} The formatted script with trailing whitespace removed.
 */
export function formatScript(transcript, speakers, {
  includeSpeakers = false, includeTimestamps = true,
  displayRecordingDate = false, displayExportDate = false,
  recordingDate = '', title = '', description = '',
} = {}) {
  const lines = [];
  const hr = '─'.repeat(42);
  const speakerPad = ' '.repeat(SCRIPT_SPEAKER_INDENT);
  const dialoguePad = ' '.repeat(SCRIPT_DIALOGUE_INDENT);
  const showRecorded = displayRecordingDate && recordingDate;

  if (title || description || showRecorded || displayExportDate) {
    if (title) lines.push(speakerPad + title.toUpperCase());
    if (description) lines.push(dialoguePad + description);
    if (showRecorded) lines.push(dialoguePad + `Recorded: ${recordingDate}`);
    if (displayExportDate) {
      lines.push(dialoguePad + `Exported: ${new Date().toLocaleDateString()}`);
    }
    lines.push(hr);
  }

  if (includeSpeakers) {
    const names = speakersInOrder(transcript, speakers);
    lines.push('SPEAKERS:', ...names.map(n => ' ' + n));
    lines.push(hr);
  }

  let lastMinute = -1;
  let lastSpeaker = null;
  for (const para of transcript.paragraphs) {
    const currentMinute = Math.floor(para.segments[0].start / 60);

    // One blank line separates this entry from whatever precedes it. Skipped
    // when nothing has been emitted yet so the document never opens with an
    // empty line (previously happened with timestamps off and no header).
    if (lines.length > 0) lines.push('');

    if (includeTimestamps && currentMinute !== lastMinute) {
      // New minute — timecode followed by its own blank line.
      lines.push(`(${fmtHMS(currentMinute * 60)})`, '');
      lastMinute = currentMinute;
    }

    // Speaker name — uppercase, indented; CONT'D if the speaker didn't change.
    const baseName = displayName(para.speaker, speakers).toUpperCase();
    const label = para.speaker === lastSpeaker ? `${baseName} (CONT'D)` : baseName;
    lastSpeaker = para.speaker;
    lines.push(speakerPad + label);

    // Dialogue — one continuous line built from the non-empty, trimmed segments.
    const text = para.segments.map(s => s.text.trim()).filter(Boolean).join(' ');
    lines.push(dialoguePad + text);
  }
  return lines.join('\n').trimEnd();
}
/**
 * Formats a transcript in professional report style.
 *
 * Each speaker block starts with the speaker's display name on its own line,
 * followed by the block's paragraphs. Paragraphs within a block, speaker
 * blocks, and the optional header / speaker-list sections are all separated
 * by one blank line.
 *
 * Paragraph text is built from the trimmed, non-empty segment texts joined by
 * single spaces, so whitespace carried on individual segments does not leak
 * into the output as doubled spaces.
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object} [options] - Formatting options.
 * @param {boolean} [options.includeSpeakers=false] - Whether to prepend a speaker list.
 * @param {boolean} [options.displayRecordingDate=false] - Whether to show the recording date in the header.
 * @param {boolean} [options.displayExportDate=false] - Whether to show today's date in the header.
 * @param {string} [options.recordingDate=''] - Recording date string shown when displayRecordingDate is true.
 * @param {string} [options.title=''] - Document title shown in the header.
 * @param {string} [options.description=''] - Optional description shown beneath the title.
 * @returns {string}
 */
export function formatProfessional(transcript, speakers, {
  includeSpeakers = false,
  displayRecordingDate = false, displayExportDate = false,
  recordingDate = '', title = '', description = '',
} = {}) {
  const parts = [];
  const hr = '─'.repeat(42);
  const showRecorded = displayRecordingDate && recordingDate;

  // Same segment normalisation as the script formatter: trim each segment and
  // drop empty ones before joining.
  const paragraphText = para =>
    para.segments.map(s => s.text.trim()).filter(Boolean).join(' ');

  if (title || description || showRecorded || displayExportDate) {
    const headerLines = [];
    if (title) headerLines.push(title);
    if (description) headerLines.push(description);
    if (showRecorded) headerLines.push(`Recorded: ${recordingDate}`);
    if (displayExportDate) headerLines.push(`Exported: ${new Date().toLocaleDateString()}`);
    headerLines.push(hr);
    parts.push(headerLines.join('\n'));
  }

  if (includeSpeakers) {
    const names = speakersInOrder(transcript, speakers);
    parts.push('Speakers:\n' + names.map(n => ' ' + n).join('\n') + '\n' + hr);
  }

  for (const block of transcript.speakerBlocks) {
    const name = displayName(block.speaker, speakers);
    const paras = block.paragraphs.map(paragraphText).join('\n\n');
    parts.push(`${name}\n\n${paras}`);
  }
  return parts.join('\n\n');
}
/**
 * Formats a transcript as a time-stamped transcription document.
 *
 * The header always starts with the title (or the placeholder
 * `[Audio/Video Title]` when none is given), followed by the optional
 * description, dates and speaker list, and ends with a rule and a
 * `Transcript:` label. Each paragraph is then rendered as
 * `[HH:MM:SS] Name:` (timecode omitted when timestamps are disabled) with the
 * paragraph text in double quotes on the next line.
 *
 * Paragraph text is built from the trimmed, non-empty segment texts joined by
 * single spaces, so whitespace carried on individual segments does not leak
 * into the output as doubled spaces.
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object} [options] - Formatting options.
 * @param {string} [options.title=''] - Source title shown in the header.
 * @param {string} [options.description=''] - Optional description shown beneath the title.
 * @param {boolean} [options.includeSpeakers=false] - Whether to add a speaker list to the header.
 * @param {boolean} [options.includeTimestamps=true] - Whether to prefix paragraphs with timecodes.
 * @param {boolean} [options.displayRecordingDate=false] - Whether to show the recording date in the header.
 * @param {boolean} [options.displayExportDate=false] - Whether to show today's date in the header.
 * @param {string} [options.recordingDate=''] - Recording date string shown when displayRecordingDate is true.
 * @returns {string}
 */
export function formatTranscript(transcript, speakers, {
  title = '', description = '', includeSpeakers = false,
  includeTimestamps = true,
  displayRecordingDate = false, displayExportDate = false,
  recordingDate = '',
} = {}) {
  const hr = '─'.repeat(42);

  const headerLines = [title || '[Audio/Video Title]'];
  if (description) headerLines.push(description);
  if (displayRecordingDate && recordingDate) headerLines.push(`Recorded: ${recordingDate}`);
  if (displayExportDate) headerLines.push(`Exported: ${new Date().toLocaleDateString()}`);
  if (includeSpeakers) {
    const names = speakersInOrder(transcript, speakers);
    headerLines.push(hr, '', `Speakers: ${names.join(', ')}`);
  }
  headerLines.push(hr, '', 'Transcript:');
  const header = headerLines.join('\n');

  // Same segment normalisation as the script formatter: trim each segment and
  // drop empty ones before joining.
  const paragraphText = para =>
    para.segments.map(s => s.text.trim()).filter(Boolean).join(' ');

  const body = transcript.paragraphs.map(para => {
    const name = displayName(para.speaker, speakers);
    const text = paragraphText(para);
    const prefix = includeTimestamps ? `[${fmtHMS(para.segments[0].start)}] ` : '';
    return `${prefix}${name}:\n"${text}"`;
  }).join('\n\n');

  return `${header}\n\n${body}`;
}
/**
 * Formats a transcript as Markdown.
 *
 * The document title becomes an H1; description and dates follow as body /
 * italic lines, closed by a `---` rule when any header line was emitted.
 * Each speaker block starts with the speaker's display name as a level-4
 * heading (`####`), followed by one body paragraph per transcript paragraph.
 * When timestamps are enabled each paragraph is prefixed with its start time
 * as an inline code span.
 *
 * Paragraph text is built from the trimmed, non-empty segment texts joined by
 * single spaces, so whitespace carried on individual segments does not leak
 * into the output as doubled spaces.
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object} [options] - Formatting options.
 * @param {boolean} [options.includeSpeakers=false] - Whether to prepend a speaker list.
 * @param {boolean} [options.includeTimestamps=true] - Whether to prefix paragraphs with timecodes.
 * @param {boolean} [options.displayRecordingDate=false] - Whether to show the recording date in the header.
 * @param {boolean} [options.displayExportDate=false] - Whether to show today's date in the header.
 * @param {string} [options.recordingDate=''] - Recording date string shown when displayRecordingDate is true.
 * @param {string} [options.title=''] - Document title rendered as an H1 heading.
 * @param {string} [options.description=''] - Optional description shown beneath the title.
 * @returns {string} The Markdown document with trailing whitespace removed.
 */
export function formatMarkdown(transcript, speakers, {
  includeSpeakers = false, includeTimestamps = true,
  displayRecordingDate = false, displayExportDate = false,
  recordingDate = '', title = '', description = '',
} = {}) {
  const lines = [];
  if (title) lines.push(`# ${title}`, '');
  if (description) lines.push(description, '');
  if (displayRecordingDate && recordingDate) lines.push(`*Recorded: ${recordingDate}*`, '');
  if (displayExportDate) lines.push(`*Exported: ${new Date().toLocaleDateString()}*`, '');
  // Only close the header with a rule when at least one header line exists.
  if (lines.length) lines.push('---', '');

  if (includeSpeakers) {
    const names = speakersInOrder(transcript, speakers);
    lines.push(`**Speakers:** ${names.join(', ')}`, '', '---', '');
  }

  // Same segment normalisation as the script formatter: trim each segment and
  // drop empty ones before joining.
  const paragraphText = para =>
    para.segments.map(s => s.text.trim()).filter(Boolean).join(' ');

  for (const block of transcript.speakerBlocks) {
    const name = displayName(block.speaker, speakers);
    lines.push(`#### ${name}`, '');
    for (const para of block.paragraphs) {
      const text = paragraphText(para);
      if (includeTimestamps) {
        const t = fmtHMS(para.segments[0].start);
        lines.push(`\`[${t}]\` ${text}`, '');
      } else {
        lines.push(text, '');
      }
    }
  }
  return lines.join('\n').trimEnd();
}
/**
 * Formats a transcript as CSV with the columns `start, end, speaker, text`,
 * one row per segment, preceded by a header row. Rows are separated by `\n`.
 * Speaker ids are replaced with their display names.
 *
 * Quoting follows RFC 4180: a field containing a double quote, comma, CR or
 * LF is wrapped in double quotes and embedded double quotes are doubled.
 * A null/undefined speaker name or text is written as an empty field.
 *
 * @param {Transcript} transcript - The transcript whose `segments` are exported.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @returns {string}
 */
export function formatCSV(transcript, speakers) {
  const escape = (value) => {
    const field = String(value ?? '');
    // \r is included so a bare carriage return cannot split a record.
    return /[",\r\n]/.test(field) ? `"${field.replace(/"/g, '""')}"` : field;
  };
  const rows = transcript.segments.map(s =>
    [s.start, s.end, escape(displayName(s.speaker, speakers)), escape(s.text)].join(',')
  );
  return ['start,end,speaker,text', ...rows].join('\n');
}
/**
 * Selects and runs the formatter that matches a style key.
 * Any key other than the ones listed below is handled by formatTranscript.
 *
 * @param {string} style - 'script' | 'professional' | 'transcript' | 'md' | 'csv'
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object} [options] - Forwarded to the selected formatter; not passed to formatCSV.
 * @returns {string}
 */
export function format(style, transcript, speakers, options = {}) {
  // CSV is the only formatter that takes no options.
  if (style === 'csv') {
    return formatCSV(transcript, speakers);
  }
  const formattersByStyle = new Map([
    ['script', formatScript],
    ['professional', formatProfessional],
    ['transcript', formatTranscript],
    ['md', formatMarkdown],
  ]);
  const chosen = formattersByStyle.get(style) ?? formatTranscript;
  return chosen(transcript, speakers, options);
}
// ── File writers ──────────────────────────────────────────────────────────────
/**
 * Starts a browser download of a Blob by clicking a temporary `<a download>`
 * element that points at an object URL for the blob. The anchor is attached
 * to `document.body` only for the duration of the click, and the object URL
 * is revoked right after.
 *
 * @param {Blob} blob - The file content as a Blob.
 * @param {string} filename - The suggested download filename.
 */
function triggerDownload(blob, filename) {
  const objectUrl = URL.createObjectURL(blob);
  const link = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: filename,
  });
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // NOTE(review): the URL is revoked synchronously after click(); confirm this
  // is reliable on every browser the app targets.
  URL.revokeObjectURL(objectUrl);
}
/**
 * Exports text as a UTF-8 plain-text file.
 * Wraps the text in a `text/plain` Blob and hands it to triggerDownload.
 *
 * @param {string} path - Passed to triggerDownload as the filename argument.
 * @param {string} text - Formatted transcript text from one of the format* functions.
 * @returns {Promise<void>}
 */
export async function exportTXT(path, text) {
  const payload = new Blob([text], { type: 'text/plain;charset=utf-8' });
  triggerDownload(payload, path);
}
/**
 * Exports text as a UTF-8 Markdown file.
 * Wraps the text in a `text/markdown` Blob and hands it to triggerDownload;
 * the text itself is written as given.
 *
 * @param {string} path - Passed to triggerDownload as the filename argument.
 * @param {string} text - Formatted transcript text from one of the format* functions.
 * @returns {Promise<void>}
 */
export async function exportMarkdown(path, text) {
  const payload = new Blob([text], { type: 'text/markdown;charset=utf-8' });
  triggerDownload(payload, path);
}
/**
 * Exports text as a UTF-8 CSV file.
 * Wraps the text in a `text/csv` Blob and hands it to triggerDownload.
 *
 * @param {string} path - Passed to triggerDownload as the filename argument.
 * @param {string} text - CSV string from formatCSV().
 * @returns {Promise<void>}
 */
export async function exportCSV(path, text) {
  const payload = new Blob([text], { type: 'text/csv;charset=utf-8' });
  triggerDownload(payload, path);
}
/**
 * Builds a DOCX document from already-formatted transcript text and hands it
 * to triggerDownload. The text is processed line by line:
 *
 *  - a line made only of `─` characters becomes an empty paragraph with a
 *    bottom border, and ends the header section;
 *  - blank lines become empty paragraphs;
 *  - header lines (everything before the first rule) are centred;
 *  - 'professional': lines equal to a known speaker name are bold and
 *    centred, other lines are justified (font: Georgia);
 *  - 'script': indented all-uppercase speaker labels are bold and centred,
 *    `(HH:MM:SS)` timecodes are grey and left-aligned, other lines are
 *    justified (font: Courier New);
 *  - any other style: `[HH:MM:SS]`-prefixed lines are grey and left-aligned,
 *    other lines are justified (font: Calibri).
 *
 * Text that contains no rule line has no header section, so all of its lines
 * are classified by style instead of being centred as header text.
 *
 * @param {string} path - Passed to triggerDownload as the filename argument.
 * @param {string} text - Formatted transcript text from one of the format* functions.
 * @param {string} style - Export style key: 'script' | 'professional' | 'transcript'.
 * @param {Object.<string, Speaker>} [speakers] - Speaker map, used to identify names for bold rendering.
 * @returns {Promise<void>}
 * @throws {Error} When `window.docx` is not available.
 */
export async function exportDOCX(path, text, style, speakers) {
  if (!window.docx) throw new Error('docx not loaded — check the CDN script in index.html');
  const { Document, Paragraph, TextRun, Packer, BorderStyle, AlignmentType = {} } = window.docx;

  // Fall back to raw OOXML strings if the enum value is missing in this build
  const ALIGN = {
    center: AlignmentType.CENTER ?? 'center',
    justify: AlignmentType.BOTH ?? AlignmentType.JUSTIFIED ?? 'both',
    left: AlignmentType.LEFT ?? 'left',
  };
  const fontMap = { script: 'Courier New', professional: 'Georgia', transcript: 'Calibri' };
  const font = fontMap[style] ?? 'Calibri';
  const speakerNames = new Set(
    Object.entries(speakers ?? {}).map(([id, s]) => s?.name ?? id)
  );

  const isRule = line => /^─+$/.test(line.trim());
  // Script-style speaker label: 10+ leading spaces, then uppercase letters
  // (any script, so names such as "JOSÉ" match), digits, spaces and the
  // punctuation used by the " (CONT'D)" suffix.
  const isScriptSpeaker = line => /^ {10,}\p{Lu}[\p{Lu}0-9 _'\-()]*$/u.test(line);

  const sourceLines = text.split('\n');
  const paragraphs = [];
  // Header mode lasts until the first rule. Without any rule there is no
  // header at all — otherwise the whole document would be treated as one.
  let inHeader = sourceLines.some(isRule);

  for (const line of sourceLines) {
    // HR → full-width paragraph border instead of ─ characters
    if (isRule(line)) {
      inHeader = false;
      paragraphs.push(new Paragraph({
        children: [],
        border: { bottom: { color: '999999', space: 1, style: BorderStyle.SINGLE, size: 6 } },
      }));
      continue;
    }
    if (!line.trim()) { paragraphs.push(new Paragraph({ text: '' })); continue; }

    let bold = false;
    let alignment;
    let color;
    if (inHeader) {
      alignment = ALIGN.center;
    } else if (style === 'professional') {
      bold = speakerNames.has(line.trim());
      alignment = bold ? ALIGN.center : ALIGN.justify;
    } else if (style === 'script') {
      const isTimecode = /^\(\d{2}:\d{2}:\d{2}\)/.test(line.trim());
      bold = !isTimecode && isScriptSpeaker(line);
      color = isTimecode ? '888888' : undefined;
      alignment = bold ? ALIGN.center : (isTimecode ? ALIGN.left : ALIGN.justify);
    } else {
      const isTimecode = /^\[\d{2}:\d{2}:\d{2}\]/.test(line.trim());
      color = isTimecode ? '888888' : undefined;
      alignment = isTimecode ? ALIGN.left : ALIGN.justify;
    }

    // Centred lines (bold labels, header) drop their indentation; body lines keep it.
    paragraphs.push(new Paragraph({
      children: [new TextRun({ text: (bold || inHeader) ? line.trim() : line, bold, font, color })],
      alignment,
    }));
  }

  const doc = new Document({ sections: [{ properties: {}, children: paragraphs }] });
  const blob = await Packer.toBlob(doc);
  triggerDownload(blob, path);
}
/**
 * Renders already-formatted transcript text into a letter-size PDF with jsPDF
 * (72pt margins, 10pt type, 14pt line height) and hands the result to
 * triggerDownload. The text is processed line by line:
 *
 *  - blank lines add a small vertical gap;
 *  - a line made only of `─` characters is drawn as a horizontal rule and
 *    ends the header section;
 *  - header lines (everything before the first rule) are centred;
 *  - 'professional': lines equal to a known speaker name are bold and
 *    centred, other lines are justified (font: times);
 *  - 'script': indented all-uppercase speaker labels are bold and centred,
 *    `(HH:MM:SS)` timecodes are left-aligned, other lines are justified
 *    (font: courier);
 *  - any other style: `[HH:MM:SS]`-prefixed lines are left-aligned, other
 *    lines are justified (font: helvetica).
 *
 * Justification applies to every wrapped line of a paragraph except the last.
 * Text that contains no rule line has no header section, so all of its lines
 * are classified by style instead of being centred as header text.
 *
 * @param {string} path - Passed to triggerDownload as the filename argument.
 * @param {string} text - Formatted transcript text from one of the format* functions.
 * @param {string} style - Export style key: 'script' | 'professional' | 'transcript'.
 * @param {Object.<string, Speaker>} [speakers] - Speaker map, used to identify names for bold rendering.
 * @returns {Promise<void>}
 * @throws {Error} When the jsPDF constructor cannot be found on `window.jspdf` or `window`.
 */
export async function exportPDF(path, text, style, speakers) {
  const jsPDF = (window.jspdf ?? window).jsPDF;
  if (!jsPDF) throw new Error('jsPDF not loaded — check the CDN script in index.html');
  const doc = new jsPDF({ unit: 'pt', format: 'letter' });

  const fontMap = { script: 'courier', professional: 'times', transcript: 'helvetica' };
  const fontName = fontMap[style] ?? 'helvetica';
  const speakerNames = new Set(
    Object.entries(speakers ?? {}).map(([id, s]) => s?.name ?? id)
  );

  const margin = 72;
  const fontSize = 10;
  const lineHeight = 14;
  const pageWidth = doc.internal.pageSize.getWidth();
  const pageHeight = doc.internal.pageSize.getHeight();
  const maxWidth = pageWidth - margin * 2;

  const isRule = line => /^─+$/.test(line.trim());
  // Script-style speaker label: 10+ leading spaces, then uppercase letters
  // (any script, so names such as "JOSÉ" match), digits, spaces and the
  // punctuation used by the " (CONT'D)" suffix.
  const isScriptSpeaker = line => /^ {10,}\p{Lu}[\p{Lu}0-9 _'\-()]*$/u.test(line);

  /**
   * Renders a single line with word-level justification across maxWidth.
   *
   * @param {string} wline - The text line to render.
   * @param {number} xStart - The x coordinate to start rendering from.
   * @param {number} yPos - The y coordinate (baseline) for the text.
   */
  const drawJustified = (wline, xStart, yPos) => {
    const words = wline.trim().split(/\s+/);
    if (words.length <= 1) { doc.text(wline.trim(), xStart, yPos); return; }
    const totalWordsWidth = words.reduce((sum, w) => sum + doc.getTextWidth(w), 0);
    // Spread the leftover width evenly over the gaps between words.
    const gap = (maxWidth - totalWordsWidth) / (words.length - 1);
    let cx = xStart;
    for (const word of words) {
      doc.text(word, cx, yPos);
      cx += doc.getTextWidth(word) + gap;
    }
  };

  const sourceLines = text.split('\n');
  let y = margin;
  // Header mode lasts until the first rule. Without any rule there is no
  // header at all — otherwise the whole document would be treated as one.
  let inHeader = sourceLines.some(isRule);
  const checkPage = () => { if (y > pageHeight - margin) { doc.addPage(); y = margin; } };

  for (const line of sourceLines) {
    if (!line.trim()) { y += lineHeight * 0.6; continue; }

    // HR → draw a rule spanning the full text area
    if (isRule(line)) {
      inHeader = false;
      checkPage();
      doc.setDrawColor(160);
      doc.setLineWidth(0.5);
      doc.line(margin, y - 2, pageWidth - margin, y - 2);
      y += lineHeight * 0.5;
      continue;
    }

    let bold = false;
    let center = false;
    let justify = false;
    if (inHeader) {
      center = true;
    } else if (style === 'professional') {
      bold = speakerNames.has(line.trim());
      center = bold;
      justify = !bold;
    } else if (style === 'script') {
      const isTimecode = /^\(\d{2}:\d{2}:\d{2}\)/.test(line.trim());
      bold = !isTimecode && isScriptSpeaker(line);
      center = bold;
      justify = !bold && !isTimecode;
    } else {
      const isTimecode = /^\[\d{2}:\d{2}:\d{2}\]/.test(line.trim());
      justify = !isTimecode;
    }

    doc.setFont(fontName, bold ? 'bold' : 'normal');
    doc.setFontSize(fontSize);
    // Centred lines drop their indentation; body lines keep it.
    const displayLine = center ? line.trim() : line;
    const wrapped = doc.splitTextToSize(displayLine, maxWidth);
    wrapped.forEach((wline, i) => {
      checkPage();
      if (center) {
        doc.text(wline, pageWidth / 2, y, { align: 'center' });
      } else if (justify && i < wrapped.length - 1) {
        // The last wrapped line of a paragraph stays ragged.
        drawJustified(wline, margin, y);
      } else {
        doc.text(wline, margin, y);
      }
      y += lineHeight;
    });
  }
  triggerDownload(doc.output('blob'), path);
}
/**
 * Routes an export request to the writer registered for the file type.
 * The txt, md and csv writers receive only `path` and `text`; the docx and
 * pdf writers additionally receive `style` and `speakers`.
 *
 * @param {string} fileType - 'txt' | 'md' | 'csv' | 'docx' | 'pdf'
 * @param {string} path - Forwarded to the selected writer as its first argument.
 * @param {string} text - Formatted transcript text from format().
 * @param {string} style - Export style key passed through to DOCX/PDF writers.
 * @param {Object.<string, Speaker>} speakers - Passed through to DOCX/PDF writers.
 * @returns {Promise<void>} Rejects with an Error for an unrecognised file type.
 */
export async function exportFile(fileType, path, text, style, speakers) {
  const writers = new Map([
    ['txt', () => exportTXT(path, text)],
    ['md', () => exportMarkdown(path, text)],
    ['csv', () => exportCSV(path, text)],
    ['docx', () => exportDOCX(path, text, style, speakers)],
    ['pdf', () => exportPDF(path, text, style, speakers)],
  ]);
  const write = writers.get(fileType);
  if (write === undefined) {
    throw new Error(`Unknown file type: ${fileType}`);
  }
  return write();
}