Source: utilities/export.js


/**
 * Transcript export formatters.
 * Each function accepts a Transcript and a speakers dict (id → Speaker) and returns
 * a plain-text string ready to be written to a file or rendered in a preview.
 */

import { SCRIPT_SPEAKER_INDENT, SCRIPT_DIALOGUE_INDENT } from "./constants.js"

// ── Private helpers ───────────────────────────────────────────────────────────

/**
 * Renders a duration in seconds as a zero-padded HH:MM:SS string.
 * Fractional seconds are floored before the value is split into fields.
 *
 * @param {number} totalSeconds - Duration in seconds.
 * @returns {string} The formatted timecode, e.g. "01:02:03".
 */
function fmtHMS(totalSeconds) {
    const whole     = Math.floor(totalSeconds);
    const twoDigits = value => String(value).padStart(2, '0');

    const hours   = Math.floor(whole / 3600);
    const minutes = Math.floor((whole % 3600) / 60);
    const seconds = whole % 60;

    return `${twoDigits(hours)}:${twoDigits(minutes)}:${twoDigits(seconds)}`;
}

/**
 * Returns the display name for a speaker id, falling back to the raw id.
 *
 * Only the map's own entries are consulted, so an id that happens to match an
 * inherited Object.prototype member (e.g. "constructor") is not mistaken for a
 * speaker. A missing map, a missing entry, or an entry whose `name` is
 * null/undefined all fall back to the id itself.
 *
 * @param {string} speakerId - The speaker identifier.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @returns {string}
 */
function displayName(speakerId, speakers) {
    const hasEntry = speakers != null
        && Object.prototype.hasOwnProperty.call(speakers, speakerId);
    if (!hasEntry) return speakerId;
    return speakers[speakerId]?.name ?? speakerId;
}

/**
 * Lists speaker display names, ordered by each speaker's first segment.
 * Every speaker id is reported once, however many segments it owns.
 *
 * @param {Transcript} transcript - The transcript whose segments are scanned.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @returns {string[]}
 */
function speakersInOrder(transcript, speakers) {
    // A Set keeps insertion order, so the ids come back in first-appearance order.
    const uniqueIds = new Set(Array.from(transcript.segments, segment => segment.speaker));
    return Array.from(uniqueIds, id => displayName(id, speakers));
}

// ── Formatters ────────────────────────────────────────────────────────────────

/**
 * Formats a transcript in screenplay / dialogue-list style.
 * A timecode line is inserted whenever a paragraph starts in a different minute
 * than the previous one; speaker names are uppercased and indented by
 * SCRIPT_SPEAKER_INDENT spaces, with " (CONT'D)" appended when the same speaker
 * continues; each paragraph's dialogue is emitted as one line indented by
 * SCRIPT_DIALOGUE_INDENT spaces (wrapping is left to the renderer).
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object} [options] - Formatting options.
 * @param {boolean} [options.includeSpeakers=false] - Whether to prepend a speaker list.
 * @param {boolean} [options.includeTimestamps=true] - Whether to insert minute-boundary timecodes.
 * @param {boolean} [options.displayRecordingDate=false] - Whether to show the recording date in the header.
 * @param {boolean} [options.displayExportDate=false] - Whether to show today's date in the header.
 * @param {string} [options.recordingDate=''] - Recording date string shown when displayRecordingDate is true.
 * @param {string} [options.title=''] - Document title shown in the header.
 * @param {string} [options.description=''] - Optional description shown beneath the title.
 * @returns {string} The formatted script; it never starts with a blank line and
 *     trailing whitespace is removed.
 */
export function formatScript(transcript, speakers, {
    includeSpeakers = false, includeTimestamps = true,
    displayRecordingDate = false, displayExportDate = false,
    recordingDate = '', title = '', description = '',
} = {}) {
    const lines = [];
    const hr = '─'.repeat(42);

    if (title || description || (displayRecordingDate && recordingDate) || displayExportDate) {
        if (title) lines.push(' '.repeat(SCRIPT_SPEAKER_INDENT) + title.toUpperCase());
        if (description) lines.push(' '.repeat(SCRIPT_DIALOGUE_INDENT) + description);
        if (displayRecordingDate && recordingDate)
            lines.push(' '.repeat(SCRIPT_DIALOGUE_INDENT) + `Recorded: ${recordingDate}`);
        if (displayExportDate)
            lines.push(' '.repeat(SCRIPT_DIALOGUE_INDENT) + `Exported: ${new Date().toLocaleDateString()}`);
        lines.push(hr);
    }

    if (includeSpeakers) {
        const names = speakersInOrder(transcript, speakers);
        lines.push('SPEAKERS:', ...names.map(n => '  ' + n));
        lines.push(hr);
    }

    let lastMinute  = -1;
    let lastSpeaker = null;

    for (const para of transcript.paragraphs) {
        const paraStart     = para.segments[0].start;
        const currentMinute = Math.floor(paraStart / 60);

        // Separate this paragraph from whatever precedes it. The separator is
        // skipped when nothing has been emitted yet, so the output never opens
        // with an empty line (with or without timestamps).
        if (includeTimestamps && currentMinute !== lastMinute) {
            // New minute — insert timecode block
            if (lines.length > 0) lines.push('');
            lines.push(`(${fmtHMS(currentMinute * 60)})`, '');
            lastMinute = currentMinute;
        } else if (lines.length > 0) {
            // Paragraph break within the same minute (or when timestamps disabled)
            lines.push('');
        }

        // Speaker name — uppercase, indented; CONT'D if the speaker didn't change
        const baseName = displayName(para.speaker, speakers).toUpperCase();
        const label    = para.speaker === lastSpeaker ? `${baseName} (CONT'D)` : baseName;
        lastSpeaker    = para.speaker;
        lines.push(' '.repeat(SCRIPT_SPEAKER_INDENT) + label);

        // Dialogue — one continuous line; the file renderer or CSS handles wrapping
        const text = para.segments.map(s => s.text.trim()).filter(Boolean).join(' ');
        lines.push(' '.repeat(SCRIPT_DIALOGUE_INDENT) + text);
    }

    return lines.join('\n').trimEnd();
}

/**
 * Formats a transcript in professional report style.
 * Each speaker block starts with the speaker's name as a standalone header,
 * followed by paragraph text. Multiple paragraphs within a block are separated
 * by a blank line; speaker blocks are separated by a blank line.
 *
 * Segment texts are trimmed individually and empty segments are dropped before
 * joining, so a paragraph never contains doubled spaces at segment boundaries.
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object} [options] - Formatting options.
 * @param {boolean} [options.includeSpeakers=false] - Whether to prepend a speaker list.
 * @param {boolean} [options.displayRecordingDate=false] - Whether to show the recording date in the header.
 * @param {boolean} [options.displayExportDate=false] - Whether to show today's date in the header.
 * @param {string} [options.recordingDate=''] - Recording date string shown when displayRecordingDate is true.
 * @param {string} [options.title=''] - Document title shown in the header.
 * @param {string} [options.description=''] - Optional description shown beneath the title.
 * @returns {string}
 */
export function formatProfessional(transcript, speakers, {
    includeSpeakers = false,
    displayRecordingDate = false, displayExportDate = false,
    recordingDate = '', title = '', description = '',
} = {}) {
    const parts = [];
    const hr = '─'.repeat(42);

    // Joins a paragraph's segments with single spaces. Null/undefined text is
    // treated as empty, matching what Array.prototype.join would have done.
    const paragraphText = para =>
        para.segments.map(s => String(s.text ?? '').trim()).filter(Boolean).join(' ');

    if (title || description || (displayRecordingDate && recordingDate) || displayExportDate) {
        const headerLines = [];
        if (title) headerLines.push(title);
        if (description) headerLines.push(description);
        if (displayRecordingDate && recordingDate) headerLines.push(`Recorded: ${recordingDate}`);
        if (displayExportDate) headerLines.push(`Exported: ${new Date().toLocaleDateString()}`);
        headerLines.push(hr);
        parts.push(headerLines.join('\n'));
    }

    if (includeSpeakers) {
        const names = speakersInOrder(transcript, speakers);
        parts.push('Speakers:\n' + names.map(n => '  ' + n).join('\n') + '\n' + hr);
    }

    const blocks = transcript.speakerBlocks.map(block => {
        const name  = displayName(block.speaker, speakers);
        const paras = block.paragraphs.map(paragraphText).join('\n\n');
        return `${name}\n\n${paras}`;
    });
    parts.push(...blocks);
    return parts.join('\n\n');
}

/**
 * Formats a transcript as a time-stamped transcription document.
 * Includes a metadata header, then each paragraph prefixed with its start
 * timecode and speaker name, with the text quoted on the following line.
 * When no title is given the header shows the placeholder "[Audio/Video Title]".
 *
 * Segment texts are trimmed individually and empty segments are dropped before
 * joining, so a paragraph never contains doubled spaces at segment boundaries.
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object}  [options] - Formatting options.
 * @param {string}  [options.title=''] - Source title shown in the header.
 * @param {string}  [options.description=''] - Optional description shown beneath the title.
 * @param {boolean} [options.includeSpeakers=false] - Whether to prepend a speaker list.
 * @param {boolean} [options.includeTimestamps=true] - Whether to prefix paragraphs with timecodes.
 * @param {boolean} [options.displayRecordingDate=false] - Whether to show the recording date in the header.
 * @param {boolean} [options.displayExportDate=false] - Whether to show today's date in the header.
 * @param {string}  [options.recordingDate=''] - Recording date string shown when displayRecordingDate is true.
 * @returns {string}
 */
export function formatTranscript(transcript, speakers, {
    title = '', description = '', includeSpeakers = false,
    includeTimestamps = true,
    displayRecordingDate = false, displayExportDate = false,
    recordingDate = '',
} = {}) {
    const hr = '─'.repeat(42);

    const headerLines = [ title || '[Audio/Video Title]' ];
    if (description) headerLines.push(description);
    if (displayRecordingDate && recordingDate) headerLines.push(`Recorded: ${recordingDate}`);
    if (displayExportDate) headerLines.push(`Exported: ${new Date().toLocaleDateString()}`);
    if (includeSpeakers) {
        const names = speakersInOrder(transcript, speakers);
        headerLines.push(hr, '', `Speakers: ${names.join(', ')}`);
    }
    headerLines.push(hr, '', 'Transcript:');

    const header = headerLines.join('\n');

    const body = transcript.paragraphs.map(para => {
        const name = displayName(para.speaker, speakers);
        // Null/undefined text is treated as empty, matching Array.prototype.join.
        const text = para.segments
            .map(s => String(s.text ?? '').trim())
            .filter(Boolean)
            .join(' ');
        if (includeTimestamps) {
            const t = fmtHMS(para.segments[0].start);
            return `[${t}] ${name}:\n"${text}"`;
        }
        return `${name}:\n"${text}"`;
    }).join('\n\n');

    return `${header}\n\n${body}`;
}

/**
 * Formats a transcript as Markdown.
 * The document title becomes an H1; each speaker block opens with the speaker
 * name as a level-4 heading (`####`); paragraph text follows as body text.
 * Optional timestamps appear as inline code spans before each paragraph.
 *
 * Segment texts are trimmed individually and empty segments are dropped before
 * joining, so a paragraph never contains doubled spaces at segment boundaries.
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object}  [options] - Formatting options.
 * @param {boolean} [options.includeSpeakers=false] - Whether to prepend a speaker list.
 * @param {boolean} [options.includeTimestamps=true] - Whether to prefix paragraphs with timecodes.
 * @param {boolean} [options.displayRecordingDate=false] - Whether to show the recording date in the header.
 * @param {boolean} [options.displayExportDate=false] - Whether to show today's date in the header.
 * @param {string}  [options.recordingDate=''] - Recording date string shown when displayRecordingDate is true.
 * @param {string}  [options.title=''] - Document title rendered as an H1 heading.
 * @param {string}  [options.description=''] - Optional description shown beneath the title.
 * @returns {string} The Markdown document with trailing whitespace removed.
 */
export function formatMarkdown(transcript, speakers, {
    includeSpeakers = false, includeTimestamps = true,
    displayRecordingDate = false, displayExportDate = false,
    recordingDate = '', title = '', description = '',
} = {}) {
    const lines = [];

    if (title) lines.push(`# ${title}`, '');
    if (description) lines.push(description, '');
    if (displayRecordingDate && recordingDate) lines.push(`*Recorded: ${recordingDate}*`, '');
    if (displayExportDate) lines.push(`*Exported: ${new Date().toLocaleDateString()}*`, '');
    // Close the header with a rule only when some header content was emitted.
    if (lines.length) lines.push('---', '');

    if (includeSpeakers) {
        const names = speakersInOrder(transcript, speakers);
        lines.push(`**Speakers:** ${names.join(', ')}`, '', '---', '');
    }

    for (const block of transcript.speakerBlocks) {
        const name = displayName(block.speaker, speakers);
        lines.push(`#### ${name}`, '');
        for (const para of block.paragraphs) {
            // Null/undefined text is treated as empty, matching Array.prototype.join.
            const text = para.segments
                .map(s => String(s.text ?? '').trim())
                .filter(Boolean)
                .join(' ');
            if (includeTimestamps) {
                const t = fmtHMS(para.segments[0].start);
                lines.push(`\`[${t}]\` ${text}`, '');
            } else {
                lines.push(text, '');
            }
        }
    }

    return lines.join('\n').trimEnd();
}

/**
 * Formats a transcript as CSV with columns: start, end, speaker, text.
 * Speaker IDs are replaced with their display names. One row is emitted per
 * transcript segment, after a header row.
 *
 * Quoting follows RFC 4180: a field containing a double quote, a comma, or a
 * line break (CR or LF) is wrapped in double quotes, with embedded quotes
 * doubled. Rows are separated by LF rather than the CRLF the RFC specifies.
 *
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @returns {string}
 */
export function formatCSV(transcript, speakers) {
    const escapeField = value => {
        const field = String(value);
        // A bare CR is a line break too; leaving it unquoted can split the record.
        return /[",\r\n]/.test(field) ? `"${field.replace(/"/g, '""')}"` : field;
    };
    const rows = transcript.segments.map(s =>
        [s.start, s.end, escapeField(displayName(s.speaker, speakers)), escapeField(s.text)].join(',')
    );
    return ['start,end,speaker,text', ...rows].join('\n');
}

/**
 * Selects and runs the formatter that matches a style key.
 * Any key other than the ones listed below is handled by formatTranscript.
 *
 * @param {string}   style      - 'script' | 'professional' | 'transcript' | 'md' | 'csv'
 * @param {Transcript} transcript - The transcript to format.
 * @param {Object.<string, Speaker>} speakers - Speaker id → Speaker map.
 * @param {object}   [options]  - Forwarded to the selected formatter; not passed to formatCSV.
 * @returns {string}
 */
export function format(style, transcript, speakers, options = {}) {
    const viaTranscript = () => formatTranscript(transcript, speakers, options);

    // Thunks keep the lookup lazy: only the chosen formatter is invoked.
    const byStyle = new Map([
        ['script',       () => formatScript(transcript, speakers, options)],
        ['professional', () => formatProfessional(transcript, speakers, options)],
        ['transcript',   viaTranscript],
        ['md',           () => formatMarkdown(transcript, speakers, options)],
        ['csv',          () => formatCSV(transcript, speakers)],
    ]);

    const render = byStyle.get(style) ?? viaTranscript;
    return render();
}

// ── File writers ──────────────────────────────────────────────────────────────

/**
 * Triggers a browser file download for the given Blob.
 * Creates an object URL for the blob, clicks a temporary anchor element that
 * carries the `download` attribute, then removes the anchor and releases the URL.
 *
 * @param {Blob} blob - The file content as a Blob.
 * @param {string} filename - The suggested download filename.
 */
function triggerDownload(blob, filename) {
    const url    = URL.createObjectURL(blob);
    const anchor = document.createElement('a');
    anchor.href     = url;
    anchor.download = filename;
    document.body.appendChild(anchor);
    try {
        anchor.click();
    } finally {
        // Always detach the temporary anchor, even if click() throws.
        document.body.removeChild(anchor);
        // Release the object URL on a later task rather than synchronously:
        // revoking it in the same task as click() can cancel the download in
        // browsers that start fetching the blob asynchronously.
        setTimeout(() => URL.revokeObjectURL(url), 0);
    }
}

/**
 * Offers a plain-text export as a browser download.
 * The text is wrapped in a UTF-8 `text/plain` Blob and handed to triggerDownload.
 *
 * @param {string} path  - Passed to triggerDownload as the download filename.
 * @param {string} text  - Formatted transcript text from one of the format* functions.
 * @returns {Promise<void>}
 */
export async function exportTXT(path, text) {
    const mimeType = 'text/plain;charset=utf-8';
    const payload  = new Blob([text], { type: mimeType });
    triggerDownload(payload, path);
}

/**
 * Offers a Markdown export as a browser download.
 * The text is wrapped unchanged in a UTF-8 `text/markdown` Blob and handed to
 * triggerDownload; no formatting is applied here.
 *
 * @param {string} path  - Passed to triggerDownload as the download filename.
 * @param {string} text  - Formatted transcript text from one of the format* functions.
 * @returns {Promise<void>}
 */
export async function exportMarkdown(path, text) {
    const mimeType = 'text/markdown;charset=utf-8';
    const payload  = new Blob([text], { type: mimeType });
    triggerDownload(payload, path);
}

/**
 * Offers a CSV export as a browser download.
 * The text is wrapped in a UTF-8 `text/csv` Blob and handed to triggerDownload.
 *
 * @param {string} path  - Passed to triggerDownload as the download filename.
 * @param {string} text  - CSV string from formatCSV().
 * @returns {Promise<void>}
 */
export async function exportCSV(path, text) {
    const mimeType = 'text/csv;charset=utf-8';
    const payload  = new Blob([text], { type: mimeType });
    triggerDownload(payload, path);
}

/**
 * Builds a DOCX document from formatted transcript text and offers it as a
 * browser download, applying style-dependent typography line by line.
 * Professional: Georgia; lines equal to a known speaker name are bold and centered,
 *   other body lines are justified.
 * Script: Courier New; indented uppercase speaker lines are bold and centered,
 *   "(HH:MM:SS)" timecode lines are gray and left-aligned, dialogue is justified.
 * Any other style: Calibri; lines starting with "[HH:MM:SS]" are gray and
 *   left-aligned, the rest justified.
 *
 * Lines made only of "─" become a paragraph with a bottom border. When the text
 * contains such a rule, every line before the first one is treated as header
 * and centered; text without any rule has no header and is styled as body
 * from its first line.
 *
 * @param {string} path   - Passed to triggerDownload as the download filename.
 * @param {string} text   - Formatted transcript text from one of the format* functions.
 * @param {string} style  - Export style key: 'script' | 'professional' | 'transcript'.
 * @param {Object.<string, Speaker>} speakers - Speaker map, used to identify names for bold rendering.
 * @returns {Promise<void>} Rejects with an Error when `window.docx` is not loaded.
 */
export async function exportDOCX(path, text, style, speakers) {
    if (!window.docx) throw new Error('docx not loaded — check the CDN script in index.html');
    const { Document, Paragraph, TextRun, Packer, BorderStyle, AlignmentType = {} } = window.docx;
    // Fall back to raw OOXML strings if the enum value is missing in this build
    const ALIGN = {
        center:  AlignmentType.CENTER  ?? 'center',
        justify: AlignmentType.BOTH    ?? AlignmentType.JUSTIFIED ?? 'both',
        left:    AlignmentType.LEFT    ?? 'left',
    };

    const fontMap = { script: 'Courier New', professional: 'Georgia', transcript: 'Calibri' };
    const font    = fontMap[style] ?? 'Calibri';
    const speakerNames = new Set(
        Object.entries(speakers ?? {}).map(([id, s]) => s?.name ?? id)
    );

    // Regex to detect script-style speaker name lines (10+ leading spaces, uppercase + digits/underscores)
    const isScriptSpeaker = line => /^ {10,}[A-Z][A-Z0-9 _'\-()']*$/.test(line);
    // A horizontal rule is a line consisting solely of ─ characters.
    const isRule = line => /^─+$/.test(line.trim());

    const sourceLines = text.split('\n');
    const paragraphs  = [];
    // Header mode lasts until the first rule. Text without any rule has no
    // header at all, so it must not start (and remain) in header mode —
    // otherwise the whole body would be centered and never bolded.
    let inHeader = sourceLines.some(isRule);

    for (const line of sourceLines) {
        // HR → full-width paragraph border instead of ─ characters
        if (isRule(line)) {
            inHeader = false;
            paragraphs.push(new Paragraph({
                children: [],
                border: { bottom: { color: '999999', space: 1, style: BorderStyle.SINGLE, size: 6 } },
            }));
            continue;
        }

        if (!line.trim()) { paragraphs.push(new Paragraph({ text: '' })); continue; }

        let bold = false;
        let alignment;
        let color;

        if (inHeader) {
            alignment = ALIGN.center;
        } else if (style === 'professional') {
            bold      = speakerNames.has(line.trim());
            alignment = bold ? ALIGN.center : ALIGN.justify;
        } else if (style === 'script') {
            const isTimecode = /^\(\d{2}:\d{2}:\d{2}\)/.test(line.trim());
            bold      = !isTimecode && isScriptSpeaker(line);
            color     = isTimecode ? '888888' : undefined;
            alignment = bold ? ALIGN.center : (isTimecode ? ALIGN.left : ALIGN.justify);
        } else {
            const isTimecode = /^\[\d{2}:\d{2}:\d{2}\]/.test(line.trim());
            color     = isTimecode ? '888888' : undefined;
            alignment = isTimecode ? ALIGN.left : ALIGN.justify;
        }

        // Centered lines (headers, speaker names) drop their source indentation;
        // everything else keeps the leading spaces the formatter produced.
        paragraphs.push(new Paragraph({
            children: [new TextRun({ text: (bold || inHeader) ? line.trim() : line, bold, font, color })],
            alignment,
        }));
    }

    const doc  = new Document({ sections: [{ properties: {}, children: paragraphs }] });
    const blob = await Packer.toBlob(doc);
    triggerDownload(blob, path);
}

/**
 * Renders formatted transcript text into a letter-size PDF (72pt margins, 10pt
 * type, 14pt line height) and offers it as a browser download, applying
 * style-dependent typography line by line.
 * Professional: times; lines equal to a known speaker name are bold and centered,
 *   other body lines are justified.
 * Script: courier; indented uppercase speaker lines are bold and centered,
 *   "(HH:MM:SS)" timecode lines are left-aligned, dialogue is justified.
 * Any other style: helvetica; lines starting with "[HH:MM:SS]" are left-aligned,
 *   the rest justified.
 *
 * Lines made only of "─" are drawn as a horizontal rule. When the text contains
 * such a rule, every line before the first one is treated as header and
 * centered; text without any rule has no header and is styled as body from its
 * first line. Justification is applied to every wrapped line except the last
 * of each source line.
 *
 * @param {string} path   - Passed to triggerDownload as the download filename.
 * @param {string} text   - Formatted transcript text from one of the format* functions.
 * @param {string} style  - Export style key: 'script' | 'professional' | 'transcript'.
 * @param {Object.<string, Speaker>} speakers - Speaker map, used to identify names for bold rendering.
 * @returns {Promise<void>} Rejects with an Error when jsPDF is not loaded.
 */
export async function exportPDF(path, text, style, speakers) {
    const jsPDF = (window.jspdf ?? window).jsPDF;
    if (!jsPDF) throw new Error('jsPDF not loaded — check the CDN script in index.html');
    const doc = new jsPDF({ unit: 'pt', format: 'letter' });

    const fontMap  = { script: 'courier', professional: 'times', transcript: 'helvetica' };
    const fontName = fontMap[style] ?? 'helvetica';
    const speakerNames = new Set(
        Object.entries(speakers ?? {}).map(([id, s]) => s?.name ?? id)
    );

    const margin     = 72;
    const fontSize   = 10;
    const lineHeight = 14;
    const pageWidth  = doc.internal.pageSize.getWidth();
    const pageHeight = doc.internal.pageSize.getHeight();
    const maxWidth   = pageWidth - margin * 2;

    // Regex to detect script-style speaker name lines (10+ leading spaces, uppercase + digits/underscores)
    const isScriptSpeaker = line => /^ {10,}[A-Z][A-Z0-9 _'\-()']*$/.test(line);
    // A horizontal rule is a line consisting solely of ─ characters.
    const isRule = line => /^─+$/.test(line.trim());

    /**
     * Renders a single line with word-level justification across maxWidth.
     *
     * @param {string} wline - The text line to render.
     * @param {number} xStart - The x coordinate to start rendering from.
     * @param {number} yPos - The y coordinate (baseline) for the text.
     */
    const drawJustified = (wline, xStart, yPos) => {
        const words = wline.trim().split(/\s+/);
        if (words.length <= 1) { doc.text(wline.trim(), xStart, yPos); return; }
        const totalWordsWidth = words.reduce((sum, w) => sum + doc.getTextWidth(w), 0);
        // Spread the leftover horizontal space evenly between the words.
        const gap = (maxWidth - totalWordsWidth) / (words.length - 1);
        let cx = xStart;
        for (const word of words) {
            doc.text(word, cx, yPos);
            cx += doc.getTextWidth(word) + gap;
        }
    };

    const sourceLines = text.split('\n');
    let y = margin;
    // Header mode lasts until the first rule. Text without any rule has no
    // header at all, so it must not start (and remain) in header mode —
    // otherwise the whole body would be centered and never bolded.
    let inHeader = sourceLines.some(isRule);
    // Starts a new page once the cursor has moved past the bottom margin.
    const checkPage = () => { if (y > pageHeight - margin) { doc.addPage(); y = margin; } };

    for (const line of sourceLines) {
        // Blank source lines become a reduced vertical gap.
        if (!line.trim()) { y += lineHeight * 0.6; continue; }

        // HR → draw a rule spanning the full text area
        if (isRule(line)) {
            inHeader = false;
            checkPage();
            doc.setDrawColor(160);
            doc.setLineWidth(0.5);
            doc.line(margin, y - 2, pageWidth - margin, y - 2);
            y += lineHeight * 0.5;
            continue;
        }

        let bold    = false;
        let center  = false;
        let justify = false;

        if (inHeader) {
            center = true;
        } else if (style === 'professional') {
            bold    = speakerNames.has(line.trim());
            center  = bold;
            justify = !bold;
        } else if (style === 'script') {
            const isTimecode = /^\(\d{2}:\d{2}:\d{2}\)/.test(line.trim());
            bold    = !isTimecode && isScriptSpeaker(line);
            center  = bold;
            justify = !bold && !isTimecode;
        } else {
            const isTimecode = /^\[\d{2}:\d{2}:\d{2}\]/.test(line.trim());
            justify = !isTimecode;
        }

        doc.setFont(fontName, bold ? 'bold' : 'normal');
        doc.setFontSize(fontSize);

        // Centered lines drop their source indentation before wrapping.
        const displayLine = center ? line.trim() : line;
        const wrapped     = doc.splitTextToSize(displayLine, maxWidth);
        wrapped.forEach((wline, i) => {
            checkPage();
            if (center) {
                doc.text(wline, pageWidth / 2, y, { align: 'center' });
            } else if (justify && i < wrapped.length - 1) {
                // The final wrapped line of a paragraph stays ragged-right.
                drawJustified(wline, margin, y);
            } else {
                doc.text(wline, margin, y);
            }
            y += lineHeight;
        });
    }

    triggerDownload(doc.output('blob'), path);
}

/**
 * Routes an export request to the writer that matches the file type.
 *
 * @param {string} fileType - 'txt' | 'md' | 'csv' | 'docx' | 'pdf'
 * @param {string} path     - Forwarded to the writer as its `path` argument.
 * @param {string} text     - Formatted transcript text from format().
 * @param {string} style    - Export style key; forwarded only to the DOCX/PDF writers.
 * @param {Object.<string, Speaker>} speakers - Forwarded only to the DOCX/PDF writers.
 * @returns {Promise<void>} Rejects with an Error for an unrecognised file type.
 */
export async function exportFile(fileType, path, text, style, speakers) {
    // Thunks keep the lookup lazy: only the chosen writer is invoked.
    const writers = new Map([
        ['txt',  () => exportTXT(path, text)],
        ['md',   () => exportMarkdown(path, text)],
        ['csv',  () => exportCSV(path, text)],
        ['docx', () => exportDOCX(path, text, style, speakers)],
        ['pdf',  () => exportPDF(path, text, style, speakers)],
    ]);

    const write = writers.get(fileType);
    if (write === undefined) {
        throw new Error(`Unknown file type: ${fileType}`);
    }
    return write();
}