/**
* @module audio
*/
import { PEAKS_PER_SECOND, LARGE_FILE_THRESHOLD_BYTES, SUPPORTED_AUDIO_EXTENSIONS, CHUNK_SIZE_BYTES } from "./constants.js";
/**
 * Returns true if the File appears to be a supported audio file.
 * An audio/* MIME type is accepted immediately; otherwise the file
 * extension decides, since OSes and browsers often omit or mis-report
 * the MIME type (e.g. video/mp4 for .m4a).
 * @param {File} file - the file to test
 * @returns {boolean}
 */
export function isAudioFile(file) {
  if (!file) return false;
  if (file.type.startsWith('audio/')) return true;
  // MIME type missing or mis-reported — fall back to the extension.
  const dotIndex = file.name.lastIndexOf('.');
  return dotIndex !== -1
    && SUPPORTED_AUDIO_EXTENSIONS.has(file.name.slice(dotIndex).toLowerCase());
}
/**
 * Returns the duration of a media URL by loading it into a temporary Audio
 * element and waiting for 'loadedmetadata'. No full decode is performed.
 * @param {string} url - the audio URL to measure
 * @returns {Promise<number>} finite duration in seconds (0 on error or when
 *   the element reports a non-finite duration)
 */
function _getDurationFromUrl(url) {
  return new Promise((resolve) => {
    const audio = new Audio();
    audio.addEventListener('loadedmetadata', () => {
      // Some sources (streamed blobs, certain containers) report Infinity or
      // NaN here; normalize to 0 so callers never size a Float32Array with a
      // non-finite peak count.
      resolve(Number.isFinite(audio.duration) ? audio.duration : 0);
    }, { once: true });
    audio.addEventListener('error', () => resolve(0), { once: true });
    audio.src = url;
  });
}
/**
 * Extracts waveform peaks from a large audio file by decoding it in
 * CHUNK_SIZE_BYTES slices. Only one decoded buffer is held in memory at a
 * time, so peak memory stays well under 100 MB regardless of file length.
 *
 * Each slice is passed to decodeAudioData independently. MP3 frames are
 * self-contained so boundary splits cause at most a few milliseconds of
 * silence; visually this is invisible on a waveform. Slices that fail to
 * decode (e.g. unsupported container fragments) contribute zero peaks.
 *
 * WAV files are handled specially: each PCM chunk is prepended with the
 * original file's RIFF header (with sizes patched) before being passed to
 * decodeAudioData. This avoids the headerless-chunk problem and delegates
 * all format-specific decoding (PCM, float, extensible, etc.) to the
 * browser's native decoder rather than reimplementing it in JavaScript.
 *
 * @param {File} file - audio file to decode in chunks
 * @param {function(number):void} [onProgress] - called after each chunk with fraction 0–1
 * @param {number} [knownDuration] - duration in seconds if already known, skips a metadata fetch
 * @returns {Promise<{url, sampleRate, peaks, duration, filename}>}
 */
async function _extractPeaksChunked(file, onProgress, knownDuration) {
// ── WAV special case ─────────────────────────────────────────────────────
// WAV files have a single RIFF header at the start; plain byte slices from
// chunk 2 onwards have no header and decodeAudioData rejects them, filling
// those regions with zeros. Fix: locate the PCM data region once, then
// prepend the original header bytes to every chunk before decoding.
const isWav = file.type === 'audio/wav' || file.type === 'audio/x-wav'
|| file.name.toLowerCase().endsWith('.wav');
let wavHeaderTemplate = null; // ArrayBuffer containing bytes [0 .. dataOffset)
let wavDataOffset = 0; // byte offset of PCM data within the file
let wavDataSize = 0; // byte length of PCM data region
let wavBytesPerFrame = 0; // bytes per interleaved audio frame (channels × bytesPerSample)
if (isWav) {
// Locate the fmt and data chunks by scanning the file header.
// Professional recordings often embed large metadata blocks (iXML, bext,
// axml, large LIST chunks, etc.) that can push the data chunk well past
// 64 KB. We scan in doubling windows — starting at 64 KB and growing up
// to 100 MB — so the common case stays fast while exotic files still work.
const MAX_HEADER_SCAN = Math.min(100 * 1024 * 1024, file.size);
let scanSize = Math.min(65536, file.size);
let scanBuf = await file.slice(0, scanSize).arrayBuffer();
let scanView = new DataView(scanBuf);
// Reads a 4-character ASCII chunk tag (e.g. 'fmt ', 'data') at `off`.
const readTag = (off) => String.fromCharCode(
scanView.getUint8(off), scanView.getUint8(off+1),
scanView.getUint8(off+2), scanView.getUint8(off+3));
// RF64 (BW64) uses 'RF64' instead of 'RIFF' for files > 4 GB; the rest
// of the chunk layout is identical. The 'data' chunk size field is
// 0xFFFFFFFF in RF64 — chunkRegionSize = Math.min(wavDataSize, file.size−wavDataOffset)
// handles that sentinel correctly without needing to parse the ds64 chunk.
const magic = scanBuf.byteLength >= 12 ? readTag(0) : '';
const form = scanBuf.byteLength >= 12 ? readTag(8) : '';
if ((magic === 'RIFF' || magic === 'RF64') && form === 'WAVE') {
let pos = 12;
scanLoop: while (pos < file.size) {
// Expand the buffer as needed to read the next chunk header (8 bytes)
while (pos + 8 > scanSize && scanSize < MAX_HEADER_SCAN) {
scanSize = Math.min(scanSize * 2, MAX_HEADER_SCAN, file.size);
scanBuf = await file.slice(0, scanSize).arrayBuffer();
scanView = new DataView(scanBuf);
}
if (pos + 8 > scanBuf.byteLength) break;
const id = readTag(pos);
const chunkSz = scanView.getUint32(pos + 4, true);
if (id === 'fmt ') {
// Expand if needed to read the format fields (need at least pos+24)
while (pos + 24 > scanSize && scanSize < MAX_HEADER_SCAN) {
scanSize = Math.min(scanSize * 2, MAX_HEADER_SCAN, file.size);
scanBuf = await file.slice(0, scanSize).arrayBuffer();
scanView = new DataView(scanBuf);
}
if (pos + 24 <= scanBuf.byteLength) {
// fmt chunk layout: +10 = channel count, +22 = bits per sample
const nCh = scanView.getUint16(pos + 10, true);
const bps = scanView.getUint16(pos + 22, true);
wavBytesPerFrame = nCh * (bps >> 3);
}
} else if (id === 'data') {
wavDataOffset = pos + 8;
wavDataSize = chunkSz;
break scanLoop;
}
if (chunkSz === 0) break; // guard: stop on malformed zero-size chunk
// RIFF chunks are word-aligned: odd-sized chunks carry one pad byte.
pos += 8 + chunkSz + (chunkSz & 1);
}
}
if (wavDataOffset > 0) {
// Read the exact header bytes (everything before the PCM data) as a
// reusable template that will be prepended to every decoded chunk.
wavHeaderTemplate = await file.slice(0, wavDataOffset).arrayBuffer();
console.log(`[audio] WAV header: dataOffset=${wavDataOffset}, bytesPerFrame=${wavBytesPerFrame}, headerScannedTo=${scanSize}`);
} else {
console.warn(`[audio] WAV data chunk not found in header scan (magic='${magic}', form='${form}') — falling back to whole-file chunking (only chunk 0 will decode)`);
}
// If we couldn't parse the header, fall through — decodeAudioData will
// at least succeed on chunk 0 and fail silently on the rest.
}
// For WAV, align the chunk size to a whole number of frames so that every
// chunk starts on a valid frame boundary. Without this, formats whose frame
// size does not divide CHUNK_SIZE_BYTES evenly (e.g. 24-bit stereo = 6 bytes/
// frame, 4 194 304 mod 6 = 4) produce misaligned PCM slices from chunk 2
// onwards, causing decodeAudioData to misinterpret sample boundaries and
// return near-maximum-amplitude garbage values.
const effectiveChunkSize = (wavBytesPerFrame > 1)
? Math.floor(CHUNK_SIZE_BYTES / wavBytesPerFrame) * wavBytesPerFrame
: CHUNK_SIZE_BYTES;
// The object URL is returned to the caller, who is responsible for revoking it.
const url = URL.createObjectURL(file);
const duration = knownDuration ?? await _getDurationFromUrl(url);
// NOTE(review): if duration resolves to 0 (metadata failure), peakCount is 0
// and the returned peaks array is empty — confirm callers tolerate that.
// For WAV, iterate only over the PCM data region (header is prepended separately).
// For all other formats, iterate over the whole file as before.
const chunkRegionStart = wavHeaderTemplate ? wavDataOffset : 0;
const chunkRegionSize = wavHeaderTemplate
? Math.min(wavDataSize, file.size - wavDataOffset)
: file.size;
const numChunks = Math.ceil(chunkRegionSize / effectiveChunkSize);
const peakCount = Math.ceil(duration * PEAKS_PER_SECOND);
const allPeaks = new Float32Array(peakCount);
let peaksWritten = 0;
let detectedSampleRate = 44100;
// A single AudioContext is reused for every chunk and closed after the loop.
const ac = new (window.AudioContext || window.webkitAudioContext)();
for (let i = 0; i < numChunks; i++) {
const byteStart = chunkRegionStart + i * effectiveChunkSize;
const byteEnd = Math.min(byteStart + effectiveChunkSize, chunkRegionStart + chunkRegionSize);
// Allocate peaks proportional to this chunk's share of the audio data
const chunkPeakCount = Math.max(1, Math.round(peakCount * (byteEnd - byteStart) / chunkRegionSize));
try {
let arrayBuf = await file.slice(byteStart, byteEnd).arrayBuffer();
if (wavHeaderTemplate) {
// Build a valid WAV from the original header + this PCM chunk,
// then patch the RIFF and data-chunk size fields so the browser
// accepts it as a standalone audio file.
const combined = new ArrayBuffer(wavHeaderTemplate.byteLength + arrayBuf.byteLength);
const dst = new Uint8Array(combined);
dst.set(new Uint8Array(wavHeaderTemplate));
dst.set(new Uint8Array(arrayBuf), wavHeaderTemplate.byteLength);
const pv = new DataView(combined);
pv.setUint32(4, combined.byteLength - 8, true); // RIFF size
pv.setUint32(wavDataOffset - 4, arrayBuf.byteLength, true); // data chunk size
arrayBuf = combined;
}
const audioBuffer = await ac.decodeAudioData(arrayBuf);
if (i === 0) detectedSampleRate = audioBuffer.sampleRate;
// Pre-fetch all channel data arrays so the hot loop only touches typed arrays
const numChannels = audioBuffer.numberOfChannels;
const channels = Array.from({ length: numChannels }, (_, c) => audioBuffer.getChannelData(c));
const n = audioBuffer.length;
const blockSize = n / chunkPeakCount;
for (let p = 0; p < chunkPeakCount && peaksWritten < peakCount; p++) {
let max = 0;
const s = Math.floor(p * blockSize);
const e = Math.min(Math.floor(s + blockSize), n);
for (let j = s; j < e; j++) {
// Average across channels, then take absolute value
let sample = channels[0][j];
for (let c = 1; c < numChannels; c++) sample += channels[c][j];
const v = Math.abs(sample / numChannels);
if (v > max) max = v;
}
allPeaks[peaksWritten++] = max;
}
} catch {
// Slice failed to decode — advance the write cursor with zeros
// (allPeaks is zero-initialized, so skipped slots read as silence).
peaksWritten = Math.min(peakCount, peaksWritten + chunkPeakCount);
}
if (onProgress) onProgress((i + 1) / numChunks);
}
await ac.close();
const finalPeaks = allPeaks.slice(0, peaksWritten);
// Diagnostic summary: helps spot the all-zero / garbage-peak failure modes.
const max = finalPeaks.reduce((m, v) => Math.max(m, v), 0);
const mean = finalPeaks.reduce((s, v) => s + v, 0) / (finalPeaks.length || 1);
console.log(
`[audio] chunked peaks: ${peaksWritten} values, ` +
`max=${max.toFixed(4)}, mean=${mean.toFixed(4)}, ` +
`first10=[${Array.from(finalPeaks.slice(0, 10)).map(v => v.toFixed(3)).join(',')}]`
);
return {
url,
file,
sampleRate: detectedSampleRate,
peaks: [finalPeaks],
duration,
filename: file.name,
};
}
/**
 * Extracts waveform peaks from a remote audio URL by fetching it in
 * CHUNK_SIZE_BYTES slices via HTTP Range requests and decoding each chunk
 * independently. Peak memory stays under ~100 MB regardless of file length.
 *
 * Returns null if the server does not advertise a Content-Length (chunking
 * is impossible without knowing the total size), the duration cannot be
 * determined, or the server rejects Range requests, so callers should fall
 * back to the standard WaveSurfer decode path.
 *
 * @param {string} url - URL to an audio resource that supports Range requests
 * @param {object} [opts] - optional configuration object
 * @param {function(number):void} [opts.onProgress] - called after each chunk with fraction 0–1
 * @returns {Promise<{peaks: Float32Array[], sampleRate: number, duration: number}|null>}
 */
export async function extractPeaksFromUrl(url, { onProgress } = {}) {
  // Resolve total byte count and duration concurrently without downloading audio
  const [headRes, duration] = await Promise.all([
    fetch(url, { method: 'HEAD' }),
    _getDurationFromUrl(url),
  ]);
  // Explicit radix; also reject non-finite/zero durations, which would make
  // `new Float32Array(peakCount)` below throw.
  const totalBytes = Number.parseInt(headRes.headers.get('Content-Length') || '0', 10);
  if (!totalBytes || !Number.isFinite(duration) || duration <= 0) return null;
  const numChunks = Math.ceil(totalBytes / CHUNK_SIZE_BYTES);
  const peakCount = Math.ceil(duration * PEAKS_PER_SECOND);
  const allPeaks = new Float32Array(peakCount);
  let peaksWritten = 0;
  let detectedSampleRate = 44100;
  const ac = new (window.AudioContext || window.webkitAudioContext)();
  try {
    for (let i = 0; i < numChunks; i++) {
      const byteStart = i * CHUNK_SIZE_BYTES;
      // HTTP Range end offsets are inclusive, hence the -1s.
      const byteEnd = Math.min(byteStart + CHUNK_SIZE_BYTES - 1, totalBytes - 1);
      // Allocate peaks proportional to this chunk's share of the file
      const chunkPeakCount = Math.max(1, Math.round(peakCount * (byteEnd - byteStart + 1) / totalBytes));
      try {
        const res = await fetch(url, { headers: { Range: `bytes=${byteStart}-${byteEnd}` } });
        // 206 = server honoured the Range; 200 means it returned the full file
        if (i === 0 && res.status !== 206) {
          return null; // server doesn't support Range requests — caller falls back
        }
        const arrayBuf = await res.arrayBuffer();
        const audioBuffer = await ac.decodeAudioData(arrayBuf);
        if (i === 0) detectedSampleRate = audioBuffer.sampleRate;
        // Pre-fetch all channel data arrays so the hot loop only touches typed arrays
        const numChannels = audioBuffer.numberOfChannels;
        const channels = Array.from({ length: numChannels }, (_, c) => audioBuffer.getChannelData(c));
        const n = audioBuffer.length;
        const blockSize = n / chunkPeakCount;
        for (let p = 0; p < chunkPeakCount && peaksWritten < peakCount; p++) {
          let max = 0;
          const s = Math.floor(p * blockSize);
          const e = Math.min(Math.floor(s + blockSize), n);
          for (let j = s; j < e; j++) {
            // Average across channels, then take absolute value
            let sample = channels[0][j];
            for (let c = 1; c < numChannels; c++) sample += channels[c][j];
            const v = Math.abs(sample / numChannels);
            if (v > max) max = v;
          }
          allPeaks[peaksWritten++] = max;
        }
      } catch {
        // Slice failed to decode — advance the write cursor with zeros
        peaksWritten = Math.min(peakCount, peaksWritten + chunkPeakCount);
      }
      if (onProgress) onProgress((i + 1) / numChunks);
    }
  } finally {
    // Guarantee the AudioContext is released even if onProgress throws or we
    // bail out early on a non-206 response.
    await ac.close();
  }
  return {
    peaks: [allPeaks.slice(0, peaksWritten)],
    sampleRate: detectedSampleRate,
    duration,
  };
}
// Files whose estimated decoded PCM would exceed this use chunked decoding.
// Estimated as: duration * 48000 Hz * 2 channels * 4 bytes/sample (worst case).
// At that estimate rate (384 kB/s), 600 MB ≈ 27 minutes of audio — keeps peak
// memory under ~1.5 GB on the small path (arrayBuffer + decoded PCM + mono
// copy + WAV blob).
const DECODED_SIZE_THRESHOLD_BYTES = 600 * 1024 * 1024;
/**
 * Reads an audio file, decodes it via the Web Audio API, mixes all channels
 * down to mono, extracts waveform peaks via a Web Worker, and creates a
 * streamable object URL (mono WAV) for playback. Has no knowledge of AppState
 * and can be used independently of the wider application.
 *
 * For files larger than LARGE_FILE_THRESHOLD_BYTES the audio is decoded in
 * small chunks so that peak memory never exceeds ~100 MB regardless of file
 * length (see _extractPeaksChunked).
 *
 * The caller is responsible for revoking the returned URL when it is no longer
 * needed: `URL.revokeObjectURL(url)`.
 *
 * @param {File} file - audio file to load; any format supported by the browser
 * @param {object} [opts] - optional configuration object
 * @param {function(number):void} [opts.onProgress] - progress callback, fraction 0–1 (large files only)
 * @returns {Promise<{url: string, sampleRate: number, peaks: Float32Array[], duration: number, filename: string}>}
 * url - object URL suitable for passing to WaveSurfer.load()
 * sampleRate - the sample rate of the decoded audio in Hz
 * peaks - one Float32Array of absolute peak values (mono mix)
 * duration - total duration of the audio in seconds
 * filename - the original file name
 */
export async function loadAudioFile(file, { onProgress } = {}) {
  if (file.size > LARGE_FILE_THRESHOLD_BYTES) {
    return _extractPeaksChunked(file, onProgress);
  }
  // For small files, also check estimated decoded PCM size. Highly-compressed
  // audio (e.g. low-bitrate MP3) can be tiny on disk but decode to gigabytes.
  const tempUrl = URL.createObjectURL(file);
  const duration = await _getDurationFromUrl(tempUrl);
  URL.revokeObjectURL(tempUrl);
  const estimatedDecodedBytes = duration * 48000 * 2 * 4; // conservative worst case
  if (estimatedDecodedBytes > DECODED_SIZE_THRESHOLD_BYTES) {
    return _extractPeaksChunked(file, onProgress, duration);
  }
  const arrayBuf = await file.arrayBuffer();
  const ac = new (window.AudioContext || window.webkitAudioContext)();
  const audioBuffer = await ac.decodeAudioData(arrayBuf);
  const sampleRate = audioBuffer.sampleRate;
  const totalSamples = audioBuffer.length;
  // Metadata probing can fail (duration 0) even when decoding succeeds; fall
  // back to the decoded buffer's own duration so peakCount is never 0 and the
  // caller receives a usable duration.
  const effectiveDuration = duration > 0 ? duration : audioBuffer.duration;
  const peakCount = Math.ceil(effectiveDuration * PEAKS_PER_SECOND);
  // Mix all channels down to mono
  const numChannels = audioBuffer.numberOfChannels;
  const monoData = new Float32Array(totalSamples);
  for (let c = 0; c < numChannels; c++) {
    const ch = audioBuffer.getChannelData(c);
    for (let i = 0; i < totalSamples; i++) monoData[i] += ch[i];
  }
  if (numChannels > 1) {
    for (let i = 0; i < totalSamples; i++) monoData[i] /= numChannels;
  }
  // Fire-and-forget: nothing depends on the context being fully closed.
  void ac.close();
  // Copy the buffer before transferring it to the worker so monoData stays
  // usable for the WAV encode below.
  const monoBuffer = [monoData.buffer.slice(0)];
  const [peaks, url] = await Promise.all([
    extractPeaks(monoBuffer, peakCount),
    Promise.resolve(URL.createObjectURL(encodeMonoWav(monoData, sampleRate))),
  ]);
  const p = peaks[0];
  const max = p.reduce((m, v) => Math.max(m, v), 0);
  const mean = p.reduce((s, v) => s + v, 0) / (p.length || 1);
  console.log(
    `[audio] small-path peaks: ${p.length} values, ` +
    `max=${max.toFixed(4)}, mean=${mean.toFixed(4)}, ` +
    `first10=[${Array.from(p.slice(0, 10)).map(v => v.toFixed(3)).join(',')}]`
  );
  return { url, sampleRate, peaks, duration: effectiveDuration, filename: file.name };
}
/**
 * Decodes an audio ArrayBuffer and returns an AudioBuffer, a 120-bar peaks
 * array, and a blobUrl suitable for re-upload. The ArrayBuffer is consumed by
 * decodeAudioData; the returned blobUrl is re-encoded from the decoded data.
 * @param {ArrayBuffer} arrayBuffer - raw audio data to decode
 * @returns {Promise<{audioBuffer: AudioBuffer, blobUrl: string, peaks: number[]}>}
 */
export async function decodeSampleArrayBuffer(arrayBuffer) {
  const actx = new (window.AudioContext || window.webkitAudioContext)();
  const audioBuffer = await actx.decodeAudioData(arrayBuffer);
  const data = audioBuffer.getChannelData(0);
  const numBars = 120;
  const peaks = [];
  for (let i = 0; i < numBars; i++) {
    // Proportional block boundaries cover every sample. The previous
    // floor(length/numBars) sizing dropped the tail samples and produced an
    // all-zero waveform for clips shorter than numBars samples (blockSize 0).
    const start = Math.floor((i * data.length) / numBars);
    const end = Math.floor(((i + 1) * data.length) / numBars);
    let max = 0;
    for (let j = start; j < end; j++) {
      const v = Math.abs(data[j]);
      if (v > max) max = v;
    }
    peaks.push(max);
  }
  actx.close();
  const blobUrl = URL.createObjectURL(encodeMonoWav(data, audioBuffer.sampleRate));
  return { audioBuffer, blobUrl, peaks };
}
/**
 * Encodes a Float32Array of mono PCM samples as a 16-bit WAV Blob.
 * Layout: 12-byte RIFF header, 24-byte fmt chunk, 8-byte data chunk header,
 * then little-endian 16-bit samples.
 * @param {Float32Array} samples - mono PCM data in the range [-1, 1]
 * @param {number} sampleRate - sample rate in Hz for the WAV header
 * @returns {Blob} WAV blob with audio/wav MIME type
 */
export function encodeMonoWav(samples, sampleRate) {
  const BYTES_PER_SAMPLE = 2; // 16-bit PCM
  const dataSize = samples.length * BYTES_PER_SAMPLE;
  const buffer = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buffer);
  const writeAscii = (offset, text) => {
    [...text].forEach((ch, i) => view.setUint8(offset + i, ch.charCodeAt(0)));
  };
  // RIFF container header
  writeAscii(0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true);
  writeAscii(8, 'WAVE');
  // fmt chunk: 16-byte PCM format block
  writeAscii(12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true); // audio format: linear PCM
  view.setUint16(22, 1, true); // channel count: mono
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * BYTES_PER_SAMPLE, true); // byte rate
  view.setUint16(32, BYTES_PER_SAMPLE, true); // block align
  view.setUint16(34, 16, true); // bits per sample
  // data chunk header followed by the clamped, scaled samples
  writeAscii(36, 'data');
  view.setUint32(40, dataSize, true);
  samples.forEach((raw, i) => {
    const clamped = Math.max(-1, Math.min(1, raw));
    // Asymmetric scaling: -1 → -0x8000, +1 → +0x7FFF
    view.setInt16(44 + i * BYTES_PER_SAMPLE, clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF, true);
  });
  return new Blob([buffer], { type: 'audio/wav' });
}
/**
 * Extracts per-channel waveform peaks from raw PCM channel buffers using an
 * inline Web Worker, keeping the UI thread free during the computation. Each
 * channel is downsampled to peakCount blocks, with each block's value being
 * the maximum absolute amplitude of the samples within it.
 * @param {ArrayBuffer[]} channelBuffers - raw PCM data, one ArrayBuffer per channel
 * @param {number} peakCount - number of peak samples to extract per channel
 * @returns {Promise<Float32Array[]>} one Float32Array of peak values per channel
 * @throws {Error} if the worker fails to start or crashes (e.g. a CSP that
 *   blocks blob: workers)
 */
async function extractPeaks(channelBuffers, peakCount) {
  return new Promise((resolve, reject) => {
    const workerSrc = `
self.onmessage = function(e) {
const { channelBuffers, peakCount } = e.data;
const outBuffers = channelBuffers.map(buf => {
const data = new Float32Array(buf);
const n = data.length;
const blockSize = n / peakCount;
const out = new Float32Array(peakCount);
for (let i = 0; i < peakCount; i++) {
let max = 0;
const start = Math.floor(i * blockSize);
const end = Math.min(Math.floor(start + blockSize), n);
for (let j = start; j < end; j++) {
const v = data[j] < 0 ? -data[j] : data[j];
if (v > max) max = v;
}
out[i] = max;
}
return out.buffer;
});
self.postMessage({ peaks: outBuffers }, outBuffers);
};
`;
    const workerBlob = new Blob([workerSrc], { type: 'application/javascript' });
    const workerUrl = URL.createObjectURL(workerBlob);
    const worker = new Worker(workerUrl);
    // Shared teardown: release the blob URL and stop the worker thread.
    const cleanup = () => {
      URL.revokeObjectURL(workerUrl);
      worker.terminate();
    };
    worker.onmessage = (e) => {
      cleanup();
      resolve(e.data.peaks.map(buf => new Float32Array(buf)));
    };
    // Without an error handler a worker failure would leave this promise
    // pending forever and leak both the worker and the object URL.
    worker.onerror = (e) => {
      cleanup();
      reject(new Error(`peak-extraction worker failed: ${e.message || 'unknown error'}`));
    };
    worker.postMessage({ channelBuffers, peakCount }, channelBuffers);
  });
}