/**
* @module audio
*/
import { PEAKS_PER_SECOND, LARGE_FILE_THRESHOLD_BYTES, SUPPORTED_AUDIO_EXTENSIONS, CHUNK_SIZE_BYTES } from "./constants.js";
/**
 * Returns true if the File appears to be a supported audio file.
 * An audio/* MIME type is accepted immediately; otherwise the file
 * extension decides, since OSes and browsers often omit or mis-report
 * the MIME type (e.g. video/mp4 for .m4a).
 * @param {File} file - the file to test
 * @returns {boolean}
 */
export function isAudioFile(file) {
  if (!file) return false;
  if (file.type.startsWith('audio/')) return true;
  // MIME type missing or mis-reported — fall back to the extension.
  const dotIndex = file.name.lastIndexOf('.');
  return dotIndex !== -1
    && SUPPORTED_AUDIO_EXTENSIONS.has(file.name.slice(dotIndex).toLowerCase());
}
/**
 * Returns the duration of a media URL by loading it into a temporary Audio
 * element and waiting for 'loadedmetadata'. No full decode is performed.
 * @param {string} url - the audio URL to measure
 * @returns {Promise<number>} finite duration in seconds (0 on error or when
 *   the element reports a non-finite duration)
 */
function _getDurationFromUrl(url) {
  return new Promise((resolve) => {
    const audio = new Audio();
    audio.addEventListener('loadedmetadata', () => {
      // Some sources (streamed blobs, certain containers) report Infinity or
      // NaN here; normalize to 0 so callers never size a Float32Array with a
      // non-finite peak count.
      resolve(Number.isFinite(audio.duration) ? audio.duration : 0);
    }, { once: true });
    audio.addEventListener('error', () => resolve(0), { once: true });
    audio.src = url;
  });
}
/**
 * Extracts waveform peaks from a large audio file by decoding it in
 * CHUNK_SIZE_BYTES slices. Only one decoded buffer is held in memory at a
 * time, so peak memory stays well under 100 MB regardless of file length.
 *
 * Each slice is passed to decodeAudioData independently. MP3 frames are
 * self-contained so boundary splits cause at most a few milliseconds of
 * silence; visually this is invisible on a waveform. Slices that fail to
 * decode (e.g. unsupported container fragments) contribute zero peaks.
 *
 * WAV files are handled specially: each PCM chunk is prepended with the
 * original file's RIFF header (with sizes patched) before being passed to
 * decodeAudioData. This avoids the headerless-chunk problem and delegates
 * all format-specific decoding (PCM, float, extensible, etc.) to the
 * browser's native decoder rather than reimplementing it in JavaScript.
 *
 * @param {File} file - audio file to decode in chunks
 * @param {function(number):void} [onProgress] - called after each chunk with fraction 0–1
 * @param {number} [knownDuration] - duration in seconds if already known, skips a metadata fetch
 * @returns {Promise<{url, sampleRate, peaks, duration, filename}>}
 */
async function _extractPeaksChunked(file, onProgress, knownDuration) {
// ── WAV special case ─────────────────────────────────────────────────────
// WAV files have a single RIFF header at the start; plain byte slices from
// chunk 2 onwards have no header and decodeAudioData rejects them, filling
// those regions with zeros. Fix: locate the PCM data region once, then
// prepend the original header bytes to every chunk before decoding.
const isWav = file.type === 'audio/wav' || file.type === 'audio/x-wav'
|| file.name.toLowerCase().endsWith('.wav');
let wavHeaderTemplate = null; // ArrayBuffer containing bytes [0 .. dataOffset)
let wavDataOffset = 0; // byte offset of PCM data within the file
let wavDataSize = 0; // byte length of PCM data region
let wavBytesPerFrame = 0; // bytes per interleaved audio frame (channels × bytesPerSample)
if (isWav) {
// Locate the fmt and data chunks by scanning the file header.
// Professional recordings often embed large metadata blocks (iXML, bext,
// axml, large LIST chunks, etc.) that can push the data chunk well past
// 64 KB. We scan in doubling windows — starting at 64 KB and growing up
// to 100 MB — so the common case stays fast while exotic files still work.
const MAX_HEADER_SCAN = Math.min(100 * 1024 * 1024, file.size);
let scanSize = Math.min(65536, file.size);
let scanBuf = await file.slice(0, scanSize).arrayBuffer();
let scanView = new DataView(scanBuf);
// Reads a 4-character ASCII chunk tag (e.g. 'fmt ', 'data') at `off`.
const readTag = (off) => String.fromCharCode(
scanView.getUint8(off), scanView.getUint8(off+1),
scanView.getUint8(off+2), scanView.getUint8(off+3));
// RF64 (BW64) uses 'RF64' instead of 'RIFF' for files > 4 GB; the rest
// of the chunk layout is identical. The 'data' chunk size field is
// 0xFFFFFFFF in RF64 — chunkRegionSize = Math.min(wavDataSize, file.size−wavDataOffset)
// handles that sentinel correctly without needing to parse the ds64 chunk.
const magic = scanBuf.byteLength >= 12 ? readTag(0) : '';
const form = scanBuf.byteLength >= 12 ? readTag(8) : '';
if ((magic === 'RIFF' || magic === 'RF64') && form === 'WAVE') {
let pos = 12;
scanLoop: while (pos < file.size) {
// Expand the buffer as needed to read the next chunk header (8 bytes)
while (pos + 8 > scanSize && scanSize < MAX_HEADER_SCAN) {
scanSize = Math.min(scanSize * 2, MAX_HEADER_SCAN, file.size);
scanBuf = await file.slice(0, scanSize).arrayBuffer();
scanView = new DataView(scanBuf);
}
if (pos + 8 > scanBuf.byteLength) break;
const id = readTag(pos);
const chunkSz = scanView.getUint32(pos + 4, true);
if (id === 'fmt ') {
// Expand if needed to read the format fields (need at least pos+24)
while (pos + 24 > scanSize && scanSize < MAX_HEADER_SCAN) {
scanSize = Math.min(scanSize * 2, MAX_HEADER_SCAN, file.size);
scanBuf = await file.slice(0, scanSize).arrayBuffer();
scanView = new DataView(scanBuf);
}
if (pos + 24 <= scanBuf.byteLength) {
// fmt chunk layout: +10 = channel count, +22 = bits per sample
const nCh = scanView.getUint16(pos + 10, true);
const bps = scanView.getUint16(pos + 22, true);
wavBytesPerFrame = nCh * (bps >> 3);
}
} else if (id === 'data') {
wavDataOffset = pos + 8;
wavDataSize = chunkSz;
break scanLoop;
}
if (chunkSz === 0) break; // guard: stop on malformed zero-size chunk
// RIFF chunks are word-aligned: odd-sized chunks carry one pad byte.
pos += 8 + chunkSz + (chunkSz & 1);
}
}
if (wavDataOffset > 0) {
// Read the exact header bytes (everything before the PCM data) as a
// reusable template that will be prepended to every decoded chunk.
wavHeaderTemplate = await file.slice(0, wavDataOffset).arrayBuffer();
console.log(`[audio] WAV header: dataOffset=${wavDataOffset}, bytesPerFrame=${wavBytesPerFrame}, headerScannedTo=${scanSize}`);
} else {
console.warn(`[audio] WAV data chunk not found in header scan (magic='${magic}', form='${form}') — falling back to whole-file chunking (only chunk 0 will decode)`);
}
// If we couldn't parse the header, fall through — decodeAudioData will
// at least succeed on chunk 0 and fail silently on the rest.
}
// For WAV, align the chunk size to a whole number of frames so that every
// chunk starts on a valid frame boundary. Without this, formats whose frame
// size does not divide CHUNK_SIZE_BYTES evenly (e.g. 24-bit stereo = 6 bytes/
// frame, 4 194 304 mod 6 = 4) produce misaligned PCM slices from chunk 2
// onwards, causing decodeAudioData to misinterpret sample boundaries and
// return near-maximum-amplitude garbage values.
const effectiveChunkSize = (wavBytesPerFrame > 1)
? Math.floor(CHUNK_SIZE_BYTES / wavBytesPerFrame) * wavBytesPerFrame
: CHUNK_SIZE_BYTES;
// The object URL is returned to the caller, who is responsible for revoking it.
const url = URL.createObjectURL(file);
const duration = knownDuration ?? await _getDurationFromUrl(url);
// NOTE(review): if duration resolves to 0 (metadata failure), peakCount is 0
// and the returned peaks array is empty — confirm callers tolerate that.
// For WAV, iterate only over the PCM data region (header is prepended separately).
// For all other formats, iterate over the whole file as before.
const chunkRegionStart = wavHeaderTemplate ? wavDataOffset : 0;
const chunkRegionSize = wavHeaderTemplate
? Math.min(wavDataSize, file.size - wavDataOffset)
: file.size;
const numChunks = Math.ceil(chunkRegionSize / effectiveChunkSize);
const peakCount = Math.ceil(duration * PEAKS_PER_SECOND);
const allPeaks = new Float32Array(peakCount);
let peaksWritten = 0;
let detectedSampleRate = 44100;
// A single AudioContext is reused for every chunk and closed after the loop.
const ac = new (window.AudioContext || window.webkitAudioContext)();
for (let i = 0; i < numChunks; i++) {
const byteStart = chunkRegionStart + i * effectiveChunkSize;
const byteEnd = Math.min(byteStart + effectiveChunkSize, chunkRegionStart + chunkRegionSize);
// Allocate peaks proportional to this chunk's share of the audio data
const chunkPeakCount = Math.max(1, Math.round(peakCount * (byteEnd - byteStart) / chunkRegionSize));
try {
let arrayBuf = await file.slice(byteStart, byteEnd).arrayBuffer();
if (wavHeaderTemplate) {
// Build a valid WAV from the original header + this PCM chunk,
// then patch the RIFF and data-chunk size fields so the browser
// accepts it as a standalone audio file.
const combined = new ArrayBuffer(wavHeaderTemplate.byteLength + arrayBuf.byteLength);
const dst = new Uint8Array(combined);
dst.set(new Uint8Array(wavHeaderTemplate));
dst.set(new Uint8Array(arrayBuf), wavHeaderTemplate.byteLength);
const pv = new DataView(combined);
pv.setUint32(4, combined.byteLength - 8, true); // RIFF size
pv.setUint32(wavDataOffset - 4, arrayBuf.byteLength, true); // data chunk size
arrayBuf = combined;
}
const audioBuffer = await ac.decodeAudioData(arrayBuf);
if (i === 0) detectedSampleRate = audioBuffer.sampleRate;
// Pre-fetch all channel data arrays so the hot loop only touches typed arrays
const numChannels = audioBuffer.numberOfChannels;
const channels = Array.from({ length: numChannels }, (_, c) => audioBuffer.getChannelData(c));
const n = audioBuffer.length;
const blockSize = n / chunkPeakCount;
for (let p = 0; p < chunkPeakCount && peaksWritten < peakCount; p++) {
let max = 0;
const s = Math.floor(p * blockSize);
const e = Math.min(Math.floor(s + blockSize), n);
for (let j = s; j < e; j++) {
// Average across channels, then take absolute value
let sample = channels[0][j];
for (let c = 1; c < numChannels; c++) sample += channels[c][j];
const v = Math.abs(sample / numChannels);
if (v > max) max = v;
}
allPeaks[peaksWritten++] = max;
}
} catch {
// Slice failed to decode — advance the write cursor with zeros
// (allPeaks is zero-initialized, so skipped slots read as silence).
peaksWritten = Math.min(peakCount, peaksWritten + chunkPeakCount);
}
if (onProgress) onProgress((i + 1) / numChunks);
}
await ac.close();
const finalPeaks = allPeaks.slice(0, peaksWritten);
// Diagnostic summary: helps spot the all-zero / garbage-peak failure modes.
const max = finalPeaks.reduce((m, v) => Math.max(m, v), 0);
const mean = finalPeaks.reduce((s, v) => s + v, 0) / (finalPeaks.length || 1);
console.log(
`[audio] chunked peaks: ${peaksWritten} values, ` +
`max=${max.toFixed(4)}, mean=${mean.toFixed(4)}, ` +
`first10=[${Array.from(finalPeaks.slice(0, 10)).map(v => v.toFixed(3)).join(',')}]`
);
return {
url,
file,
sampleRate: detectedSampleRate,
peaks: [finalPeaks],
duration,
filename: file.name,
};
}
/**
 * Extracts waveform peaks from a remote audio URL by fetching it in
 * CHUNK_SIZE_BYTES slices via HTTP Range requests and decoding each chunk
 * independently. Peak memory stays under ~100 MB regardless of file length.
 *
 * Returns null if the server does not advertise a Content-Length (chunking
 * is impossible without knowing the total size), the duration cannot be
 * determined, or the server rejects Range requests, so callers should fall
 * back to the standard WaveSurfer decode path.
 *
 * @param {string} url - URL to an audio resource that supports Range requests
 * @param {object} [opts] - optional configuration object
 * @param {function(number):void} [opts.onProgress] - called after each chunk with fraction 0–1
 * @returns {Promise<{peaks: Float32Array[], sampleRate: number, duration: number}|null>}
 */
export async function extractPeaksFromUrl(url, { onProgress } = {}) {
  // Resolve total byte count and duration concurrently without downloading audio
  const [headRes, duration] = await Promise.all([
    fetch(url, { method: 'HEAD' }),
    _getDurationFromUrl(url),
  ]);
  // Explicit radix; also reject non-finite/zero durations, which would make
  // `new Float32Array(peakCount)` below throw.
  const totalBytes = Number.parseInt(headRes.headers.get('Content-Length') || '0', 10);
  if (!totalBytes || !Number.isFinite(duration) || duration <= 0) return null;
  const numChunks = Math.ceil(totalBytes / CHUNK_SIZE_BYTES);
  const peakCount = Math.ceil(duration * PEAKS_PER_SECOND);
  const allPeaks = new Float32Array(peakCount);
  let peaksWritten = 0;
  let detectedSampleRate = 44100;
  const ac = new (window.AudioContext || window.webkitAudioContext)();
  try {
    for (let i = 0; i < numChunks; i++) {
      const byteStart = i * CHUNK_SIZE_BYTES;
      // HTTP Range end offsets are inclusive, hence the -1s.
      const byteEnd = Math.min(byteStart + CHUNK_SIZE_BYTES - 1, totalBytes - 1);
      // Allocate peaks proportional to this chunk's share of the file
      const chunkPeakCount = Math.max(1, Math.round(peakCount * (byteEnd - byteStart + 1) / totalBytes));
      try {
        const res = await fetch(url, { headers: { Range: `bytes=${byteStart}-${byteEnd}` } });
        // 206 = server honoured the Range; 200 means it returned the full file
        if (i === 0 && res.status !== 206) {
          return null; // server doesn't support Range requests — caller falls back
        }
        const arrayBuf = await res.arrayBuffer();
        const audioBuffer = await ac.decodeAudioData(arrayBuf);
        if (i === 0) detectedSampleRate = audioBuffer.sampleRate;
        // Pre-fetch all channel data arrays so the hot loop only touches typed arrays
        const numChannels = audioBuffer.numberOfChannels;
        const channels = Array.from({ length: numChannels }, (_, c) => audioBuffer.getChannelData(c));
        const n = audioBuffer.length;
        const blockSize = n / chunkPeakCount;
        for (let p = 0; p < chunkPeakCount && peaksWritten < peakCount; p++) {
          let max = 0;
          const s = Math.floor(p * blockSize);
          const e = Math.min(Math.floor(s + blockSize), n);
          for (let j = s; j < e; j++) {
            // Average across channels, then take absolute value
            let sample = channels[0][j];
            for (let c = 1; c < numChannels; c++) sample += channels[c][j];
            const v = Math.abs(sample / numChannels);
            if (v > max) max = v;
          }
          allPeaks[peaksWritten++] = max;
        }
      } catch {
        // Slice failed to decode — advance the write cursor with zeros
        peaksWritten = Math.min(peakCount, peaksWritten + chunkPeakCount);
      }
      if (onProgress) onProgress((i + 1) / numChunks);
    }
  } finally {
    // Guarantee the AudioContext is released even if onProgress throws or we
    // bail out early on a non-206 response.
    await ac.close();
  }
  return {
    peaks: [allPeaks.slice(0, peaksWritten)],
    sampleRate: detectedSampleRate,
    duration,
  };
}
// Files whose estimated decoded PCM would exceed this use chunked decoding.
// Estimated as: duration * 48000 Hz * 2 channels * 4 bytes/sample (worst case).
// At that estimate rate (384 kB/s), 600 MB ≈ 27 minutes of audio — keeps peak
// memory under ~1.5 GB on the small path (arrayBuffer + decoded PCM + mono
// copy + WAV blob).
const DECODED_SIZE_THRESHOLD_BYTES = 600 * 1024 * 1024;
/**
 * Reads an audio file, decodes it via the Web Audio API, mixes all channels
 * down to mono, extracts waveform peaks via a Web Worker, and creates a
 * streamable object URL (mono WAV) for playback. Has no knowledge of AppState
 * and can be used independently of the wider application.
 *
 * For files larger than LARGE_FILE_THRESHOLD_BYTES the audio is decoded in
 * small chunks so that peak memory never exceeds ~100 MB regardless of file
 * length (see _extractPeaksChunked).
 *
 * The caller is responsible for revoking the returned URL when it is no longer
 * needed: `URL.revokeObjectURL(url)`.
 *
 * @param {File} file - audio file to load; any format supported by the browser
 * @param {object} [opts] - optional configuration object
 * @param {function(number):void} [opts.onProgress] - progress callback, fraction 0–1 (large files only)
 * @returns {Promise<{url: string, sampleRate: number, peaks: Float32Array[], duration: number, filename: string}>}
 * url - object URL suitable for passing to WaveSurfer.load()
 * sampleRate - the sample rate of the decoded audio in Hz
 * peaks - one Float32Array of absolute peak values (mono mix)
 * duration - total duration of the audio in seconds
 * filename - the original file name
 */
export async function loadAudioFile(file, { onProgress } = {}) {
  if (file.size > LARGE_FILE_THRESHOLD_BYTES) {
    return _extractPeaksChunked(file, onProgress);
  }
  // For small files, also check estimated decoded PCM size. Highly-compressed
  // audio (e.g. low-bitrate MP3) can be tiny on disk but decode to gigabytes.
  const tempUrl = URL.createObjectURL(file);
  const duration = await _getDurationFromUrl(tempUrl);
  URL.revokeObjectURL(tempUrl);
  const estimatedDecodedBytes = duration * 48000 * 2 * 4; // conservative worst case
  if (estimatedDecodedBytes > DECODED_SIZE_THRESHOLD_BYTES) {
    return _extractPeaksChunked(file, onProgress, duration);
  }
  const arrayBuf = await file.arrayBuffer();
  const ac = new (window.AudioContext || window.webkitAudioContext)();
  const audioBuffer = await ac.decodeAudioData(arrayBuf);
  const sampleRate = audioBuffer.sampleRate;
  const totalSamples = audioBuffer.length;
  // Metadata probing can fail (duration 0) even when decoding succeeds; fall
  // back to the decoded buffer's own duration so peakCount is never 0 and the
  // caller receives a usable duration.
  const effectiveDuration = duration > 0 ? duration : audioBuffer.duration;
  const peakCount = Math.ceil(effectiveDuration * PEAKS_PER_SECOND);
  // Mix all channels down to mono
  const numChannels = audioBuffer.numberOfChannels;
  const monoData = new Float32Array(totalSamples);
  for (let c = 0; c < numChannels; c++) {
    const ch = audioBuffer.getChannelData(c);
    for (let i = 0; i < totalSamples; i++) monoData[i] += ch[i];
  }
  if (numChannels > 1) {
    for (let i = 0; i < totalSamples; i++) monoData[i] /= numChannels;
  }
  // Fire-and-forget: nothing depends on the context being fully closed.
  void ac.close();
  // Copy the buffer before transferring it to the worker so monoData stays
  // usable for the WAV encode below.
  const monoBuffer = [monoData.buffer.slice(0)];
  const [peaks, url] = await Promise.all([
    extractPeaks(monoBuffer, peakCount),
    Promise.resolve(URL.createObjectURL(encodeMonoWav(monoData, sampleRate))),
  ]);
  const p = peaks[0];
  const max = p.reduce((m, v) => Math.max(m, v), 0);
  const mean = p.reduce((s, v) => s + v, 0) / (p.length || 1);
  console.log(
    `[audio] small-path peaks: ${p.length} values, ` +
    `max=${max.toFixed(4)}, mean=${mean.toFixed(4)}, ` +
    `first10=[${Array.from(p.slice(0, 10)).map(v => v.toFixed(3)).join(',')}]`
  );
  return { url, sampleRate, peaks, duration: effectiveDuration, filename: file.name };
}
/**
 * Decodes an audio ArrayBuffer and returns an AudioBuffer, a 120-bar peaks
 * array, and a blobUrl suitable for re-upload. The ArrayBuffer is consumed by
 * decodeAudioData; the returned blobUrl is re-encoded from the decoded data.
 * @param {ArrayBuffer} arrayBuffer - raw audio data to decode
 * @returns {Promise<{audioBuffer: AudioBuffer, blobUrl: string, peaks: number[]}>}
 */
export async function decodeSampleArrayBuffer(arrayBuffer) {
  const actx = new (window.AudioContext || window.webkitAudioContext)();
  const audioBuffer = await actx.decodeAudioData(arrayBuffer);
  const data = audioBuffer.getChannelData(0);
  const numBars = 120;
  const peaks = [];
  for (let i = 0; i < numBars; i++) {
    // Proportional block boundaries cover every sample. The previous
    // floor(length/numBars) sizing dropped the tail samples and produced an
    // all-zero waveform for clips shorter than numBars samples (blockSize 0).
    const start = Math.floor((i * data.length) / numBars);
    const end = Math.floor(((i + 1) * data.length) / numBars);
    let max = 0;
    for (let j = start; j < end; j++) {
      const v = Math.abs(data[j]);
      if (v > max) max = v;
    }
    peaks.push(max);
  }
  actx.close();
  const blobUrl = URL.createObjectURL(encodeMonoWav(data, audioBuffer.sampleRate));
  return { audioBuffer, blobUrl, peaks };
}
/**
 * Encodes a Float32Array of mono PCM samples as a 16-bit WAV Blob.
 * Layout: 12-byte RIFF header, 24-byte fmt chunk, 8-byte data chunk header,
 * then little-endian 16-bit samples.
 * @param {Float32Array} samples - mono PCM data in the range [-1, 1]
 * @param {number} sampleRate - sample rate in Hz for the WAV header
 * @returns {Blob} WAV blob with audio/wav MIME type
 */
export function encodeMonoWav(samples, sampleRate) {
  const BYTES_PER_SAMPLE = 2; // 16-bit PCM
  const dataSize = samples.length * BYTES_PER_SAMPLE;
  const buffer = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buffer);
  const writeAscii = (offset, text) => {
    [...text].forEach((ch, i) => view.setUint8(offset + i, ch.charCodeAt(0)));
  };
  // RIFF container header
  writeAscii(0, 'RIFF');
  view.setUint32(4, 36 + dataSize, true);
  writeAscii(8, 'WAVE');
  // fmt chunk: 16-byte PCM format block
  writeAscii(12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true); // audio format: linear PCM
  view.setUint16(22, 1, true); // channel count: mono
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * BYTES_PER_SAMPLE, true); // byte rate
  view.setUint16(32, BYTES_PER_SAMPLE, true); // block align
  view.setUint16(34, 16, true); // bits per sample
  // data chunk header followed by the clamped, scaled samples
  writeAscii(36, 'data');
  view.setUint32(40, dataSize, true);
  samples.forEach((raw, i) => {
    const clamped = Math.max(-1, Math.min(1, raw));
    // Asymmetric scaling: -1 → -0x8000, +1 → +0x7FFF
    view.setInt16(44 + i * BYTES_PER_SAMPLE, clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF, true);
  });
  return new Blob([buffer], { type: 'audio/wav' });
}
/**
 * Extracts per-channel waveform peaks from raw PCM channel buffers using an
 * inline Web Worker, keeping the UI thread free during the computation. Each
 * channel is downsampled to peakCount blocks, with each block's value being
 * the maximum absolute amplitude of the samples within it.
 * @param {ArrayBuffer[]} channelBuffers - raw PCM data, one ArrayBuffer per channel
 * @param {number} peakCount - number of peak samples to extract per channel
 * @returns {Promise<Float32Array[]>} one Float32Array of peak values per channel
 * @throws {Error} if the worker fails to start or crashes (e.g. a CSP that
 *   blocks blob: workers)
 */
async function extractPeaks(channelBuffers, peakCount) {
  return new Promise((resolve, reject) => {
    const workerSrc = `
self.onmessage = function(e) {
const { channelBuffers, peakCount } = e.data;
const outBuffers = channelBuffers.map(buf => {
const data = new Float32Array(buf);
const n = data.length;
const blockSize = n / peakCount;
const out = new Float32Array(peakCount);
for (let i = 0; i < peakCount; i++) {
let max = 0;
const start = Math.floor(i * blockSize);
const end = Math.min(Math.floor(start + blockSize), n);
for (let j = start; j < end; j++) {
const v = data[j] < 0 ? -data[j] : data[j];
if (v > max) max = v;
}
out[i] = max;
}
return out.buffer;
});
self.postMessage({ peaks: outBuffers }, outBuffers);
};
`;
    const workerBlob = new Blob([workerSrc], { type: 'application/javascript' });
    const workerUrl = URL.createObjectURL(workerBlob);
    const worker = new Worker(workerUrl);
    // Shared teardown: release the blob URL and stop the worker thread.
    const cleanup = () => {
      URL.revokeObjectURL(workerUrl);
      worker.terminate();
    };
    worker.onmessage = (e) => {
      cleanup();
      resolve(e.data.peaks.map(buf => new Float32Array(buf)));
    };
    // Without an error handler a worker failure would leave this promise
    // pending forever and leak both the worker and the object URL.
    worker.onerror = (e) => {
      cleanup();
      reject(new Error(`peak-extraction worker failed: ${e.message || 'unknown error'}`));
    };
    worker.postMessage({ channelBuffers, peakCount }, channelBuffers);
  });
}