utilities_transcription_pricing.js
/**
 * Pricing data and estimation functions for Modal transcription jobs.
 * Cost model: (whisper compute time + pyannote compute time) × GPU $/sec,
 * where each compute time is the audio duration multiplied by the model's RTF
 * (real-time factor: seconds of compute per second of audio).
 * Whisper RTFs are empirical estimates for faster-whisper on A10G (roughly 4×
 * faster than the original Whisper PyTorch implementation benchmarked on A100).
 * The pyannote RTF is taken from the pyannote/speaker-diarization-3.x model card.
 */
/**
 * Selectable Whisper models.
 *
 * Each entry has a `key` (model identifier), a `label` (display text) and an
 * `rtf` that the estimators multiply by the audio duration to obtain compute
 * seconds. The table below lists the speed-up over real time; `rtf` is its
 * reciprocal (e.g. 120 → 1/120).
 */
export const WHISPER_MODELS = [
  // [key, label, speed-up over real time]
  ['tiny', 'Tiny — fastest, lowest accuracy', 120],
  ['base', 'Base — fast, lower accuracy', 60],
  ['small', 'Small — balanced', 24],
  ['turbo', 'Turbo — fast + accurate', 32],
  ['medium', 'Medium — recommended', 8],
  ['large', 'Large — most accurate, slowest', 4],
].map(([key, label, speedup]) => ({ key, label, rtf: 1 / speedup }));
/**
 * Selectable pyannote speaker-diarization models.
 *
 * Each entry has a `key` (model identifier), a `label` (display text) and an
 * `rtf` that the estimators multiply by the audio duration to obtain compute
 * seconds. Every listed model currently uses the same rtf of 0.025.
 */
export const PYANNOTE_MODELS = [
  // [key, label]
  ['pyannote/speaker-diarization-3.1', 'Speaker Diarization 3.1'],
  ['pyannote/speaker-diarization-community-1', 'Speaker Diarization Community 1'],
].map(([key, label]) => ({ key, label, rtf: 0.025 }));
/**
 * A10G GPU cost per second on Modal, in USD.
 * 3.06e-4 $/s is $0.000306 per second, i.e. about $1.10 per GPU-hour.
 */
const GPU_PER_SEC = 3.06e-4;
/**
 * Estimates the dollar cost of a transcription job.
 *
 * The cost is the estimated processing time from estimateTime (Whisper plus
 * pyannote compute seconds) billed at GPU_PER_SEC. Delegating to estimateTime
 * keeps the model lookup and fallback rules in a single place, so the cost and
 * time estimates cannot drift apart.
 *
 * @param {number} audioDurationSecs - audio duration in seconds
 * @param {string} whisperModelKey - key of the Whisper model (e.g. 'medium');
 *   an unknown key falls back to 'medium'
 * @param {string} pyannoteModelKey - key of the pyannote diarization model;
 *   an unknown key falls back to the first entry of PYANNOTE_MODELS
 * @returns {number} estimated cost in USD
 */
export function estimateCost(audioDurationSecs, whisperModelKey, pyannoteModelKey) {
  const computeSecs = estimateTime(audioDurationSecs, whisperModelKey, pyannoteModelKey);
  return computeSecs * GPU_PER_SEC;
}
/**
 * Estimates how long a transcription job takes to process.
 *
 * The result is the sum of the Whisper and pyannote compute times, each being
 * the audio duration multiplied by the selected model's rtf.
 *
 * @param {number} audioDurationSecs - audio duration in seconds
 * @param {string} whisperModelKey - key of the Whisper model (e.g. 'medium');
 *   an unknown key falls back to 'medium'
 * @param {string} pyannoteModelKey - key of the pyannote diarization model;
 *   an unknown key falls back to the first entry of PYANNOTE_MODELS
 * @returns {number} estimated processing time in seconds
 */
export function estimateTime(audioDurationSecs, whisperModelKey, pyannoteModelKey) {
  const pickByKey = (models, key) => models.find((model) => model.key === key);

  const { rtf: whisperRtf } =
    pickByKey(WHISPER_MODELS, whisperModelKey) ?? pickByKey(WHISPER_MODELS, 'medium');
  const { rtf: pyannoteRtf } =
    pickByKey(PYANNOTE_MODELS, pyannoteModelKey) ?? PYANNOTE_MODELS[0];

  const whisperSecs = audioDurationSecs * whisperRtf;
  const pyannoteSecs = audioDurationSecs * pyannoteRtf;
  return whisperSecs + pyannoteSecs;
}
/**
 * Renders a USD amount as a short display string.
 *
 * - below one cent: the fixed text '<$0.01'
 * - below ten dollars: two decimal places (e.g. '$1.50')
 * - otherwise: rounded to whole dollars and formatted with toLocaleString()
 *
 * @param {number} n - cost in USD
 * @returns {string} formatted cost string
 */
export function fmtCost(n) {
  let text;
  if (n < 0.01) {
    text = '<$0.01';
  } else if (n < 10) {
    text = `$${n.toFixed(2)}`;
  } else {
    const wholeDollars = Math.round(n);
    text = `$${wholeDollars.toLocaleString()}`;
  }
  return text;
}
/**
 * Calculates a recommended monthly subscription price from tier limits.
 *
 * Formula: ((storage_gb * 0.020) + (costPerHr(mostExpensiveModel) * transcription_hrs_month)) * 1.30
 * where costPerHr(model) = model.rtf * 3600 * GPU_PER_SEC and the most
 * expensive model is the allowed Whisper model with the highest rtf.
 *
 * @param {object} params - Tier feature limits used to calculate the price.
 * @param {number|null} params.storage_gb - storage quota in GB (null = unlimited)
 * @param {number|null} params.transcription_hrs_month - monthly transcription quota in hours (null = unlimited)
 * @param {string[]|null} [params.whisper_models] - allowed Whisper model keys for this tier;
 *   a null or missing list yields null instead of throwing
 * @returns {number|null} recommended price in USD/month, or null if any limit is
 *   unlimited or missing, or if no allowed key matches a known Whisper model
 */
export function calcTierPrice({ storage_gb, transcription_hrs_month, whisper_models }) {
  // No finite price can be derived when any input is unlimited or missing.
  if (storage_gb == null || transcription_hrs_month == null || whisper_models == null) {
    return null;
  }

  const allowedRtfs = WHISPER_MODELS
    .filter((model) => whisper_models.includes(model.key))
    .map((model) => model.rtf);
  if (allowedRtfs.length === 0) return null;

  // Price against the highest-rtf (most compute per audio second) allowed model.
  const highestRtf = Math.max(...allowedRtfs);
  const costPerHr = highestRtf * 3600 * GPU_PER_SEC;

  // NOTE(review): 0.020 is presumably a per-GB monthly storage rate in USD and
  // 1.30 a 30% markup — confirm against the pricing source.
  return ((storage_gb * 0.020) + (costPerHr * transcription_hrs_month)) * 1.30;
}
/**
 * Formats a duration in seconds to a human-readable string.
 *
 * Seconds are shown as whole numbers; minutes and hours are shown with at most
 * one decimal place (a trailing '.0' is dropped). The unit is chosen after
 * rounding, so a value that rounds up to a full minute or hour is promoted to
 * the next unit (59.6 → '1m', 3599 → '1h') instead of being displayed as
 * '60s' or '60m'.
 *
 * @param {number} s - duration in seconds
 * @returns {string} formatted duration string (e.g. '45s', '2.5m', '1h')
 */
export function fmtDur(s) {
  const oneDecimal = (value) => value.toFixed(1).replace(/\.0$/, '');

  const wholeSeconds = Math.round(s);
  if (wholeSeconds < 60) return `${wholeSeconds}s`;

  // Compare the rounded minute text, not the raw value, to catch 59.95+ minutes.
  const minutes = oneDecimal(s / 60);
  if (Number(minutes) < 60) return `${minutes}m`;

  return `${oneDecimal(s / 3600)}h`;
}