309 lines
8.8 KiB
TypeScript
309 lines
8.8 KiB
TypeScript
import {
  type CreativeAudioAsset,
} from './creativeAudioFileAsset';
|
||
|
||
/**
 * `globalThis` widened with an optional vendor-prefixed `webkitAudioContext`
 * constructor, so the prefixed name can be read without a type assertion to
 * `any`.
 */
type BrowserAudioGlobal = typeof globalThis & {
  webkitAudioContext?: typeof AudioContext;
};
|
||
|
||
/**
 * Tag describing where a creative audio clip came from. It is stored on the
 * pending asset and embedded in its generated `assetId`.
 */
export type CreativeAudioSource = 'uploaded' | 'recorded';
|
||
|
||
/**
 * A locally processed audio asset: the base `CreativeAudioAsset` fields plus
 * the in-memory data produced by `prepareCreativeAudioFileForLocalUse`.
 */
export type PendingCreativeAudioAsset = CreativeAudioAsset & {
  /** File name of the processed clip (always ends in `.wav`). */
  fileName: string;
  /** MIME type of `blob`. */
  mimeType: string;
  /** The processed audio bytes. */
  blob: Blob;
  /** Where the clip came from. */
  source: CreativeAudioSource;
  /** Object URL for `blob`, or `''` when `URL.createObjectURL` is unavailable. */
  previewUrl: string;
  /** Duration of the trimmed, audible section in milliseconds. */
  durationMs: number;
};
|
||
|
||
/** Optional tuning knobs for local audio processing; every field has a default. */
export type CreativeAudioProcessingOptions = {
  /** Longest accepted audible section in milliseconds. Defaults to 1000. */
  maxDurationMs?: number;
  /**
   * Absolute sample amplitude a frame must exceed to count as audible.
   * Defaults to 0.01; negative values are treated as 0.
   */
  silenceThreshold?: number;
  /** Loudness target used to derive the gain. Defaults to -15. */
  targetLkfs?: number;
  /**
   * Largest absolute sample value the gain may produce. Defaults to 0.98;
   * values below 0.01 are raised to 0.01.
   */
  peakCeiling?: number;
};
|
||
|
||
/** A contiguous run of frames inside an audio buffer. */
export type AudibleFrameRange = {
  /** Zero-based index of the first frame in the run. */
  startFrame: number;
  /** Number of frames in the run, including the first and last frame. */
  frameCount: number;
};
|
||
|
||
/** Default upper bound for the audible section, in milliseconds. */
const DEFAULT_MAX_DURATION_MS = 1000;
/** Default absolute sample amplitude above which a frame counts as audible. */
const DEFAULT_SILENCE_THRESHOLD = 0.01;
/** Default loudness target (the module cites GY/T 377-2023 for this value). */
const DEFAULT_TARGET_LKFS = -15;
/** Default largest absolute sample value allowed after gain is applied. */
const DEFAULT_PEAK_CEILING = 0.98;
/** Size of the RIFF/WAVE header written before the sample data. */
const WAV_HEADER_BYTE_LENGTH = 44;
/** Output sample width in bits. */
const WAV_BITS_PER_SAMPLE = 16;
/** Output sample width in bytes, derived from the bit width. */
const WAV_BYTES_PER_SAMPLE = WAV_BITS_PER_SAMPLE / 8;
|
||
|
||
/**
 * Turns a user-supplied audio file into a locally usable pending asset.
 *
 * Steps: validate the file, decode it, trim leading/trailing silence, reject
 * clips whose audible section is too long, normalise loudness, re-encode as
 * 16-bit PCM WAV and create an object URL for preview.
 *
 * @param file - The audio file to process.
 * @param source - Where the clip came from; copied onto the result and used
 *   in the generated `assetId`.
 * @param options - Optional overrides for the processing defaults.
 * @returns The processed asset. `previewUrl` / `audioSrc` are `''` when
 *   `URL.createObjectURL` is unavailable; otherwise the caller owns the
 *   object URL.
 * @throws Error when the file is empty or not audio, cannot be decoded, has
 *   no audible frame, or its audible section exceeds the maximum duration.
 */
export async function prepareCreativeAudioFileForLocalUse(
  file: File,
  source: CreativeAudioSource,
  options: CreativeAudioProcessingOptions = {},
): Promise<PendingCreativeAudioAsset> {
  validateCreativeAudioFile(file);

  const decodedBuffer = await decodeCreativeAudioFile(file);
  const range = findAudibleFrameRange(
    decodedBuffer,
    options.silenceThreshold ?? DEFAULT_SILENCE_THRESHOLD,
  );
  if (!range) {
    throw new Error('音频声音过小,请重新录制或上传。');
  }

  // Duration is measured on the trimmed section, not on the whole file.
  const durationMs = Math.round(
    (range.frameCount / decodedBuffer.sampleRate) * 1000,
  );
  const maxDurationMs = options.maxDurationMs ?? DEFAULT_MAX_DURATION_MS;
  if (durationMs > maxDurationMs) {
    throw new Error(`音频最长 ${formatDurationSeconds(maxDurationMs)} 秒。`);
  }

  const normalized = normalizeAudioBufferSection(decodedBuffer, range, {
    targetLkfs: options.targetLkfs ?? DEFAULT_TARGET_LKFS,
    peakCeiling: options.peakCeiling ?? DEFAULT_PEAK_CEILING,
  });
  const blob = encodePcmChannelsToWavBlob(normalized, decodedBuffer.sampleRate);
  const fileName = buildProcessedAudioFileName(file.name);
  // Object URLs only exist in browser-like environments; fall back to ''.
  const previewUrl =
    typeof URL !== 'undefined' && typeof URL.createObjectURL === 'function'
      ? URL.createObjectURL(blob)
      : '';

  return {
    assetId: `local-${source}-${Date.now()}`,
    audioSrc: previewUrl,
    audioObjectKey: '',
    assetObjectId: '',
    source,
    prompt: file.name,
    durationMs,
    fileName,
    mimeType: blob.type,
    blob,
    previewUrl,
  };
}
|
||
|
||
/**
 * Finds the span from the first to the last audible frame of a buffer.
 *
 * A frame is audible when any channel's absolute sample value exceeds the
 * threshold. Quiet frames between the first and last audible frame are kept.
 *
 * @param buffer - Decoded audio to scan.
 * @param silenceThreshold - Amplitude a sample must exceed; negative values
 *   are treated as 0.
 * @returns The audible span, or `null` when no frame is audible.
 */
export function findAudibleFrameRange(
  buffer: AudioBuffer,
  silenceThreshold = DEFAULT_SILENCE_THRESHOLD,
): AudibleFrameRange | null {
  const threshold = Math.max(0, silenceThreshold);

  let startFrame = -1;
  for (let frameIndex = 0; frameIndex < buffer.length; frameIndex += 1) {
    if (isFrameAudible(buffer, frameIndex, threshold)) {
      startFrame = frameIndex;
      break;
    }
  }
  if (startFrame < 0) {
    return null;
  }

  // The start frame is known to be audible, so the backward scan can stop
  // there without re-testing it and can never come up empty.
  let endFrame = buffer.length - 1;
  while (endFrame > startFrame && !isFrameAudible(buffer, endFrame, threshold)) {
    endFrame -= 1;
  }

  return {
    startFrame,
    frameCount: endFrame - startFrame + 1,
  };
}
|
||
|
||
/**
 * Copies a frame range out of a buffer and applies one gain to all channels
 * so the section approaches the target loudness without exceeding the peak
 * ceiling.
 *
 * @param buffer - Decoded audio to read from.
 * @param range - Frames to copy.
 * @param options - `targetLkfs` and `peakCeiling` overrides. Missing or `NaN`
 *   values fall back to the defaults; the ceiling is never below 0.01.
 * @returns One `Float32Array` per channel (at least one), each
 *   `range.frameCount` long, with samples clamped to [-1, 1].
 * @throws Error when the section is completely silent.
 */
export function normalizeAudioBufferSection(
  buffer: AudioBuffer,
  range: AudibleFrameRange,
  options: Pick<CreativeAudioProcessingOptions, 'targetLkfs' | 'peakCeiling'> = {},
): Float32Array[] {
  const channelCount = Math.max(1, buffer.numberOfChannels);

  // A NaN option would turn the gain, and with it every sample, into NaN,
  // which the WAV encoder writes as silence. Fall back to the defaults.
  const requestedLkfs = options.targetLkfs ?? DEFAULT_TARGET_LKFS;
  const targetLkfs = Number.isNaN(requestedLkfs)
    ? DEFAULT_TARGET_LKFS
    : requestedLkfs;
  const requestedCeiling = options.peakCeiling ?? DEFAULT_PEAK_CEILING;
  const peakCeiling = Math.max(
    0.01,
    Number.isNaN(requestedCeiling) ? DEFAULT_PEAK_CEILING : requestedCeiling,
  );

  const channels = Array.from({ length: channelCount }, (_value, channelIndex) =>
    copyChannelSection(buffer, channelIndex, range),
  );
  const stats = measurePcmStats(channels);
  if (stats.rms <= 0 || stats.peak <= 0) {
    throw new Error('音频声音过小,请重新录制或上传。');
  }

  // Browser-side approximation: estimate LKFS from the RMS over all channels,
  // then pull the level towards the GY/T 377-2023 target of -15 LKFS.
  const targetLinear = Math.pow(10, targetLkfs / 20);
  const loudnessGain = targetLinear / stats.rms;
  // Never amplify so far that the loudest sample exceeds the peak ceiling.
  const protectedGain = Math.min(loudnessGain, peakCeiling / stats.peak);

  return channels.map((channel) =>
    Float32Array.from(channel, (sample) => clampSample(sample * protectedGain)),
  );
}
|
||
|
||
/**
 * Encodes planar float samples as an interleaved 16-bit PCM WAV file.
 *
 * The frame count is taken from the first channel; a channel that is shorter
 * (or missing) contributes zeros. An empty `channels` array produces a
 * header-only mono file.
 *
 * @param channels - One `Float32Array` per channel, samples in [-1, 1].
 * @param sampleRate - Sample rate written to the header, in Hz.
 * @returns A Blob of type `audio/wav`.
 */
export function encodePcmChannelsToWavBlob(
  channels: Float32Array[],
  sampleRate: number,
): Blob {
  const channelCount = Math.max(1, channels.length);
  const frameCount = channels[0]?.length ?? 0;
  const dataByteLength = frameCount * channelCount * WAV_BYTES_PER_SAMPLE;
  const output = new ArrayBuffer(WAV_HEADER_BYTE_LENGTH + dataByteLength);
  const view = new DataView(output);

  // RIFF container header; all multi-byte fields are little-endian.
  writeAscii(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataByteLength, true); // RIFF chunk size
  writeAscii(view, 8, 'WAVE');
  // "fmt " sub-chunk.
  writeAscii(view, 12, 'fmt ');
  view.setUint32(16, 16, true); // fmt chunk size
  view.setUint16(20, 1, true); // audio format 1 = PCM
  view.setUint16(22, channelCount, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * channelCount * WAV_BYTES_PER_SAMPLE, true); // byte rate
  view.setUint16(32, channelCount * WAV_BYTES_PER_SAMPLE, true); // block align
  view.setUint16(34, WAV_BITS_PER_SAMPLE, true);
  // "data" sub-chunk.
  writeAscii(view, 36, 'data');
  view.setUint32(40, dataByteLength, true);

  // Interleave: for each frame write one sample per channel.
  let outputOffset = WAV_HEADER_BYTE_LENGTH;
  for (let frameIndex = 0; frameIndex < frameCount; frameIndex += 1) {
    for (let channelIndex = 0; channelIndex < channelCount; channelIndex += 1) {
      const sample = channels[channelIndex]?.[frameIndex] ?? 0;
      view.setInt16(outputOffset, toSignedPcm16(sample), true);
      outputOffset += WAV_BYTES_PER_SAMPLE;
    }
  }

  return new Blob([output], { type: 'audio/wav' });
}
|
||
|
||
/**
 * Rejects files that cannot be audio before any decoding is attempted.
 *
 * @param file - The candidate file.
 * @throws Error when the file has no bytes, or when its MIME type does not
 *   start with `audio/` (which includes files with an empty type).
 */
function validateCreativeAudioFile(file: File): void {
  if (file.size <= 0) {
    throw new Error('音频文件为空,请重新选择。');
  }
  if (!resolveFileMimeType(file).startsWith('audio/')) {
    throw new Error('请选择音频文件。');
  }
}
|
||
|
||
/**
 * Decodes a file into an `AudioBuffer` using a short-lived audio context.
 *
 * @param file - The audio file to decode.
 * @returns The decoded audio.
 * @throws Error when no audio context constructor is available, or when
 *   reading or decoding the file fails.
 */
async function decodeCreativeAudioFile(file: File): Promise<AudioBuffer> {
  const AudioContextConstructor = getAudioContextConstructor();
  if (!AudioContextConstructor) {
    throw new Error('当前浏览器不支持音频处理。');
  }

  const context = new AudioContextConstructor();
  try {
    // `arrayBuffer()` already yields a private buffer, so it can be handed to
    // `decodeAudioData` (which detaches it) without an extra copy.
    const bytes = await file.arrayBuffer();
    return await new Promise<AudioBuffer>((resolve, reject) => {
      // Prefixed legacy contexts only support the callback form and return
      // nothing; modern ones also return a promise. Wire up both so either
      // path settles this promise (settling twice is harmless).
      const pending: Promise<AudioBuffer> | undefined = context.decodeAudioData(
        bytes,
        resolve,
        reject,
      );
      void pending?.then(resolve, reject);
    });
  } catch {
    throw new Error('音频解码失败,请重新选择。');
  } finally {
    // Best-effort cleanup: a failed close must not surface as an unhandled
    // rejection or mask the decode result.
    void context.close().catch(() => undefined);
  }
}
|
||
|
||
/**
 * Looks up the audio context constructor on the global object.
 *
 * @returns `AudioContext` when defined, otherwise the vendor-prefixed
 *   `webkitAudioContext`, otherwise `null`.
 */
function getAudioContextConstructor(): typeof AudioContext | null {
  const audioGlobal = globalThis as BrowserAudioGlobal;
  return audioGlobal.AudioContext ?? audioGlobal.webkitAudioContext ?? null;
}
|
||
|
||
/**
 * Returns the file's MIME type with surrounding whitespace removed.
 *
 * @param file - The file to inspect.
 * @returns The trimmed `file.type`, or `''` when it is empty or blank.
 */
function resolveFileMimeType(file: File): string {
  return file.type.trim();
}
|
||
|
||
/**
 * Tells whether any channel is louder than the threshold at one frame.
 *
 * @param buffer - Decoded audio to inspect.
 * @param frameIndex - Frame to test; an index outside the data reads as 0.
 * @param threshold - Amplitude the absolute sample value must exceed
 *   (strictly greater than).
 * @returns `true` as soon as one channel exceeds the threshold.
 */
function isFrameAudible(
  buffer: AudioBuffer,
  frameIndex: number,
  threshold: number,
): boolean {
  for (
    let channelIndex = 0;
    channelIndex < buffer.numberOfChannels;
    channelIndex += 1
  ) {
    const channelData = buffer.getChannelData(channelIndex);
    if (Math.abs(channelData[frameIndex] ?? 0) > threshold) {
      return true;
    }
  }
  return false;
}
|
||
|
||
/**
 * Copies a frame range of one channel into a new array.
 *
 * @param buffer - Decoded audio to read from.
 * @param channelIndex - Channel to copy; an index at or beyond
 *   `buffer.numberOfChannels` yields silence.
 * @param range - Frames to copy; frames outside the channel data read as 0.
 * @returns A new `Float32Array` of `range.frameCount` samples.
 */
function copyChannelSection(
  buffer: AudioBuffer,
  channelIndex: number,
  range: AudibleFrameRange,
): Float32Array {
  const output = new Float32Array(range.frameCount);
  if (channelIndex >= buffer.numberOfChannels) {
    // Missing channel: the freshly allocated output is already all zeros,
    // so no throwaway full-length source array is needed.
    return output;
  }

  const source = buffer.getChannelData(channelIndex);
  for (let frameOffset = 0; frameOffset < range.frameCount; frameOffset += 1) {
    output[frameOffset] = source[range.startFrame + frameOffset] ?? 0;
  }
  return output;
}
|
||
|
||
/**
 * Measures RMS and peak level across every sample of every channel.
 *
 * @param channels - Channel sample arrays to measure.
 * @returns `rms` (root mean square over all samples, 0 when there are none)
 *   and `peak` (largest absolute sample value).
 */
function measurePcmStats(channels: Float32Array[]): { rms: number; peak: number } {
  let sumSquares = 0;
  let peak = 0;
  let sampleCount = 0;
  for (const channel of channels) {
    for (const sample of channel) {
      sumSquares += sample * sample;
      peak = Math.max(peak, Math.abs(sample));
      sampleCount += 1;
    }
  }
  return {
    rms: sampleCount > 0 ? Math.sqrt(sumSquares / sampleCount) : 0,
    peak,
  };
}
|
||
|
||
/**
 * Limits a sample to the range [-1, 1].
 *
 * @param sample - The sample value.
 * @returns The value unchanged when inside the range, else the nearer bound.
 */
function clampSample(sample: number): number {
  return Math.max(-1, Math.min(1, sample));
}
|
||
|
||
/**
 * Converts a float sample to a signed 16-bit PCM value.
 *
 * The sample is clamped to [-1, 1] first. Negative values scale by 0x8000
 * and non-negative values by 0x7fff, so both ends of the int16 range
 * (-32768 and 32767) are reachable without overflow.
 *
 * @param sample - The float sample.
 * @returns An integer in [-32768, 32767].
 */
function toSignedPcm16(sample: number): number {
  const clamped = clampSample(sample);
  return clamped < 0
    ? Math.round(clamped * 0x8000)
    : Math.round(clamped * 0x7fff);
}
|
||
|
||
/**
 * Writes a string into a DataView, one byte per UTF-16 code unit.
 *
 * @param view - Destination view.
 * @param offset - Byte offset of the first character.
 * @param value - Text to write; intended for ASCII tags such as `'RIFF'`.
 */
function writeAscii(view: DataView, offset: number, value: string): void {
  for (let index = 0; index < value.length; index += 1) {
    view.setUint8(offset + index, value.charCodeAt(index));
  }
}
|
||
|
||
/**
 * Derives the `.wav` file name for a processed clip from the original name.
 *
 * The last extension (or a bare trailing dot) is replaced by `.wav`. When
 * nothing is left as a base name, for example a blank name or a name that is
 * only an extension such as `.mp3`, `creative-audio.wav` is used.
 *
 * @param fileName - Original file name.
 * @returns A file name ending in `.wav`.
 */
function buildProcessedAudioFileName(fileName: string): string {
  // Drop everything from the last dot onwards; names without a dot are kept.
  const baseName = fileName.trim().replace(/\.[^.]*$/u, '');
  return `${baseName || 'creative-audio'}.wav`;
}
|
||
|
||
/**
 * Formats a millisecond duration as seconds for display.
 *
 * Up to three decimals are kept and trailing zeros dropped, so a limit such
 * as 1250 ms reads "1.25" instead of being rounded to a misleading "1.3".
 *
 * @param durationMs - Duration in milliseconds.
 * @returns The duration in seconds, e.g. `'1'`, `'1.5'`, `'1.25'`.
 */
function formatDurationSeconds(durationMs: number): string {
  const seconds = durationMs / 1000;
  // Round to millisecond precision, then let Number drop trailing zeros.
  return String(Number(seconds.toFixed(3)));
}
|