Files
Genarrative/src/components/common/creativeAudioProcessing.ts
2026-06-06 22:56:53 +08:00

309 lines
8.8 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import {
type CreativeAudioAsset,
} from './creativeAudioFileAsset';
/**
 * `globalThis` widened with the optional vendor-prefixed `webkitAudioContext`
 * constructor, so that `getAudioContextConstructor` can read it as a fallback
 * when the standard `AudioContext` global is missing.
 */
type BrowserAudioGlobal = typeof globalThis & {
webkitAudioContext?: typeof AudioContext;
};
/**
 * Origin tag of a locally prepared audio asset. The value is stored on the asset
 * and is also embedded in the generated `assetId`.
 */
export type CreativeAudioSource = 'uploaded' | 'recorded';
/**
 * A `CreativeAudioAsset` extended with the local data produced by
 * `prepareCreativeAudioFileForLocalUse`.
 */
export type PendingCreativeAudioAsset = CreativeAudioAsset & {
// File name of the processed audio (the preparer rewrites the extension to `.wav`).
fileName: string;
// MIME type of `blob` (the preparer copies it from `blob.type`).
mimeType: string;
// The processed audio payload.
blob: Blob;
// Whether the audio was uploaded or recorded.
source: CreativeAudioSource;
// Object URL for `blob`; the preparer uses an empty string when `URL.createObjectURL` is unavailable.
previewUrl: string;
// Duration in milliseconds (the preparer measures the trimmed, audible section).
durationMs: number;
};
/**
 * Optional tuning knobs for local audio preparation. Every omitted field falls
 * back to the matching `DEFAULT_*` constant of this module.
 */
export type CreativeAudioProcessingOptions = {
// Longest accepted duration of the audible section, in milliseconds.
maxDurationMs?: number;
// Absolute sample amplitude a frame must exceed (on any channel) to count as audible.
silenceThreshold?: number;
// Loudness target in LKFS; the normaliser approximates loudness with RMS.
targetLkfs?: number;
// Highest absolute sample value allowed after gain is applied (floored at 0.01 by the normaliser).
peakCeiling?: number;
};
/** A contiguous run of frames inside an audio buffer. */
export type AudibleFrameRange = {
// Index of the first frame in the range.
startFrame: number;
// Number of frames in the range, counted from `startFrame`.
frameCount: number;
};
// Fallback for `maxDurationMs`: the audible section may last at most one second.
const DEFAULT_MAX_DURATION_MS = 1000;
// Fallback for `silenceThreshold`: absolute sample amplitude treated as silence.
const DEFAULT_SILENCE_THRESHOLD = 0.01;
// Fallback for `targetLkfs`: loudness target used by the normaliser.
const DEFAULT_TARGET_LKFS = -15;
// Fallback for `peakCeiling`: highest absolute sample value after normalisation.
const DEFAULT_PEAK_CEILING = 0.98;
// Byte length of the RIFF/WAVE header written ahead of the PCM data.
const WAV_HEADER_BYTE_LENGTH = 44;
// The encoder emits 16-bit PCM samples.
const WAV_BITS_PER_SAMPLE = 16;
// Bytes occupied by one encoded sample (2 for 16-bit PCM).
const WAV_BYTES_PER_SAMPLE = WAV_BITS_PER_SAMPLE / 8;
/**
 * Turns an uploaded or recorded audio file into a locally usable WAV asset.
 *
 * Pipeline: validate the file, decode it, trim leading/trailing silence, enforce
 * the duration limit, normalise loudness, encode as 16-bit PCM WAV and expose a
 * preview object URL.
 *
 * @param file - The audio file chosen or recorded by the user.
 * @param source - Whether the audio was uploaded or recorded.
 * @param options - Optional overrides for the processing defaults.
 * @returns The pending asset carrying the processed blob and its metadata.
 * @throws Error when the file is invalid, cannot be decoded, contains no audible
 *   frame, or its audible section exceeds the duration limit.
 */
export async function prepareCreativeAudioFileForLocalUse(
  file: File,
  source: CreativeAudioSource,
  options: CreativeAudioProcessingOptions = {},
): Promise<PendingCreativeAudioAsset> {
  validateCreativeAudioFile(file);
  const audioBuffer = await decodeCreativeAudioFile(file);

  // Trim leading and trailing silence; a buffer with no audible frame is rejected.
  const silenceThreshold = options.silenceThreshold ?? DEFAULT_SILENCE_THRESHOLD;
  const audibleRange = findAudibleFrameRange(audioBuffer, silenceThreshold);
  if (!audibleRange) {
    throw new Error('音频声音过小,请重新录制或上传。');
  }

  // The duration limit applies to the trimmed section, not to the raw file.
  const audibleSeconds = audibleRange.frameCount / audioBuffer.sampleRate;
  const durationMs = Math.round(audibleSeconds * 1000);
  const maxDurationMs = options.maxDurationMs ?? DEFAULT_MAX_DURATION_MS;
  if (durationMs > maxDurationMs) {
    throw new Error(`音频最长 ${formatDurationSeconds(maxDurationMs)} 秒。`);
  }

  const normalizedChannels = normalizeAudioBufferSection(audioBuffer, audibleRange, {
    targetLkfs: options.targetLkfs ?? DEFAULT_TARGET_LKFS,
    peakCeiling: options.peakCeiling ?? DEFAULT_PEAK_CEILING,
  });
  const wavBlob = encodePcmChannelsToWavBlob(
    normalizedChannels,
    audioBuffer.sampleRate,
  );
  const processedFileName = buildProcessedAudioFileName(file.name);

  // Object URLs are unavailable in some runtimes; fall back to an empty preview.
  const canCreateObjectUrl =
    typeof URL !== 'undefined' && typeof URL.createObjectURL === 'function';
  let previewUrl = '';
  if (canCreateObjectUrl) {
    previewUrl = URL.createObjectURL(wavBlob);
  }

  return {
    assetId: `local-${source}-${Date.now()}`,
    audioSrc: previewUrl,
    audioObjectKey: '',
    assetObjectId: '',
    source,
    prompt: file.name,
    durationMs,
    fileName: processedFileName,
    mimeType: wavBlob.type,
    blob: wavBlob,
    previewUrl,
  };
}
/**
 * Locates the span between the first and the last audible frame of a buffer.
 *
 * A frame is audible when any channel's absolute sample value exceeds the
 * threshold; negative thresholds are treated as 0.
 *
 * @param buffer - Decoded audio to scan.
 * @param silenceThreshold - Absolute amplitude that separates silence from sound.
 * @returns The audible range, or `null` when no frame is audible.
 */
export function findAudibleFrameRange(
  buffer: AudioBuffer,
  silenceThreshold = DEFAULT_SILENCE_THRESHOLD,
): AudibleFrameRange | null {
  const floor = Math.max(0, silenceThreshold);
  const totalFrames = buffer.length;

  // Walk forward until the first audible frame.
  let firstAudible = 0;
  while (
    firstAudible < totalFrames &&
    !isFrameAudible(buffer, firstAudible, floor)
  ) {
    firstAudible += 1;
  }
  if (!(firstAudible < totalFrames)) {
    return null;
  }

  // Walk backward until the last audible frame, never crossing the first one.
  let lastAudible = totalFrames - 1;
  while (
    lastAudible >= firstAudible &&
    !isFrameAudible(buffer, lastAudible, floor)
  ) {
    lastAudible -= 1;
  }
  if (!(lastAudible >= firstAudible)) {
    return null;
  }

  return {
    startFrame: firstAudible,
    frameCount: lastAudible - firstAudible + 1,
  };
}
/**
 * Copies the given frame range out of every channel and scales it towards the
 * loudness target without letting any sample exceed the peak ceiling.
 *
 * @param buffer - Decoded audio to read from.
 * @param range - Frames to keep.
 * @param options - Optional `targetLkfs` / `peakCeiling` overrides.
 * @returns One gain-adjusted `Float32Array` per channel (at least one channel).
 * @throws Error when the section has no measurable signal.
 */
export function normalizeAudioBufferSection(
  buffer: AudioBuffer,
  range: AudibleFrameRange,
  options: Pick<CreativeAudioProcessingOptions, 'targetLkfs' | 'peakCeiling'> = {},
) {
  const targetLkfs = options.targetLkfs ?? DEFAULT_TARGET_LKFS;
  // The ceiling is floored at 0.01 so the peak-limited gain stays positive.
  const peakCeiling = Math.max(0.01, options.peakCeiling ?? DEFAULT_PEAK_CEILING);
  const channelCount = Math.max(1, buffer.numberOfChannels);

  const sections = Array.from({ length: channelCount }, (_unused, index) =>
    copyChannelSection(buffer, index, range),
  );

  const { rms, peak } = measurePcmStats(sections);
  if (rms <= 0 || peak <= 0) {
    throw new Error('音频声音过小,请重新录制或上传。');
  }

  // Browser-side approximation: estimate LKFS from the RMS over all channels,
  // then pull the level to the GY/T 377-2023 target of -15 LKFS.
  const targetRms = 10 ** (targetLkfs / 20);
  const loudnessGain = targetRms / rms;
  const peakLimitedGain = peakCeiling / peak;
  const appliedGain = Math.min(loudnessGain, peakLimitedGain);

  const applyGain = (sample: number) => clampSample(sample * appliedGain);
  return sections.map((section) => Float32Array.from(section, applyGain));
}
/**
 * Encodes planar float channels as an interleaved 16-bit PCM WAV blob.
 *
 * The frame count is taken from the first channel; samples missing from any
 * other channel are written as silence. An empty channel list yields a
 * header-only file that declares one channel.
 *
 * @param channels - One `Float32Array` of samples per channel.
 * @param sampleRate - Sample rate in Hz written into the header.
 * @returns A blob of type `audio/wav`.
 */
export function encodePcmChannelsToWavBlob(
  channels: Float32Array[],
  sampleRate: number,
) {
  const channelCount = Math.max(1, channels.length);
  const frameCount = channels[0]?.length ?? 0;
  const blockAlign = channelCount * WAV_BYTES_PER_SAMPLE;
  const byteRate = sampleRate * channelCount * WAV_BYTES_PER_SAMPLE;
  const dataByteLength = frameCount * channelCount * WAV_BYTES_PER_SAMPLE;

  const output = new ArrayBuffer(WAV_HEADER_BYTE_LENGTH + dataByteLength);
  const view = new DataView(output);

  // RIFF container header; the chunk size excludes the 8-byte "RIFF"+size prefix.
  writeAscii(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataByteLength, true);
  writeAscii(view, 8, 'WAVE');

  // "fmt " sub-chunk: 16-byte body, format tag 1 (uncompressed PCM).
  writeAscii(view, 12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, channelCount, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, WAV_BITS_PER_SAMPLE, true);

  // "data" sub-chunk header, followed by the interleaved little-endian samples.
  writeAscii(view, 36, 'data');
  view.setUint32(40, dataByteLength, true);

  for (let frame = 0; frame < frameCount; frame += 1) {
    const frameOffset = WAV_HEADER_BYTE_LENGTH + frame * blockAlign;
    for (let channel = 0; channel < channelCount; channel += 1) {
      const sample = channels[channel]?.[frame] ?? 0;
      view.setInt16(
        frameOffset + channel * WAV_BYTES_PER_SAMPLE,
        toSignedPcm16(sample),
        true,
      );
    }
  }

  return new Blob([output], { type: 'audio/wav' });
}
/**
 * Rejects files that cannot be audio: empty files and files whose declared MIME
 * type does not start with `audio/`.
 *
 * @param file - The file to check.
 * @throws Error with a user-facing message when the file is rejected.
 */
function validateCreativeAudioFile(file: File) {
  const isEmpty = file.size <= 0;
  if (isEmpty) {
    throw new Error('音频文件为空,请重新选择。');
  }

  const mimeType = resolveFileMimeType(file);
  const declaresAudio = mimeType.startsWith('audio/');
  if (!declaresAudio) {
    throw new Error('请选择音频文件。');
  }
}
/**
 * Decodes an audio file into an `AudioBuffer` using a short-lived audio context.
 *
 * @param file - The audio file to decode.
 * @returns The decoded audio.
 * @throws Error when the browser offers no audio context or decoding fails.
 */
async function decodeCreativeAudioFile(file: File) {
  const AudioContextConstructor = getAudioContextConstructor();
  if (!AudioContextConstructor) {
    throw new Error('当前浏览器不支持音频处理。');
  }
  const context = new AudioContextConstructor();
  try {
    const bytes = await file.arrayBuffer();
    return await new Promise<AudioBuffer>((resolve, reject) => {
      // The prefixed `webkitAudioContext` fallback only implements the callback
      // form of decodeAudioData, so callbacks are always supplied. Standard
      // implementations additionally return a promise; it is observed as well so
      // that a rejection never goes unhandled. Settling twice is a no-op.
      const pending: Promise<AudioBuffer> | undefined = context.decodeAudioData(
        // decodeAudioData takes ownership of the buffer, so it is handed a copy.
        bytes.slice(0),
        resolve,
        reject,
      );
      void pending?.then(resolve, reject);
    });
  } catch {
    throw new Error('音频解码失败,请重新选择。');
  } finally {
    // Closing is best-effort clean-up: a failure here must neither mask the
    // decode result nor surface as an unhandled rejection.
    void Promise.resolve(context.close()).catch(() => undefined);
  }
}
/**
 * Looks up the audio context constructor exposed by the current runtime.
 *
 * @returns The standard `AudioContext` when present, otherwise the prefixed
 *   `webkitAudioContext`, otherwise `null`.
 */
function getAudioContextConstructor() {
  const scope = globalThis as BrowserAudioGlobal;
  if (scope.AudioContext != null) {
    return scope.AudioContext;
  }
  if (scope.webkitAudioContext != null) {
    return scope.webkitAudioContext;
  }
  return null;
}
/**
 * Returns the file's declared MIME type with surrounding whitespace removed.
 *
 * @param file - The file whose `type` is read.
 * @returns The trimmed MIME type, or an empty string when none is declared.
 */
function resolveFileMimeType(file: File) {
  const declaredType = file.type.trim();
  return declaredType.length > 0 ? declaredType : '';
}
/**
 * Tells whether any channel is louder than the threshold at the given frame.
 *
 * @param buffer - Decoded audio to inspect.
 * @param frameIndex - Frame to test; frames outside the data read as silence.
 * @param threshold - Absolute amplitude that must be strictly exceeded.
 * @returns `true` as soon as one channel exceeds the threshold, else `false`.
 */
function isFrameAudible(
  buffer: AudioBuffer,
  frameIndex: number,
  threshold: number,
) {
  let channel = 0;
  while (channel < buffer.numberOfChannels) {
    const samples = buffer.getChannelData(channel);
    const magnitude = Math.abs(samples[frameIndex] ?? 0);
    if (magnitude > threshold) {
      return true;
    }
    channel += 1;
  }
  return false;
}
/**
 * Copies a frame range of one channel into a fresh array.
 *
 * A channel index the buffer does not have, and frames beyond the channel data,
 * are filled with silence (0).
 *
 * @param buffer - Decoded audio to read from.
 * @param channelIndex - Channel to copy.
 * @param range - Frames to copy.
 * @returns A new `Float32Array` of `range.frameCount` samples.
 */
function copyChannelSection(
  buffer: AudioBuffer,
  channelIndex: number,
  range: AudibleFrameRange,
) {
  const { startFrame, frameCount } = range;
  const hasChannel = channelIndex < buffer.numberOfChannels;
  const samples = hasChannel
    ? buffer.getChannelData(channelIndex)
    : new Float32Array(buffer.length);

  const section = new Float32Array(frameCount);
  let offset = 0;
  while (offset < frameCount) {
    section[offset] = samples[startFrame + offset] ?? 0;
    offset += 1;
  }
  return section;
}
/**
 * Measures the root-mean-square level and the absolute peak over all samples of
 * all channels.
 *
 * @param channels - Sample arrays to measure.
 * @returns `rms` (0 when there are no samples) and `peak`.
 */
function measurePcmStats(channels: Float32Array[]) {
  let energy = 0;
  let loudest = 0;
  let total = 0;

  channels.forEach((samples) => {
    samples.forEach((value) => {
      energy += value * value;
      loudest = Math.max(loudest, Math.abs(value));
      total += 1;
    });
  });

  const rms = total > 0 ? Math.sqrt(energy / total) : 0;
  return { rms, peak: loudest };
}
/**
 * Limits a sample to the range [-1, 1].
 *
 * @param sample - The sample value to limit.
 * @returns The sample itself when in range, otherwise the nearest bound.
 */
function clampSample(sample: number) {
  if (sample > 1) {
    return 1;
  }
  if (sample < -1) {
    return -1;
  }
  return sample;
}
/**
 * Converts a float sample to a signed 16-bit PCM value.
 *
 * The sample is clamped first; negative values scale by 0x8000 and non-negative
 * values by 0x7fff, so the result spans -32768..32767.
 *
 * @param sample - Float sample to convert.
 * @returns The rounded integer sample value.
 */
function toSignedPcm16(sample: number) {
  const bounded = clampSample(sample);
  const fullScale = bounded < 0 ? 0x8000 : 0x7fff;
  return Math.round(bounded * fullScale);
}
/**
 * Writes a string into the view, one byte per UTF-16 code unit.
 *
 * @param view - Destination view.
 * @param offset - Byte offset of the first character.
 * @param value - Text to write.
 */
function writeAscii(view: DataView, offset: number, value: string) {
  // Splitting on '' yields one entry per UTF-16 code unit.
  value.split('').forEach((character, position) => {
    view.setUint8(offset + position, character.charCodeAt(0));
  });
}
/**
 * Derives the `.wav` file name of a processed audio file from the original name.
 *
 * The final extension (if any) is replaced by `.wav`. Dots left dangling at the
 * end of the stem are dropped so that names such as `voice.` do not turn into
 * `voice..wav`, and names without a usable stem (empty, whitespace only, or
 * extension only such as `.mp3`) fall back to `creative-audio.wav`.
 *
 * @param fileName - Original file name.
 * @returns The file name to use for the processed WAV.
 */
function buildProcessedAudioFileName(fileName: string) {
  const stem = fileName
    .trim()
    // Remove the last dot together with whatever follows it (possibly nothing).
    .replace(/\.[^.]*$/u, '')
    // Remove dots that would otherwise double up with the new extension.
    .replace(/\.+$/u, '');
  return stem ? `${stem}.wav` : 'creative-audio.wav';
}
/**
 * Formats a millisecond duration as seconds for display.
 *
 * @param durationMs - Duration in milliseconds.
 * @returns Whole seconds without decimals, otherwise seconds with one decimal.
 */
function formatDurationSeconds(durationMs: number) {
  const seconds = durationMs / 1000;
  if (Number.isInteger(seconds)) {
    return String(seconds);
  }
  return seconds.toFixed(1);
}