Genarrative/src/components/common/creativeAudioProcessing.ts

import {
  type CreativeAudioAsset,
} from './creativeAudioFileAsset';

/**
 * `globalThis` widened with the vendor-prefixed `webkitAudioContext`
 * constructor, which this module uses as a fallback when the standard
 * `AudioContext` global is missing.
 */
type BrowserAudioGlobal = typeof globalThis & {
  webkitAudioContext?: typeof AudioContext;
};

/** Origin tag of a creative audio clip: an uploaded file or a recording. */
export type CreativeAudioSource = 'uploaded' | 'recorded';

/**
 * A `CreativeAudioAsset` extended with the locally held audio data.
 *
 * The field notes below describe how `prepareCreativeAudioFileForLocalUse`
 * in this module fills the fields in.
 */
export type PendingCreativeAudioAsset = CreativeAudioAsset & {
  // Name derived from the original file name with a `.wav` extension.
  fileName: string;
  // MIME type reported by `blob`.
  mimeType: string;
  // The processed (trimmed, normalized, WAV-encoded) audio.
  blob: Blob;
  // Origin tag passed in by the caller.
  source: CreativeAudioSource;
  // Object URL for `blob`, or an empty string when none could be created.
  previewUrl: string;
  // Length of the kept audible section in milliseconds.
  durationMs: number;
};

/**
 * Optional tuning knobs for local audio processing. Every field falls back
 * to the matching `DEFAULT_*` constant of this module when left unset.
 */
export type CreativeAudioProcessingOptions = {
  // Longest accepted audible section, in milliseconds.
  maxDurationMs?: number;
  // Absolute sample amplitude a frame must exceed to count as audible.
  silenceThreshold?: number;
  // Loudness target used when computing the normalization gain.
  targetLkfs?: number;
  // Upper bound for the absolute sample peak after gain is applied.
  peakCeiling?: number;
};

/** A contiguous span of frames inside an audio buffer. */
export type AudibleFrameRange = {
  // Index of the first frame in the span.
  startFrame: number;
  // Number of frames in the span, counted from `startFrame`.
  frameCount: number;
};

// Longest audible section accepted, in milliseconds, when the caller sets no limit.
const DEFAULT_MAX_DURATION_MS = 1000;
// Absolute sample amplitude a frame must exceed to count as audible.
const DEFAULT_SILENCE_THRESHOLD = 0.01;
// Default loudness target; the normalizer converts it to linear scale as 10^(x / 20).
const DEFAULT_TARGET_LKFS = -15;
// Default cap on the absolute sample peak after the gain is applied.
const DEFAULT_PEAK_CEILING = 0.98;
// Size in bytes of the RIFF/WAVE header written ahead of the sample data.
const WAV_HEADER_BYTE_LENGTH = 44;
// Encoded sample width; samples are written as signed 16-bit integers.
const WAV_BITS_PER_SAMPLE = 16;
const WAV_BYTES_PER_SAMPLE = WAV_BITS_PER_SAMPLE / 8;

/**
 * Turns a user-supplied audio file into a trimmed, loudness-normalized WAV
 * asset that can be previewed locally.
 *
 * Steps, in order: validate the file, decode it, locate the audible span,
 * enforce the duration limit, normalize the loudness, encode 16-bit PCM WAV
 * and create a preview object URL when `URL.createObjectURL` is available.
 *
 * @param file - Audio file supplied by the user.
 * @param source - Origin tag copied onto the asset and into its local id.
 * @param options - Optional overrides; unset fields use the module defaults.
 * @returns The pending asset. `previewUrl` and `audioSrc` are empty strings
 *   when no object URL could be created.
 * @throws Error when the file is rejected by validation, cannot be decoded,
 *   holds no audible frame, or its audible span exceeds the duration limit.
 */
export async function prepareCreativeAudioFileForLocalUse(
  file: File,
  source: CreativeAudioSource,
  options: CreativeAudioProcessingOptions = {},
): Promise<PendingCreativeAudioAsset> {
  validateCreativeAudioFile(file);

  const audio = await decodeCreativeAudioFile(file);
  const silenceThreshold =
    options.silenceThreshold ?? DEFAULT_SILENCE_THRESHOLD;
  const audibleRange = findAudibleFrameRange(audio, silenceThreshold);
  if (!audibleRange) {
    throw new Error('音频声音过小，请重新录制或上传。');
  }

  // Only the audible span counts towards the duration limit.
  const audibleSeconds = audibleRange.frameCount / audio.sampleRate;
  const durationMs = Math.round(audibleSeconds * 1000);
  const limitMs = options.maxDurationMs ?? DEFAULT_MAX_DURATION_MS;
  if (durationMs > limitMs) {
    throw new Error(`音频最长 ${formatDurationSeconds(limitMs)} 秒。`);
  }

  const targetLkfs = options.targetLkfs ?? DEFAULT_TARGET_LKFS;
  const peakCeiling = options.peakCeiling ?? DEFAULT_PEAK_CEILING;
  const pcmChannels = normalizeAudioBufferSection(audio, audibleRange, {
    targetLkfs,
    peakCeiling,
  });
  const blob = encodePcmChannelsToWavBlob(pcmChannels, audio.sampleRate);
  const fileName = buildProcessedAudioFileName(file.name);

  // Environments without object URL support get an empty preview instead.
  let previewUrl = '';
  if (typeof URL !== 'undefined' && typeof URL.createObjectURL === 'function') {
    previewUrl = URL.createObjectURL(blob);
  }

  return {
    assetId: `local-${source}-${Date.now()}`,
    audioSrc: previewUrl,
    audioObjectKey: '',
    assetObjectId: '',
    source,
    prompt: file.name,
    durationMs,
    fileName,
    mimeType: blob.type,
    blob,
    previewUrl,
  };
}

/**
 * Finds the span between the first and the last audible frame of a buffer.
 *
 * A frame is audible when the absolute sample value of at least one channel
 * is strictly greater than the threshold.
 *
 * @param buffer - Decoded audio to scan.
 * @param silenceThreshold - Amplitude threshold; negative values are treated
 *   as 0.
 * @returns The audible span, or `null` when no frame is audible.
 */
export function findAudibleFrameRange(
  buffer: AudioBuffer,
  silenceThreshold = DEFAULT_SILENCE_THRESHOLD,
): AudibleFrameRange | null {
  const threshold = Math.max(0, silenceThreshold);
  const frameTotal = buffer.length;

  // Resolve each channel's sample array once instead of once per frame.
  const channels = Array.from(
    { length: buffer.numberOfChannels },
    (_value, channelIndex) => buffer.getChannelData(channelIndex),
  );

  const isAudibleAt = (frameIndex: number): boolean => {
    for (const samples of channels) {
      if (Math.abs(samples[frameIndex] ?? 0) > threshold) {
        return true;
      }
    }
    return false;
  };

  let startFrame = 0;
  while (startFrame < frameTotal && !isAudibleAt(startFrame)) {
    startFrame += 1;
  }
  if (startFrame >= frameTotal) {
    return null;
  }

  // The start frame is audible, so the backwards scan stops there at the latest.
  let endFrame = frameTotal - 1;
  while (endFrame > startFrame && !isAudibleAt(endFrame)) {
    endFrame -= 1;
  }

  return {
    startFrame,
    frameCount: endFrame - startFrame + 1,
  };
}

/**
 * Copies a frame range out of a buffer and scales it towards the target
 * loudness without letting the peak exceed the ceiling.
 *
 * @param buffer - Decoded audio to read from.
 * @param range - Frames to keep.
 * @param options - Loudness target and peak ceiling; unset fields use the
 *   module defaults, and a ceiling below 0.01 is raised to 0.01.
 * @returns One sample array per channel (at least one), each clamped to
 *   [-1, 1].
 * @throws Error when the selected section has no measurable level.
 */
export function normalizeAudioBufferSection(
  buffer: AudioBuffer,
  range: AudibleFrameRange,
  options: Pick<CreativeAudioProcessingOptions, 'targetLkfs' | 'peakCeiling'> = {},
) {
  const targetLkfs = options.targetLkfs ?? DEFAULT_TARGET_LKFS;
  const peakCeiling = Math.max(0.01, options.peakCeiling ?? DEFAULT_PEAK_CEILING);

  const sections: Float32Array[] = [];
  const channelTotal = Math.max(1, buffer.numberOfChannels);
  for (let channelIndex = 0; channelIndex < channelTotal; channelIndex += 1) {
    sections.push(copyChannelSection(buffer, channelIndex, range));
  }

  const { rms, peak } = measurePcmStats(sections);
  if (rms <= 0 || peak <= 0) {
    throw new Error('音频声音过小，请重新录制或上传。');
  }

  // Browser-side approximation: estimate LKFS from the RMS over all channels,
  // then pull the level to the GY/T 377-2023 target of -15 LKFS.
  const targetLinear = Math.pow(10, targetLkfs / 20);
  // Use the smaller gain so the loudness boost never pushes the peak past the ceiling.
  const gain = Math.min(targetLinear / rms, peakCeiling / peak);

  return sections.map((section) =>
    section.map((sample) => clampSample(sample * gain)),
  );
}

/**
 * Encodes per-channel float samples as an interleaved 16-bit PCM WAV file.
 *
 * The frame count is taken from the first channel; samples missing from any
 * other channel are written as 0.
 *
 * @param channels - One sample array per channel.
 * @param sampleRate - Sample rate written into the header.
 * @returns A Blob of type `audio/wav` holding the header and sample data.
 */
export function encodePcmChannelsToWavBlob(
  channels: Float32Array[],
  sampleRate: number,
) {
  const frameCount = channels[0]?.length ?? 0;
  const channelCount = Math.max(1, channels.length);
  const blockAlign = channelCount * WAV_BYTES_PER_SAMPLE;
  const byteRate = sampleRate * blockAlign;
  const dataByteLength = frameCount * blockAlign;

  const output = new ArrayBuffer(WAV_HEADER_BYTE_LENGTH + dataByteLength);
  const view = new DataView(output);

  // RIFF container header; the size field excludes the first 8 bytes.
  writeAscii(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataByteLength, true);
  writeAscii(view, 8, 'WAVE');

  // "fmt " chunk: 16-byte body, format tag 1 (PCM).
  writeAscii(view, 12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, channelCount, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, WAV_BITS_PER_SAMPLE, true);

  // "data" chunk header followed by the interleaved samples.
  writeAscii(view, 36, 'data');
  view.setUint32(40, dataByteLength, true);

  for (let frame = 0; frame < frameCount; frame += 1) {
    const frameOffset = WAV_HEADER_BYTE_LENGTH + frame * blockAlign;
    for (let channel = 0; channel < channelCount; channel += 1) {
      const value = channels[channel]?.[frame] ?? 0;
      view.setInt16(
        frameOffset + channel * WAV_BYTES_PER_SAMPLE,
        toSignedPcm16(value),
        true,
      );
    }
  }

  return new Blob([output], { type: 'audio/wav' });
}

/**
 * Rejects files that are empty or whose resolved MIME type is not `audio/*`.
 *
 * @param file - File to check.
 * @throws Error when the size is not positive or the MIME type does not
 *   start with `audio/`.
 */
function validateCreativeAudioFile(file: File) {
  const isEmpty = file.size <= 0;
  if (isEmpty) {
    throw new Error('音频文件为空，请重新选择。');
  }

  const mimeType = resolveFileMimeType(file);
  const isAudio = mimeType.startsWith('audio/');
  if (!isAudio) {
    throw new Error('请选择音频文件。');
  }
}

/**
 * Decodes an audio file into an `AudioBuffer` with a short-lived audio
 * context that is closed again afterwards.
 *
 * @param file - Audio file whose bytes are decoded.
 * @returns The decoded audio.
 * @throws Error when no audio context implementation is available or the
 *   bytes cannot be read or decoded.
 */
async function decodeCreativeAudioFile(file: File) {
  const AudioContextConstructor = getAudioContextConstructor();
  if (!AudioContextConstructor) {
    throw new Error('当前浏览器不支持音频处理。');
  }

  const context = new AudioContextConstructor();
  try {
    const bytes = await file.arrayBuffer();
    return await new Promise<AudioBuffer>((resolve, reject) => {
      // Legacy prefixed contexts only support the callback form and return
      // nothing, so pass callbacks and also chain the promise when one is
      // returned. Settling the same promise twice is harmless.
      const pending = context.decodeAudioData(
        bytes.slice(0),
        resolve,
        reject,
      ) as Promise<AudioBuffer> | undefined;
      pending?.then(resolve, reject);
    });
  } catch {
    throw new Error('音频解码失败，请重新选择。');
  } finally {
    // Best-effort release: a close() that throws or rejects must neither mask
    // the decode outcome nor surface as an unhandled rejection.
    void Promise.resolve()
      .then(() => context.close())
      .catch(() => undefined);
  }
}

/**
 * Looks up the audio context constructor exposed by the runtime.
 *
 * @returns The standard `AudioContext` when present, otherwise the prefixed
 *   `webkitAudioContext`, otherwise `null`.
 */
function getAudioContextConstructor() {
  const { AudioContext: standard, webkitAudioContext: prefixed } =
    globalThis as BrowserAudioGlobal;
  return standard ?? prefixed ?? null;
}

/**
 * Returns the file's declared MIME type with surrounding whitespace removed.
 *
 * @param file - File whose `type` is read.
 * @returns The trimmed type, or an empty string when it is blank.
 */
function resolveFileMimeType(file: File) {
  const declaredType = file.type.trim();
  return declaredType.length > 0 ? declaredType : '';
}

/**
 * Tells whether any channel exceeds the threshold at the given frame.
 *
 * @param buffer - Decoded audio to inspect.
 * @param frameIndex - Frame to test; an index without data counts as 0.
 * @param threshold - Amplitude that must be strictly exceeded.
 * @returns `true` as soon as one channel's absolute sample is above the
 *   threshold, otherwise `false`.
 */
function isFrameAudible(
  buffer: AudioBuffer,
  frameIndex: number,
  threshold: number,
) {
  let channel = 0;
  while (channel < buffer.numberOfChannels) {
    const amplitude = Math.abs(buffer.getChannelData(channel)[frameIndex] ?? 0);
    if (amplitude > threshold) {
      return true;
    }
    channel += 1;
  }
  return false;
}

/**
 * Copies a frame range of one channel into a new sample array.
 *
 * @param buffer - Decoded audio to read from.
 * @param channelIndex - Channel to copy; a channel the buffer does not have
 *   yields silence.
 * @param range - Frames to copy; positions without source data become 0.
 * @returns A new array with `range.frameCount` samples.
 */
function copyChannelSection(
  buffer: AudioBuffer,
  channelIndex: number,
  range: AudibleFrameRange,
) {
  let source: Float32Array;
  if (channelIndex < buffer.numberOfChannels) {
    source = buffer.getChannelData(channelIndex);
  } else {
    // No such channel: read from an all-zero stand-in of the same length.
    source = new Float32Array(buffer.length);
  }

  const { startFrame, frameCount } = range;
  const section = new Float32Array(frameCount);
  let cursor = 0;
  while (cursor < frameCount) {
    section[cursor] = source[startFrame + cursor] ?? 0;
    cursor += 1;
  }
  return section;
}

/**
 * Measures the overall level of a set of channels.
 *
 * @param channels - Sample arrays to measure together.
 * @returns `rms`, the root mean square over every sample of every channel
 *   (0 when there are no samples), and `peak`, the largest absolute sample.
 */
function measurePcmStats(channels: Float32Array[]) {
  let energy = 0;
  let loudest = 0;
  let total = 0;

  channels.forEach((samples) => {
    total += samples.length;
    samples.forEach((value) => {
      energy += value * value;
      loudest = Math.max(loudest, Math.abs(value));
    });
  });

  const rms = total > 0 ? Math.sqrt(energy / total) : 0;
  return { rms, peak: loudest };
}

/**
 * Limits a sample to the range [-1, 1].
 *
 * @param sample - Value to limit.
 * @returns The nearest bound when the value lies outside the range,
 *   otherwise the value itself (NaN is returned unchanged).
 */
function clampSample(sample: number) {
  if (sample > 1) {
    return 1;
  }
  if (sample < -1) {
    return -1;
  }
  return sample;
}

/**
 * Converts a float sample to a signed 16-bit PCM value.
 *
 * @param sample - Sample value; it is clamped to [-1, 1] first.
 * @returns The rounded integer, scaled by 0x8000 for negative samples and by
 *   0x7fff otherwise so both ends of the 16-bit range are reachable.
 */
function toSignedPcm16(sample: number) {
  const bounded = clampSample(sample);
  const fullScale = bounded < 0 ? 0x8000 : 0x7fff;
  return Math.round(bounded * fullScale);
}

/**
 * Writes a string into a DataView, one byte per UTF-16 code unit.
 *
 * @param view - Destination view.
 * @param offset - Byte position of the first character.
 * @param value - Text to write; each code unit is stored with `setUint8`.
 */
function writeAscii(view: DataView, offset: number, value: string) {
  value.split('').forEach((character, position) => {
    view.setUint8(offset + position, character.charCodeAt(0));
  });
}

/**
 * Derives the `.wav` file name for processed audio.
 *
 * @param fileName - Original file name; surrounding whitespace is ignored.
 * @returns `creative-audio.wav` for a blank name; otherwise the name with
 *   its last extension replaced by `.wav`, or with `.wav` appended when it
 *   has no extension.
 */
function buildProcessedAudioFileName(fileName: string) {
  const trimmedName = fileName.trim();
  if (trimmedName === '') {
    return 'creative-audio.wav';
  }

  const extensionPattern = /\.[^.]+$/u;
  if (!extensionPattern.test(trimmedName)) {
    return `${trimmedName}.wav`;
  }
  return trimmedName.replace(extensionPattern, '.wav');
}

/**
 * Formats a millisecond duration as seconds for display.
 *
 * @param durationMs - Duration in milliseconds.
 * @returns Whole seconds without decimals, otherwise one decimal place.
 */
function formatDurationSeconds(durationMs: number) {
  const seconds = durationMs / 1000;
  if (Number.isInteger(seconds)) {
    return String(seconds);
  }
  return seconds.toFixed(1);
}