import { execFileSync } from 'node:child_process';
import { existsSync, readFileSync } from 'node:fs';
import { basename, extname, relative, resolve } from 'node:path';

/** Lower-cased file extensions whose files are checked for valid UTF-8. */
const TEXT_EXTENSIONS = new Set([
  '.cjs',
  '.controller',
  '.css',
  '.env',
  '.html',
  '.js',
  '.json',
  '.jsx',
  '.md',
  '.meta',
  '.mjs',
  '.ps1',
  '.py',
  '.scss',
  '.sh',
  '.toml',
  '.ts',
  '.tsx',
  '.txt',
  '.yaml',
  '.yml',
]);

/** Exact (case-sensitive) file names that are checked regardless of extension. */
const TEXT_FILENAMES = new Set([
  '.editorconfig',
  '.gitattributes',
  '.gitignore',
  '.prettierignore',
  '.prettierrc',
  '.prettierrc.json',
  'AGENTS.md',
  'README.md',
]);

/** Forward-slash path prefixes, relative to the working directory, that are never checked. */
const EXCLUDED_PREFIXES = [
  '.codex-logs/',
  '.git/',
  'dist/',
  'dist_check/',
  'dist_check_monster_position/',
  'media/',
  'node_modules/',
  'public/Icons/',
];

/** Optional file (looked up in the working directory) listing paths to skip, one per line. */
const IGNORE_FILE = '.encoding-check-ignore';

// `fatal: true` makes decode() throw on malformed input instead of silently
// substituting U+FFFD, so invalid byte sequences are detectable.
const decoder = new TextDecoder('utf-8', { fatal: true });

/**
 * Converts backslash separators to forward slashes.
 *
 * @param {string} filePath - Path using either separator style.
 * @returns {string} The same path with every `\` replaced by `/`.
 */
function normalizePath(filePath) {
  return filePath.replace(/\\/g, '/');
}

/**
 * Rewrites a path so that it is relative to the current working directory,
 * using forward slashes. Absolute and `./`-prefixed paths therefore end up in
 * the same form as the entries of EXCLUDED_PREFIXES.
 *
 * @param {string} filePath - Absolute or relative path.
 * @returns {string} Forward-slash path relative to `process.cwd()`.
 */
function toCwdRelativePath(filePath) {
  return normalizePath(relative(process.cwd(), resolve(filePath)));
}

/**
 * Decides whether a path names a text file this script validates.
 *
 * A path is rejected when it starts with one of EXCLUDED_PREFIXES. Otherwise
 * it is accepted when its base name is in TEXT_FILENAMES, when its base name
 * starts with `.env`, or when its lower-cased extension is in TEXT_EXTENSIONS.
 *
 * @param {string} filePath - Path to test; prefixes are compared as-is, so the
 *   path should be relative to the working directory.
 * @returns {boolean} True when the file should be validated.
 */
function shouldCheck(filePath) {
  const normalizedPath = normalizePath(filePath);

  if (EXCLUDED_PREFIXES.some((prefix) => normalizedPath.startsWith(prefix))) {
    return false;
  }

  const fileName = basename(normalizedPath);

  if (TEXT_FILENAMES.has(fileName) || fileName.startsWith('.env')) {
    return true;
  }

  return TEXT_EXTENSIONS.has(extname(fileName).toLowerCase());
}

/**
 * Lists tracked and untracked-but-not-ignored files via `git ls-files` and
 * keeps those accepted by shouldCheck().
 *
 * @returns {string[]} Unique forward-slash paths as reported by git.
 * @throws {Error} Whatever `execFileSync` throws when git cannot be run or
 *   exits with a non-zero status.
 */
function listFilesFromGit() {
  // `-z` separates entries with NUL so unusual file names survive unquoted.
  const output = execFileSync(
    'git',
    ['ls-files', '--cached', '--others', '--exclude-standard', '-z'],
    { encoding: 'utf8', maxBuffer: 16 * 1024 * 1024 }
  );

  const paths = output.split('\0').filter(Boolean).map(normalizePath);

  // An unmerged path can be listed once per index stage; the Set collapses
  // such repeats so each file is checked (and reported) only once.
  return [...new Set(paths)].filter(shouldCheck);
}

/**
 * Reads IGNORE_FILE, if present, into a set of paths to skip.
 *
 * Lines are trimmed; blank lines and lines starting with `#` are dropped.
 *
 * @returns {Set<string>} Forward-slash paths; empty when the file is absent.
 */
function loadIgnoreList() {
  if (!existsSync(IGNORE_FILE)) {
    return new Set();
  }

  return new Set(
    readFileSync(IGNORE_FILE, 'utf8')
      .split(/\r?\n/u)
      .map((line) => line.trim())
      .filter((line) => line !== '' && !line.startsWith('#'))
      .map(normalizePath)
  );
}

/**
 * Reports whether a buffer contains at least one zero byte.
 *
 * @param {Buffer|Uint8Array} buffer - Raw file contents.
 * @returns {boolean} True when a 0x00 byte is present.
 */
function hasNullByte(buffer) {
  return buffer.includes(0);
}

/**
 * Validates that a file decodes as UTF-8 and holds no U+FFFD characters.
 *
 * Missing files and files containing a zero byte are skipped (they yield no
 * failure) — presumably because a zero byte marks the file as binary.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string|null} A failure message, or null when the file passes or
 *   is skipped.
 */
function validateUtf8(filePath) {
  if (!existsSync(filePath)) {
    return null;
  }

  const bytes = readFileSync(filePath);

  if (hasNullByte(bytes)) {
    return null;
  }

  let text;

  try {
    text = decoder.decode(bytes);
  } catch {
    return `${filePath} is not valid UTF-8.`;
  }

  // The decoder is in fatal mode, so it never inserts U+FFFD itself: any
  // occurrence found here was literally stored in the file.
  if (text.includes('\uFFFD')) {
    return `${filePath} contains Unicode replacement characters (U+FFFD), which usually means text was already decoded incorrectly before being saved.`;
  }

  return null;
}

// Files named on the command line take precedence over the git listing.
const explicitFiles = [...new Set(process.argv.slice(2).map(normalizePath))];
const ignoreList = loadIgnoreList();

// Explicit paths may be absolute or start with `./`; the exclusion prefixes
// are relative to the working directory, so match on the cwd-relative form.
// Paths from listFilesFromGit() have already been through shouldCheck().
const candidateFiles =
  explicitFiles.length > 0
    ? explicitFiles.filter((filePath) => shouldCheck(toCwdRelativePath(filePath)))
    : listFilesFromGit();

// An ignore entry matches either the path as given or its cwd-relative form.
const filesToCheck = candidateFiles.filter(
  (filePath) =>
    !ignoreList.has(filePath) && !ignoreList.has(toCwdRelativePath(filePath))
);

const failures = [];

for (const filePath of filesToCheck) {
  const failure = validateUtf8(filePath);

  if (failure) {
    failures.push(failure);
  }
}

if (failures.length > 0) {
  console.error('Encoding check failed:');

  for (const failure of failures) {
    console.error(`- ${failure}`);
  }

  // Set the exit code instead of calling process.exit(), which can terminate
  // the process before pending writes to stderr have been flushed.
  process.exitCode = 1;
} else {
  console.log(`Encoding check passed for ${filesToCheck.length} file(s).`);
}