174 lines
3.4 KiB
JavaScript
174 lines
3.4 KiB
JavaScript
import { execFileSync } from 'node:child_process';
import { existsSync, readFileSync } from 'node:fs';
import { basename, extname, relative } from 'node:path';
|
|
|
|
/**
 * File extensions (lower-case, including the leading dot) whose files are
 * treated as text and therefore included in the UTF-8 check.
 * `shouldCheck` lower-cases a file's extension before looking it up here.
 */
const TEXT_EXTENSIONS = new Set([
  '.cjs',
  '.controller',
  '.css',
  '.env',
  '.html',
  '.js',
  '.json',
  '.jsx',
  '.md',
  '.meta',
  '.mjs',
  '.ps1',
  '.py',
  '.rs',
  '.scss',
  '.sh',
  '.toml',
  '.ts',
  '.tsx',
  '.txt',
  '.yaml',
  '.yml',
]);
|
|
|
|
/**
 * Exact base names (case-sensitive) that are always checked, regardless of
 * their extension. Mostly dotfiles, for which an extension lookup alone would
 * not be enough.
 */
const TEXT_FILENAMES = new Set([
  '.editorconfig',
  '.gitattributes',
  '.gitignore',
  '.prettierignore',
  '.prettierrc',
  '.prettierrc.json',
  'AGENTS.md',
  'README.md',
]);
|
|
|
|
/**
 * Path prefixes (forward-slash form) that are never checked. `shouldCheck`
 * matches them with `startsWith` against the normalized path, so an entry
 * without a trailing slash (e.g. `server-rs-codex-`) excludes every path that
 * begins with that text.
 */
const EXCLUDED_PREFIXES = [
  '.codex-logs/',
  '.git/',
  '.codex-cargo-home-',
  'dist/',
  'dist_check/',
  'dist_check_monster_position/',
  'media/',
  'node_modules/',
  'public/Icons/',
  'server-rs-codex-',
  'server-rs/target-',
];
|
|
|
|
// Optional file (resolved against the working directory) listing paths to skip.
const IGNORE_FILE = '.encoding-check-ignore';

// `fatal: true` makes decode() throw on malformed UTF-8 instead of silently
// substituting U+FFFD, which is what lets invalid files be detected.
const decoder = new TextDecoder('utf-8', { fatal: true });
|
|
|
|
/**
 * Converts every backslash in a path to a forward slash so that paths can be
 * compared textually regardless of the separator they were written with.
 *
 * @param {string} filePath - Path using either separator style.
 * @returns {string} The same path with `/` as the only separator.
 */
function normalizePath(filePath) {
  return filePath.replace(/\\/g, '/');
}
|
|
|
|
/**
 * Decides whether a path belongs to the set of text files to validate.
 *
 * A path is rejected when it starts with one of `EXCLUDED_PREFIXES`. Otherwise
 * it is accepted when its base name is listed in `TEXT_FILENAMES`, when its
 * base name starts with `.env`, or when its lower-cased extension is listed in
 * `TEXT_EXTENSIONS`.
 *
 * @param {string} filePath - Path with either separator style.
 * @returns {boolean} `true` when the file should be checked.
 */
function shouldCheck(filePath) {
  const normalizedPath = normalizePath(filePath);

  // Local cargo caches and verify copies are not part of the main project
  // source; keep these temporary workspaces out of the repo-level check.
  const isExcluded = EXCLUDED_PREFIXES.some((prefix) =>
    normalizedPath.startsWith(prefix),
  );
  if (isExcluded) {
    return false;
  }

  const fileName = basename(normalizedPath);
  if (TEXT_FILENAMES.has(fileName) || fileName.startsWith('.env')) {
    return true;
  }

  return TEXT_EXTENSIONS.has(extname(fileName).toLowerCase());
}
|
|
|
|
/**
 * Lists the tracked and untracked-but-not-ignored files reported by
 * `git ls-files`, normalized and reduced to those `shouldCheck` accepts.
 *
 * @returns {string[]} Unique forward-slash paths, in git's output order.
 * @throws {Error} When the `git` command cannot be run or exits non-zero.
 */
function listFilesFromGit() {
  // `-z` makes git emit NUL-terminated entries, so splitting on '\0' is safe
  // for any file name.
  const output = execFileSync(
    'git',
    ['ls-files', '--cached', '--others', '--exclude-standard', '-z'],
    { encoding: 'utf8', maxBuffer: 16 * 1024 * 1024 },
  );

  const paths = output.split('\0').filter(Boolean).map(normalizePath);

  // During a merge conflict the index holds several stages of one path and
  // git lists the path once per stage; de-duplicate so each file is validated
  // (and reported) only once.
  return [...new Set(paths)].filter(shouldCheck);
}
|
|
|
|
/**
 * Reads the optional ignore file and returns the paths it lists.
 *
 * The file is read as UTF-8, one entry per line. Lines are trimmed; blank
 * lines and lines starting with `#` are dropped; remaining entries have their
 * backslashes converted to forward slashes.
 *
 * @returns {Set<string>} Normalized paths to skip; empty when the ignore file
 *   does not exist.
 */
function loadIgnoreList() {
  if (!existsSync(IGNORE_FILE)) {
    return new Set();
  }

  const entries = readFileSync(IGNORE_FILE, 'utf8')
    .split(/\r?\n/u)
    .map((line) => line.trim())
    .filter((line) => line !== '' && !line.startsWith('#'))
    .map(normalizePath);

  return new Set(entries);
}
|
|
|
|
/**
 * Reports whether a byte sequence contains a NUL (0x00) byte.
 *
 * @param {Buffer|Uint8Array} buffer - Raw file contents.
 * @returns {boolean} `true` when at least one byte equals 0.
 */
function hasNullByte(buffer) {
  // Built-in search instead of a hand-written loop over every byte.
  return buffer.includes(0);
}
|
|
|
|
/**
 * Checks that a file's contents are well-formed UTF-8 free of U+FFFD.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string|null} A human-readable failure message, or `null` when the
 *   file passes, does not exist, or contains a NUL byte.
 */
function validateUtf8(filePath) {
  // A listed path may no longer exist on disk; that is not an encoding error.
  if (!existsSync(filePath)) {
    return null;
  }

  const bytes = readFileSync(filePath);

  // Files containing NUL bytes are skipped — presumably binary data that
  // happens to carry a text-like name.
  if (hasNullByte(bytes)) {
    return null;
  }

  let text;
  try {
    // The shared decoder is fatal, so malformed byte sequences throw here.
    text = decoder.decode(bytes);
  } catch {
    return `${filePath} is not valid UTF-8.`;
  }

  return text.includes('\uFFFD')
    ? `${filePath} contains Unicode replacement characters (U+FFFD), which usually means text was already decoded incorrectly before being saved.`
    : null;
}
|
|
|
|
// Entry point: validate the files named on the command line, or every
// candidate file reported by git when no arguments are given.

// EXCLUDED_PREFIXES and the ignore list hold paths relative to the working
// directory, so explicit arguments such as `./dist/a.js` or an absolute path
// are rewritten to that same form before they are filtered. Backslashes are
// normalized first so the rewrite also works for Windows-style arguments.
const explicitFiles = process.argv
  .slice(2)
  .map((argument) =>
    normalizePath(relative(process.cwd(), normalizePath(argument))),
  );

const ignoreList = loadIgnoreList();
const candidates = explicitFiles.length > 0 ? explicitFiles : listFilesFromGit();
const filesToCheck = candidates
  .filter(shouldCheck)
  .filter((filePath) => !ignoreList.has(filePath));

const failures = [];

for (const filePath of filesToCheck) {
  const failure = validateUtf8(filePath);

  if (failure) {
    failures.push(failure);
  }
}

if (failures.length > 0) {
  console.error('Encoding check failed:');

  for (const failure of failures) {
    console.error(`- ${failure}`);
  }

  // Set the exit code rather than calling process.exit(), which can end the
  // process before pending stderr writes have been flushed.
  process.exitCode = 1;
} else {
  console.log(`Encoding check passed for ${filesToCheck.length} file(s).`);
}
|