Files
Genarrative/scripts/check-encoding.mjs
kdletters cbc27bad4a
Some checks failed
CI / verify (push) Has been cancelled
init with react+axum+spacetimedb
2026-04-26 18:06:23 +08:00

174 lines
3.4 KiB
JavaScript

import { execFileSync } from 'node:child_process';
import { existsSync, readFileSync } from 'node:fs';
import { basename, extname } from 'node:path';
// File extensions (lowercase, including the leading dot) that mark a file as
// text to be validated.
const TEXT_EXTENSIONS = new Set([
  '.cjs', '.controller', '.css', '.env', '.html', '.js',
  '.json', '.jsx', '.md', '.meta', '.mjs', '.ps1',
  '.py', '.rs', '.scss', '.sh', '.toml', '.ts',
  '.tsx', '.txt', '.yaml', '.yml',
]);

// Exact base names that are always validated, whatever their extension.
const TEXT_FILENAMES = new Set([
  '.editorconfig', '.gitattributes', '.gitignore', '.prettierignore',
  '.prettierrc', '.prettierrc.json', 'AGENTS.md', 'README.md',
]);

// Path prefixes (forward-slash form) whose contents are never validated.
// Entries without a trailing slash match any path that merely starts with
// that text.
const EXCLUDED_PREFIXES = [
  '.codex-logs/', '.git/', '.codex-cargo-home-',
  'dist/', 'dist_check/', 'dist_check_monster_position/',
  'media/', 'node_modules/', 'public/Icons/',
  'server-rs-codex-', 'server-rs/target-',
];

// Optional file listing paths to skip, looked up relative to the working
// directory.
const IGNORE_FILE = '.encoding-check-ignore';

// `fatal: true` makes decode() throw on malformed input instead of silently
// substituting U+FFFD.
const decoder = new TextDecoder('utf-8', { fatal: true });
/**
 * Rewrites every backslash in a path as a forward slash so paths compare
 * consistently regardless of the separator they were written with.
 *
 * @param {string} filePath - Path that may contain backslash separators.
 * @returns {string} The same path using only forward slashes.
 */
function normalizePath(filePath) {
  return filePath.split('\\').join('/');
}
/**
 * Decides whether a path is a text file that the encoding check should
 * validate.
 *
 * @param {string} filePath - Path to classify; backslashes are accepted.
 * @returns {boolean} True when the file should be validated.
 */
function shouldCheck(filePath) {
  const candidate = normalizePath(filePath);

  // Local cargo caches and verification copies are not part of the main
  // project source; keep those temporary workspaces out of the repo-wide
  // encoding check.
  for (const prefix of EXCLUDED_PREFIXES) {
    if (candidate.startsWith(prefix)) {
      return false;
    }
  }

  const name = basename(candidate);

  // Known text file names and any dotenv variant (`.env`, `.env.local`, ...)
  // qualify without looking at the extension.
  if (TEXT_FILENAMES.has(name) || name.startsWith('.env')) {
    return true;
  }

  // Otherwise fall back to a case-insensitive extension lookup.
  return TEXT_EXTENSIONS.has(extname(name).toLowerCase());
}
/**
 * Asks git for the files in the working tree and keeps those that qualify
 * for the encoding check.
 *
 * Runs `git ls-files` with `--cached --others --exclude-standard` and `-z`
 * (NUL-separated output), so the listing is split on NUL characters.
 *
 * @returns {string[]} Forward-slash paths accepted by `shouldCheck`.
 */
function listFilesFromGit() {
  const gitArgs = ['ls-files', '--cached', '--others', '--exclude-standard', '-z'];
  const execOptions = { encoding: 'utf8', maxBuffer: 16 * 1024 * 1024 };
  const rawListing = execFileSync('git', gitArgs, execOptions);

  const selected = [];
  for (const entry of rawListing.split('\0')) {
    // Splitting on NUL can leave empty strings (e.g. after the final
    // terminator); drop them.
    if (!entry) {
      continue;
    }
    const normalized = normalizePath(entry);
    if (shouldCheck(normalized)) {
      selected.push(normalized);
    }
  }
  return selected;
}
/**
 * Reads the optional ignore file and returns the paths it lists.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are
 * discarded, and the remaining entries are converted to forward-slash form.
 *
 * @returns {Set<string>} Ignored paths; empty when the ignore file is absent.
 */
function loadIgnoreList() {
  const ignored = new Set();
  if (!existsSync(IGNORE_FILE)) {
    return ignored;
  }

  const lines = readFileSync(IGNORE_FILE, 'utf8').split(/\r?\n/u);
  for (const rawLine of lines) {
    const entry = rawLine.trim();
    if (entry === '' || entry.startsWith('#')) {
      continue;
    }
    ignored.add(normalizePath(entry));
  }
  return ignored;
}
/**
 * Reports whether a byte buffer contains at least one zero (NUL) byte.
 *
 * Uses the built-in `includes` search instead of stepping through the
 * buffer one byte at a time via the iterator protocol.
 *
 * @param {Buffer | Uint8Array} buffer - Raw bytes to scan.
 * @returns {boolean} True when any byte equals 0.
 */
function hasNullByte(buffer) {
  return buffer.includes(0);
}
/**
 * Checks that one file decodes as strict UTF-8 and contains no U+FFFD
 * replacement characters.
 *
 * Paths that do not exist, and files containing a NUL byte (presumably
 * binary content), are skipped and reported as passing.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {string | null} A failure message, or null when the file passes
 *   or is skipped.
 */
function validateUtf8(filePath) {
  if (!existsSync(filePath)) {
    return null;
  }

  const contents = readFileSync(filePath);
  if (hasNullByte(contents)) {
    return null;
  }

  let decoded;
  try {
    // The shared decoder is strict, so malformed byte sequences throw here.
    decoded = decoder.decode(contents);
  } catch {
    return `${filePath} is not valid UTF-8.`;
  }

  // A literal U+FFFD that decodes cleanly was written into the file itself.
  return decoded.includes('\uFFFD')
    ? `${filePath} contains Unicode replacement characters (U+FFFD), which usually means text was already decoded incorrectly before being saved.`
    : null;
}
// Script entry point: validate the files named on the command line, or the
// git file listing when no arguments are given. Failures are printed to
// stderr and the process exits with status 1.
const explicitFiles = process.argv.slice(2).map(normalizePath);
const ignoreList = loadIgnoreList();

const candidateFiles = explicitFiles.length ? explicitFiles : listFilesFromGit();
const filesToCheck = candidateFiles.filter(
  (filePath) => shouldCheck(filePath) && !ignoreList.has(filePath)
);

// validateUtf8 returns null for files that pass; keep only real messages.
const failures = filesToCheck
  .map((filePath) => validateUtf8(filePath))
  .filter(Boolean);

if (failures.length > 0) {
  console.error('Encoding check failed:');
  failures.forEach((failure) => {
    console.error(`- ${failure}`);
  });
  process.exit(1);
}

console.log(`Encoding check passed for ${filesToCheck.length} file(s).`);