Files
Genarrative/scripts/loadtest/extract-works-list-data.mjs

371 lines
12 KiB
JavaScript

#!/usr/bin/env node
import { readFile, writeFile } from 'node:fs/promises';
import { basename } from 'node:path';
import { fileURLToPath } from 'node:url';
// Source table name -> work type label. The insertion order of this mapping
// is the single source of truth for every ordered collection derived below.
const WORK_TABLE_TYPES = {
  puzzle_work_profile: 'puzzle',
  custom_world_profile: 'customWorld',
  match3d_work_profile: 'match3d',
  square_hole_work_profile: 'squareHole',
  big_fish_work_profile: 'bigFish',
  visual_novel_work_profile: 'visualNovel',
};

// Order in which tables are processed and written to the output.
const TABLE_OUTPUT_ORDER = Object.keys(WORK_TABLE_TYPES);

// Tables that may be read from the input; anything else is ignored.
const ALLOWED_TABLES = new Set(TABLE_OUTPUT_ORDER);

// Work type labels, in the same order as their tables.
const WORK_TYPES = Object.values(WORK_TABLE_TYPES);

// Maximum length kept for short text fields and for large JSON/text payloads.
const SHORT_TEXT_LIMIT = 120;
const LONG_TEXT_LIMIT = 500;

// Keywords that get replaced when redacting free-form text
// (case-insensitive, every occurrence).
const SENSITIVE_PATTERN = /(token|secret|password|passwd|phone|wallet|credential|authorization|auth[_-]?key|api[_-]?key)/giu;
/**
 * Assigns each distinct input value a stable, sequential alias of the form
 * `<prefix>-NNN` (zero-padded to at least three digits).
 */
class StableMapper {
  /**
   * @param {string} prefix - Text placed before the sequence number.
   */
  constructor(prefix) {
    this.prefix = prefix;
    // String(original value) -> alias that was handed out for it.
    this.values = new Map();
  }

  /**
   * Returns the alias for `value`, creating one on first sight.
   *
   * @param {*} value - Value to alias; compared by its string form.
   * @returns {*} The alias, or `value` itself when it is undefined, null or ''.
   */
  map(value) {
    const isBlank = value === undefined || value === null || value === '';
    if (isBlank) return value;

    const lookupKey = String(value);
    const known = this.values.get(lookupKey);
    if (known !== undefined) return known;

    const ordinal = String(this.values.size + 1).padStart(3, '0');
    const alias = `${this.prefix}-${ordinal}`;
    this.values.set(lookupKey, alias);
    return alias;
  }
}
/**
 * Builds a fresh set of alias mappers, one per kind of identifier.
 *
 * @returns {Object<string, StableMapper>} Mappers keyed by role name.
 */
function createContext() {
  // [context key, alias prefix] — order determines the key order of the result.
  const roles = [
    ['user', 'user'],
    ['session', 'session'],
    ['author', 'author'],
    ['authorCode', 'author-code'],
    ['publicWorkCode', 'public-work-code'],
    ['coverAsset', 'asset'],
    ['work', 'work'],
    ['profile', 'profile'],
  ];
  return Object.fromEntries(roles.map(([role, prefix]) => [role, new StableMapper(prefix)]));
}
/**
 * Creates an object with one independent empty array per known work type.
 *
 * @returns {Object<string, Array>} Empty buckets keyed by work type.
 */
function createWorkTypeBuckets() {
  const buckets = {};
  for (const workType of WORK_TYPES) {
    buckets[workType] = [];
  }
  return buckets;
}
/**
 * Unwraps a single-key `{ some: x }` / `{ none: ... }` wrapper object.
 *
 * `{ some: x }` yields `x`, `{ none: ... }` yields `undefined`; every other
 * value (primitives, arrays, objects with a different shape) is returned as is.
 * NOTE(review): presumably this is how optional columns are encoded in the
 * exported data — confirm against the export format.
 *
 * @param {*} value - Possibly wrapped value.
 * @returns {*} The unwrapped value, or `value` unchanged.
 */
function unwrapSpacetimeOption(value) {
  const isPlainContainer = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isPlainContainer) return value;
  if (Object.keys(value).length !== 1) return value;

  const owns = (name) => Object.prototype.hasOwnProperty.call(value, name);
  if (owns('some')) return value.some;
  if (owns('none')) return undefined;
  return value;
}
/**
 * Collapses whitespace runs to single spaces, trims, and caps the result at
 * `limit` UTF-16 code units.
 *
 * The cut never lands in the middle of a surrogate pair: when the last kept
 * code unit would be a high surrogate whose low half is cut off, that unit is
 * dropped too, so the result never ends in half of a character.
 *
 * @param {*} value - Value to normalize; non-strings are stringified.
 * @param {number} limit - Maximum length of the result in UTF-16 code units.
 * @returns {string|null|undefined} The normalized text, or `value` itself
 *   when it is null or undefined.
 */
function truncateText(value, limit) {
  if (value === undefined || value === null) return value;
  const text = String(value).replace(/\s+/g, ' ').trim();
  if (text.length <= limit) return text;

  let end = limit;
  if (end > 0) {
    const lastKept = text.charCodeAt(end - 1);
    const firstDropped = text.charCodeAt(end);
    const splitsPair =
      lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    // Back off by one unit rather than emit a lone high surrogate.
    if (splitsPair) end -= 1;
  }
  return text.slice(0, end);
}
/**
 * Replaces every match of SENSITIVE_PATTERN in the text with `[redacted]`.
 * Only the matched keyword itself is replaced; surrounding text is kept.
 *
 * @param {*} value - Value to scrub; non-strings are stringified.
 * @returns {string|null|undefined} Scrubbed text, or `value` itself when it
 *   is null or undefined.
 */
function redactSensitiveText(value) {
  if (value == null) return value;
  const text = String(value);
  return text.replace(SENSITIVE_PATTERN, '[redacted]');
}
/**
 * Reduces a cover image source to a form that is safe to write out.
 *
 * Inline `data:image/` payloads are replaced by a placeholder; for anything
 * else the query string and fragment are dropped and the remainder is capped
 * at 180 characters.
 *
 * @param {*} value - Raw (possibly option-wrapped) image source.
 * @returns {*} The sanitized source, or the unwrapped value itself when it is
 *   undefined, null or ''.
 */
function sanitizeCoverImageSrc(value) {
  const MAX_SRC_LENGTH = 180;
  const src = unwrapSpacetimeOption(value);
  if (src == null || src === '') return src;

  const text = String(src);
  if (text.startsWith('data:image/')) return '[redacted-data-image]';

  // Keep only what precedes the first '?', then what precedes the first '#'.
  const [beforeQuery] = text.split('?');
  const [pathOnly] = beforeQuery.split('#');
  return pathOnly.length > MAX_SRC_LENGTH ? pathOnly.slice(0, MAX_SRC_LENGTH) : pathOnly;
}
/**
 * Turns a large JSON-ish payload into a redacted, length-capped string.
 *
 * Strings are used directly; other values are serialized with JSON.stringify,
 * falling back to String() when serialization throws.
 *
 * @param {*} value - Raw (possibly option-wrapped) payload.
 * @returns {string|null|undefined} Redacted text capped at LONG_TEXT_LIMIT,
 *   or the unwrapped value itself when it is null or undefined.
 */
function sanitizeLargeJson(value) {
  const payload = unwrapSpacetimeOption(value);
  if (payload === undefined || payload === null) return payload;

  let serialized;
  if (typeof payload === 'string') {
    serialized = payload;
  } else {
    try {
      serialized = JSON.stringify(payload);
    } catch {
      // Not serializable as JSON: fall back to the generic string form.
      serialized = String(payload);
    }
  }
  return truncateText(redactSensitiveText(serialized), LONG_TEXT_LIMIT);
}
/**
 * Returns the value of the first listed key whose value on `row` is neither
 * undefined nor null.
 *
 * @param {Object} row - Object to read from.
 * @param {Iterable<string>} keys - Candidate keys, in priority order.
 * @returns {*} The first such value, or undefined when there is none.
 */
function firstDefined(row, keys) {
  for (const candidate of keys) {
    const found = row[candidate];
    if (found == null) continue;
    return found;
  }
  return undefined;
}
/**
 * Copies one short text field from `row` into `sanitized`, unwrapped and
 * capped at SHORT_TEXT_LIMIT. Does nothing when the field is undefined.
 *
 * @param {Object} row - Source row.
 * @param {Object} sanitized - Output row; mutated in place.
 * @param {string} key - Field name to copy.
 * @returns {void}
 */
function sanitizeShortField(row, sanitized, key) {
  const raw = row[key];
  if (raw === undefined) return;
  sanitized[key] = truncateText(unwrapSpacetimeOption(raw), SHORT_TEXT_LIMIT);
}
/**
 * Produces a sanitized copy of one work-profile row.
 *
 * Identifier-like columns are replaced by stable aliases from `ctx`, the cover
 * image source is scrubbed, short text columns are truncated, large JSON
 * columns are redacted and truncated, and a fixed set of columns is copied
 * through after unwrapping. Columns not listed here are dropped.
 *
 * The processing order below is significant: it fixes both the order in which
 * aliases are handed out and the key order of the returned object.
 *
 * @param {Object} row - Raw row from the input table.
 * @param {Object} ctx - Alias mappers as returned by createContext().
 * @returns {Object} The sanitized row.
 */
function sanitizeWorkRow(row, ctx) {
  // [column, name of the ctx mapper that aliases it]
  const aliasedColumns = [
    ['owner_user_id', 'user'],
    ['user_id', 'user'],
    ['author_display_name', 'author'],
    ['public_work_code', 'publicWorkCode'],
    ['author_public_user_code', 'authorCode'],
    ['cover_asset_id', 'coverAsset'],
  ];
  const shortTextColumns = [
    'title',
    'work_title',
    'level_name',
    'world_name',
    'summary',
    'summary_text',
    'description',
    'work_description',
    'subtitle',
  ];
  const largeJsonColumns = ['levels_json', 'profile_payload_json', 'anchor_pack_json', 'theme_tags_json'];
  const passthroughColumns = [
    'publication_status',
    'publicationStatus',
    'play_count',
    'playCount',
    'like_count',
    'likeCount',
    'remix_count',
    'remixCount',
    'updated_at',
    'created_at',
    'published_at',
    'visibility',
    'status',
    'category',
    'tags',
  ];

  const sanitized = {};

  // The ids accept either snake_case or camelCase in the input but are always
  // written out under the snake_case name.
  const rawProfileId = unwrapSpacetimeOption(firstDefined(row, ['profile_id', 'profileId']));
  const rawWorkId = unwrapSpacetimeOption(firstDefined(row, ['work_id', 'workId']));
  if (rawProfileId !== undefined) {
    sanitized.profile_id = ctx.profile.map(rawProfileId);
  }
  if (rawWorkId !== undefined) {
    sanitized.work_id = ctx.work.map(rawWorkId);
  }

  for (const [column, mapperName] of aliasedColumns) {
    if (row[column] === undefined) continue;
    sanitized[column] = ctx[mapperName].map(unwrapSpacetimeOption(row[column]));
  }

  if (row.cover_image_src !== undefined) {
    sanitized.cover_image_src = sanitizeCoverImageSrc(row.cover_image_src);
  }

  for (const column of shortTextColumns) {
    sanitizeShortField(row, sanitized, column);
  }

  for (const column of largeJsonColumns) {
    if (row[column] === undefined) continue;
    sanitized[column] = sanitizeLargeJson(row[column]);
  }

  for (const column of passthroughColumns) {
    if (row[column] === undefined) continue;
    sanitized[column] = unwrapSpacetimeOption(row[column]);
  }

  return sanitized;
}
/**
 * Projects a sanitized row onto a common, table-independent work shape.
 *
 * @param {string} tableName - Source table; determines the `type` field.
 * @param {Object} row - Sanitized row.
 * @returns {Object} Normalized work record; the three counters default to 0.
 */
function normalizeWork(tableName, row) {
  // Same as `row[a] ?? row[b] ?? ...`: the first non-nullish column wins, and
  // when all are nullish the value of the last column is returned.
  const coalesce = (...columns) =>
    columns.reduce((found, column) => found ?? row[column], undefined);

  const {
    work_id: workId,
    profile_id: profileId,
    owner_user_id: ownerUserId,
    public_work_code: publicWorkCode,
    cover_image_src: coverImageSrc,
    updated_at: updatedAt,
  } = row;

  return {
    type: WORK_TABLE_TYPES[tableName],
    workId,
    profileId,
    ownerUserId,
    publicWorkCode,
    title: coalesce('title', 'work_title', 'level_name', 'world_name'),
    subtitle: coalesce('subtitle', 'summary_text', 'summary', 'work_description', 'description'),
    publicationStatus: coalesce('publicationStatus', 'publication_status', 'status'),
    playCount: coalesce('playCount', 'play_count') ?? 0,
    likeCount: coalesce('likeCount', 'like_count') ?? 0,
    remixCount: coalesce('remixCount', 'remix_count') ?? 0,
    coverImageSrc,
    updatedAt,
  };
}
/**
 * Indexes the rows of every allowed table in the input by table name.
 *
 * Tables whose name is not in ALLOWED_TABLES are skipped; a table without a
 * `rows` array contributes an empty list. When a name occurs more than once,
 * the last occurrence wins.
 *
 * @param {Object} input - Parsed input; its `tables` array is read if present.
 * @returns {Map<string, Array>} Rows keyed by table name.
 */
function toRowsByTable(input) {
  const rowsByName = new Map();
  const candidateTables = input?.tables;
  if (!Array.isArray(candidateTables)) return rowsByName;

  for (const entry of candidateTables) {
    const tableName = entry?.name;
    if (ALLOWED_TABLES.has(tableName)) {
      rowsByName.set(tableName, Array.isArray(entry.rows) ? entry.rows : []);
    }
  }
  return rowsByName;
}
/**
 * Extracts sanitized works-list data from a parsed input document.
 *
 * Tables are handled in TABLE_OUTPUT_ORDER; only tables present in the input
 * appear in `tables` and `counts`. One alias context is shared by all tables,
 * so equal source identifiers receive the same alias everywhere.
 *
 * @param {Object} input - Parsed input containing a `tables` array.
 * @param {Object} [options]
 * @param {string} [options.source] - Source label; defaults to 'unknown'.
 * @param {string} [options.generatedAt] - Timestamp; defaults to the current
 *   time as an ISO string.
 * @returns {Object} `{ source, generatedAt, counts, tables, profileIds,
 *   workIds, normalizedWorks }`.
 */
export function extractWorksListData(input, options = {}) {
  const ctx = createContext();
  const rowsByTable = toRowsByTable(input);
  const tables = {};
  const counts = {};
  const profileIds = createWorkTypeBuckets();
  const workIds = createWorkTypeBuckets();
  const normalizedWorks = [];

  const presentTables = TABLE_OUTPUT_ORDER.filter((name) => Boolean(rowsByTable.get(name)));
  presentTables.forEach((tableName) => {
    const sanitizedRows = rowsByTable.get(tableName).map((row) => sanitizeWorkRow(row, ctx));
    tables[tableName] = sanitizedRows;
    counts[tableName] = sanitizedRows.length;

    const workType = WORK_TABLE_TYPES[tableName];
    if (!workType) return;

    sanitizedRows.forEach((row) => {
      if (row.profile_id) profileIds[workType].push(row.profile_id);
      if (row.work_id) workIds[workType].push(row.work_id);
      normalizedWorks.push(normalizeWork(tableName, row));
    });
  });

  const { source = null, generatedAt = null } = options;
  return {
    source: source ?? 'unknown',
    generatedAt: generatedAt ?? new Date().toISOString(),
    counts,
    tables,
    profileIds,
    workIds,
    normalizedWorks,
  };
}
/**
 * Derives a small sample from a full extraction result.
 *
 * Each table keeps its first `maxRowsPerTable` rows. The id buckets and the
 * normalized works are then restricted to ids that occur in those kept rows.
 *
 * @param {Object} output - Result of extractWorksListData().
 * @param {number} [maxRowsPerTable=3] - Rows to keep per table.
 * @returns {Object} A copy of `output` with counts, tables, profileIds,
 *   workIds and normalizedWorks replaced by their sampled versions.
 */
function createSampleOutput(output, maxRowsPerTable = 3) {
  const sampledTables = {};
  const sampledCounts = {};
  const keptWorkIds = new Set();
  const keptProfileIds = new Set();

  Object.keys(output.tables).forEach((tableName) => {
    const head = output.tables[tableName].slice(0, maxRowsPerTable);
    sampledTables[tableName] = head;
    sampledCounts[tableName] = head.length;
    if (!WORK_TABLE_TYPES[tableName]) return;

    for (const { work_id: workId, profile_id: profileId } of head) {
      if (workId) keptWorkIds.add(workId);
      if (profileId) keptProfileIds.add(profileId);
    }
  });

  // Keeps, per work type, only ids that survived sampling (capped per type).
  const restrictBuckets = (buckets, keptIds) =>
    Object.fromEntries(
      Object.entries(buckets).map(([type, ids]) => [
        type,
        ids.filter((id) => keptIds.has(id)).slice(0, maxRowsPerTable),
      ]),
    );

  const isSampledWork = ({ workId, profileId }) =>
    keptWorkIds.has(workId) || keptProfileIds.has(profileId);

  return {
    ...output,
    counts: sampledCounts,
    tables: sampledTables,
    profileIds: restrictBuckets(output.profileIds, keptProfileIds),
    workIds: restrictBuckets(output.workIds, keptWorkIds),
    normalizedWorks: output.normalizedWorks.filter(isSampledWork).slice(0, maxRowsPerTable * 6),
  };
}
/**
 * Parses the command-line arguments of this script.
 *
 * `--input`, `--output` and `--sample-output` each take the next argument as
 * their value and are stored without the leading dashes; `--help` / `-h` set
 * `help: true`.
 *
 * @param {string[]} argv - Arguments, without the node binary and script path.
 * @returns {Object} Parsed options.
 * @throws {Error} On an unknown argument, or when a value is missing or starts
 *   with `--`.
 */
function parseArgs(argv) {
  const valueFlags = new Set(['--input', '--output', '--sample-output']);
  const helpFlags = new Set(['--help', '-h']);
  const parsed = {};

  let cursor = 0;
  while (cursor < argv.length) {
    const flag = argv[cursor];
    cursor += 1;

    if (valueFlags.has(flag)) {
      const operand = argv[cursor];
      if (!operand || operand.startsWith('--')) {
        throw new Error(`${flag} requires a value`);
      }
      parsed[flag.slice(2)] = operand;
      cursor += 1; // the operand has been consumed as well
      continue;
    }

    if (helpFlags.has(flag)) {
      parsed.help = true;
      continue;
    }

    throw new Error(`Unknown argument: ${flag}`);
  }
  return parsed;
}
/**
 * Returns the one-line usage text of this script.
 *
 * @returns {string} Usage text.
 */
function usage() {
  const segments = [
    'Usage: node scripts/loadtest/extract-works-list-data.mjs',
    '--input <migration.json>',
    '--output <works-list.local.json>',
    '[--sample-output <works-list.sample.json>]',
  ];
  return segments.join(' ');
}
/**
 * Command-line entry point: reads the input JSON file, writes the extracted
 * data (and optionally a sample of it) as pretty-printed JSON, and prints a
 * summary.
 *
 * @param {string[]} [argv] - Arguments; defaults to the process arguments
 *   after the script path.
 * @returns {Promise<void>}
 * @throws {Error} When a required option is missing or an argument is
 *   invalid; file-system and JSON parse errors propagate unchanged.
 */
export async function runCli(argv = process.argv.slice(2)) {
  const {
    help,
    input: inputPath,
    output: outputPath,
    'sample-output': samplePath,
  } = parseArgs(argv);

  if (help) {
    console.log(usage());
    return;
  }
  if (!inputPath) throw new Error(`Missing required --input. ${usage()}`);
  if (!outputPath) throw new Error(`Missing required --output. ${usage()}`);

  const serialize = (payload) => `${JSON.stringify(payload, null, 2)}\n`;

  const migration = JSON.parse(await readFile(inputPath, 'utf8'));
  const output = extractWorksListData(migration, { source: basename(inputPath) });

  await writeFile(outputPath, serialize(output), 'utf8');
  if (samplePath) {
    await writeFile(samplePath, serialize(createSampleOutput(output)), 'utf8');
  }

  const tableCount = Object.keys(output.tables).length;
  const workCount = output.normalizedWorks.length;
  console.log(
    `works-list extracted: source=${output.source}, tables=${tableCount}, normalizedWorks=${workCount}`,
  );
  Object.entries(output.counts).forEach(([tableName, count]) => {
    console.log(` ${tableName}: ${count}`);
  });
}
// Run the CLI only when this file is the script node was started with, not
// when it is imported as a module.
const entryScriptPath = process.argv[1];
const isDirectRun =
  Boolean(entryScriptPath) && fileURLToPath(import.meta.url) === entryScriptPath;

if (isDirectRun) {
  runCli().catch((failure) => {
    // Print only the message and signal failure through the exit code.
    const message = failure instanceof Error ? failure.message : String(failure);
    console.error(message);
    process.exitCode = 1;
  });
}