feat: cherry-pick translation script improvements from PR #250

Add caching, parallel processing, and tier-based translation scripts:
- Caching system via .translation-cache.json to skip unchanged content
- --force flag to override cache and re-translate
- --parallel flag for concurrent translations
- Tier-based npm scripts (translate:tier1-4, translate:all)
- Better markdown wrapper stripping
- Translation disclaimer at top of files
- Uses Bun for better performance

Changes cherry-picked from PR #250 while preserving current version
(7.2.0) and worker scripts. Does not include translated README files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Alex Newman
2025-12-14 15:40:39 -05:00
parent c422ea133f
commit 4ddc5a01bb
3 changed files with 235 additions and 89 deletions
+6 -1
View File
@@ -47,7 +47,12 @@
"changelog:generate": "node scripts/generate-changelog.js",
"usage:analyze": "node scripts/analyze-usage.js",
"usage:today": "node scripts/analyze-usage.js $(date +%Y-%m-%d)",
"translate-readme": "npx tsx scripts/translate-readme/cli.ts -v README.md zh ko ja",
"translate-readme": "bun scripts/translate-readme/cli.ts -v -o docs/i18n README.md",
"translate:tier1": "npm run translate-readme -- zh ja pt-br ko es de fr",
"translate:tier2": "npm run translate-readme -- he ar ru pl cs nl tr uk",
"translate:tier3": "npm run translate-readme -- vi id th hi bn ro sv",
"translate:tier4": "npm run translate-readme -- it el hu fi da no",
"translate:all": "npm run translate:tier1 && npm run translate:tier2 && npm run translate:tier3 && npm run translate:tier4",
"bug-report": "npx tsx scripts/bug-report/cli.ts"
},
"dependencies": {
+54 -29
View File
@@ -1,4 +1,4 @@
#!/usr/bin/env npx tsx
#!/usr/bin/env bun
import { translateReadme, SUPPORTED_LANGUAGES } from "./index.ts";
@@ -11,6 +11,8 @@ interface CliArgs {
model?: string;
maxBudget?: number;
verbose: boolean;
force: boolean;
parallel: number;
help: boolean;
listLanguages: boolean;
}
@@ -39,6 +41,8 @@ OPTIONS:
-m, --model <model> Claude model to use (default: sonnet)
--max-budget <usd> Maximum budget in USD
-v, --verbose Show detailed progress
-f, --force Force re-translation ignoring cache
--parallel <n> Run n translations concurrently (default: 1)
-h, --help Show this help message
--list-languages List all supported language codes
@@ -59,40 +63,46 @@ SUPPORTED LANGUAGES:
function printLanguages(): void {
const LANGUAGE_NAMES: Record<string, string> = {
ar: "Arabic",
bg: "Bulgarian",
cs: "Czech",
da: "Danish",
de: "German",
el: "Greek",
es: "Spanish",
et: "Estonian",
fi: "Finnish",
fr: "French",
he: "Hebrew",
hi: "Hindi",
hu: "Hungarian",
id: "Indonesian",
it: "Italian",
// Tier 1 - No-brainers
zh: "Chinese (Simplified)",
ja: "Japanese",
ko: "Korean",
lt: "Lithuanian",
lv: "Latvian",
nl: "Dutch",
no: "Norwegian",
pl: "Polish",
pt: "Portuguese",
"pt-br": "Brazilian Portuguese",
ro: "Romanian",
ko: "Korean",
es: "Spanish",
de: "German",
fr: "French",
// Tier 2 - Strong tech scenes
he: "Hebrew",
ar: "Arabic",
ru: "Russian",
sk: "Slovak",
sl: "Slovenian",
sv: "Swedish",
th: "Thai",
pl: "Polish",
cs: "Czech",
nl: "Dutch",
tr: "Turkish",
uk: "Ukrainian",
// Tier 3 - Emerging/Growing fast
vi: "Vietnamese",
zh: "Chinese (Simplified)",
id: "Indonesian",
th: "Thai",
hi: "Hindi",
bn: "Bengali",
ro: "Romanian",
sv: "Swedish",
// Tier 4 - Why not
it: "Italian",
el: "Greek",
hu: "Hungarian",
fi: "Finnish",
da: "Danish",
no: "Norwegian",
// Other supported
bg: "Bulgarian",
et: "Estonian",
lt: "Lithuanian",
lv: "Latvian",
pt: "Portuguese",
sk: "Slovak",
sl: "Slovenian",
"zh-tw": "Chinese (Traditional)",
};
@@ -112,6 +122,8 @@ function parseArgs(argv: string[]): CliArgs {
languages: [],
preserveCode: true,
verbose: false,
force: false,
parallel: 1,
help: false,
listLanguages: false,
};
@@ -134,6 +146,10 @@ function parseArgs(argv: string[]): CliArgs {
case "--verbose":
args.verbose = true;
break;
case "-f":
case "--force":
args.force = true;
break;
case "--no-preserve-code":
args.preserveCode = false;
break;
@@ -152,6 +168,13 @@ function parseArgs(argv: string[]): CliArgs {
case "--max-budget":
args.maxBudget = parseFloat(argv[++i]);
break;
case "--parallel":
args.parallel = parseInt(argv[++i], 10);
if (isNaN(args.parallel) || args.parallel < 1) {
console.error("Error: --parallel must be a positive integer");
process.exit(1);
}
break;
default:
if (arg.startsWith("-")) {
console.error(`Unknown option: ${arg}`);
@@ -215,6 +238,8 @@ async function main(): Promise<void> {
model: args.model,
maxBudgetUsd: args.maxBudget,
verbose: args.verbose,
force: args.force,
parallel: args.parallel,
});
// Exit with error code if any translations failed
+175 -59
View File
@@ -1,6 +1,34 @@
import { query, type SDKMessage, type SDKResultMessage } from "@anthropic-ai/claude-agent-sdk";
import * as fs from "fs/promises";
import * as path from "path";
import { createHash } from "crypto";
interface TranslationCache {
sourceHash: string;
lastUpdated: string;
translations: Record<string, {
hash: string;
translatedAt: string;
costUsd: number;
}>;
}
function hashContent(content: string): string {
return createHash("sha256").update(content).digest("hex").slice(0, 16);
}
async function readCache(cachePath: string): Promise<TranslationCache | null> {
try {
const data = await fs.readFile(cachePath, "utf-8");
return JSON.parse(data);
} catch {
return null;
}
}
async function writeCache(cachePath: string, cache: TranslationCache): Promise<void> {
await fs.writeFile(cachePath, JSON.stringify(cache, null, 2), "utf-8");
}
export interface TranslationOptions {
/** Source README file path */
@@ -19,6 +47,10 @@ export interface TranslationOptions {
maxBudgetUsd?: number;
/** Verbose output */
verbose?: boolean;
/** Force re-translation even if cached */
force?: boolean;
/** Number of concurrent translations (default: 1) */
parallel?: number;
}
export interface TranslationResult {
@@ -27,6 +59,8 @@ export interface TranslationResult {
success: boolean;
error?: string;
costUsd?: number;
/** Whether this was served from cache */
cached?: boolean;
}
export interface TranslationJobResult {
@@ -37,40 +71,46 @@ export interface TranslationJobResult {
}
const LANGUAGE_NAMES: Record<string, string> = {
ar: "Arabic",
bg: "Bulgarian",
cs: "Czech",
da: "Danish",
de: "German",
el: "Greek",
es: "Spanish",
et: "Estonian",
fi: "Finnish",
fr: "French",
he: "Hebrew",
hi: "Hindi",
hu: "Hungarian",
id: "Indonesian",
it: "Italian",
// Tier 1 - No-brainers
zh: "Chinese (Simplified)",
ja: "Japanese",
ko: "Korean",
lt: "Lithuanian",
lv: "Latvian",
nl: "Dutch",
no: "Norwegian",
pl: "Polish",
pt: "Portuguese",
"pt-br": "Brazilian Portuguese",
ro: "Romanian",
ko: "Korean",
es: "Spanish",
de: "German",
fr: "French",
// Tier 2 - Strong tech scenes
he: "Hebrew",
ar: "Arabic",
ru: "Russian",
sk: "Slovak",
sl: "Slovenian",
sv: "Swedish",
th: "Thai",
pl: "Polish",
cs: "Czech",
nl: "Dutch",
tr: "Turkish",
uk: "Ukrainian",
// Tier 3 - Emerging/Growing fast
vi: "Vietnamese",
zh: "Chinese (Simplified)",
id: "Indonesian",
th: "Thai",
hi: "Hindi",
bn: "Bengali",
ro: "Romanian",
sv: "Swedish",
// Tier 4 - Why not
it: "Italian",
el: "Greek",
hu: "Hungarian",
fi: "Finnish",
da: "Danish",
no: "Norwegian",
// Other supported
bg: "Bulgarian",
et: "Estonian",
lt: "Lithuanian",
lv: "Latvian",
pt: "Portuguese",
sk: "Slovak",
sl: "Slovenian",
"zh-tw": "Chinese (Traditional)",
};
@@ -107,6 +147,7 @@ Guidelines:
- Preserve technical accuracy
- Use appropriate technical terminology for ${languageName}
- Keep proper nouns (product names, company names) unchanged unless they have official translations
- Add a small note at the very top of the document (before any other content) in ${languageName}: "🌐 This is an automated translation. Community corrections are welcome!"
Here is the README content to translate:
@@ -114,7 +155,12 @@ Here is the README content to translate:
${content}
---
Output ONLY the translated README content, nothing else. Do not include any preamble or explanation.`;
CRITICAL OUTPUT RULES:
- Output ONLY the raw translated markdown content
- Do NOT wrap output in \`\`\`markdown code fences
- Do NOT add any preamble, explanation, or commentary
- Start directly with the translation note, then the content
- The output will be saved directly to a .md file`;
let translation = "";
let costUsd = 0;
@@ -182,7 +228,21 @@ Always output only the translated content without any surrounding explanation.`,
process.stdout.write("\r" + " ".repeat(60) + "\r");
}
return { translation: translation.trim(), costUsd };
// Strip markdown code fences if Claude wrapped the output
let cleaned = translation.trim();
if (cleaned.startsWith("```markdown")) {
cleaned = cleaned.slice("```markdown".length);
} else if (cleaned.startsWith("```md")) {
cleaned = cleaned.slice("```md".length);
} else if (cleaned.startsWith("```")) {
cleaned = cleaned.slice(3);
}
if (cleaned.endsWith("```")) {
cleaned = cleaned.slice(0, -3);
}
cleaned = cleaned.trim();
return { translation: cleaned, costUsd };
}
export async function translateReadme(
@@ -197,6 +257,8 @@ export async function translateReadme(
model,
maxBudgetUsd,
verbose = false,
force = false,
parallel = 1,
} = options;
// Read source file
@@ -207,6 +269,12 @@ export async function translateReadme(
const outDir = outputDir ? path.resolve(outputDir) : path.dirname(sourcePath);
await fs.mkdir(outDir, { recursive: true });
// Compute content hash and load cache
const sourceHash = hashContent(content);
const cachePath = path.join(outDir, ".translation-cache.json");
const cache = await readCache(cachePath);
const isHashMatch = cache?.sourceHash === sourceHash;
const results: TranslationResult[] = [];
let totalCostUsd = 0;
@@ -214,24 +282,28 @@ export async function translateReadme(
console.log(`📖 Source: ${sourcePath}`);
console.log(`📂 Output: ${outDir}`);
console.log(`🌍 Languages: ${languages.join(", ")}`);
if (parallel > 1) {
console.log(`⚡ Parallel: ${parallel} concurrent translations`);
}
console.log("");
}
for (const lang of languages) {
// Check budget
if (maxBudgetUsd && totalCostUsd >= maxBudgetUsd) {
results.push({
language: lang,
outputPath: "",
success: false,
error: "Budget exceeded",
});
continue;
}
// Worker function for a single language
async function translateLang(lang: string): Promise<TranslationResult> {
const outputFilename = pattern.replace("{lang}", lang);
const outputPath = path.join(outDir, outputFilename);
// Check cache (unless --force)
if (!force && isHashMatch && cache?.translations[lang]) {
const outputExists = await fs.access(outputPath).then(() => true).catch(() => false);
if (outputExists) {
if (verbose) {
console.log(`${outputFilename} (cached, unchanged)`);
}
return { language: lang, outputPath, success: true, cached: true, costUsd: 0 };
}
}
if (verbose) {
console.log(`🔄 Translating to ${getLanguageName(lang)} (${lang})...`);
}
@@ -240,37 +312,81 @@ export async function translateReadme(
const { translation, costUsd } = await translateToLanguage(content, lang, {
preserveCode,
model,
verbose,
verbose: verbose && parallel === 1, // Only show progress spinner for sequential
});
await fs.writeFile(outputPath, translation, "utf-8");
totalCostUsd += costUsd;
results.push({
language: lang,
outputPath,
success: true,
costUsd,
});
if (verbose) {
console.log(` ✅ Saved to ${outputFilename} ($${costUsd.toFixed(4)})`);
}
return { language: lang, outputPath, success: true, costUsd };
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
results.push({
language: lang,
outputPath,
success: false,
error: errorMessage,
});
if (verbose) {
console.log(`Failed: ${errorMessage}`);
console.log(`${lang} failed: ${errorMessage}`);
}
return { language: lang, outputPath, success: false, error: errorMessage };
}
}
// Run with concurrency limit
async function runWithConcurrency<T>(items: T[], limit: number, fn: (item: T) => Promise<TranslationResult>): Promise<TranslationResult[]> {
const results: TranslationResult[] = [];
const executing: Promise<void>[] = [];
for (const item of items) {
// Check budget before starting new translation
if (maxBudgetUsd && totalCostUsd >= maxBudgetUsd) {
results.push({
language: String(item),
outputPath: "",
success: false,
error: "Budget exceeded",
});
continue;
}
const p = fn(item).then((result) => {
results.push(result);
if (result.costUsd) {
totalCostUsd += result.costUsd;
}
});
executing.push(p.then(() => {
executing.splice(executing.indexOf(p.then(() => {})), 1);
}));
if (executing.length >= limit) {
await Promise.race(executing);
}
}
await Promise.all(executing);
return results;
}
const translationResults = await runWithConcurrency(languages, parallel, translateLang);
results.push(...translationResults);
// Save updated cache
const newCache: TranslationCache = {
sourceHash,
lastUpdated: new Date().toISOString(),
translations: {
...(isHashMatch ? cache?.translations : {}),
...Object.fromEntries(
results.filter(r => r.success && !r.cached).map(r => [
r.language,
{ hash: sourceHash, translatedAt: new Date().toISOString(), costUsd: r.costUsd || 0 }
])
),
},
};
await writeCache(cachePath, newCache);
const successful = results.filter((r) => r.success).length;
const failed = results.filter((r) => !r.success).length;