From 4ddc5a01bb20928bb63b559c879a8b7a993b1fbb Mon Sep 17 00:00:00 2001 From: Alex Newman Date: Sun, 14 Dec 2025 15:40:39 -0500 Subject: [PATCH] feat: cherry-pick translation script improvements from PR #250 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add caching, parallel processing, and tier-based translation scripts: - Caching system via .translation-cache.json to skip unchanged content - --force flag to override cache and re-translate - --parallel flag for concurrent translations - Tier-based npm scripts (translate:tier1-4, translate:all) - Better markdown wrapper stripping - Translation disclaimer at top of files - Uses Bun for better performance Changes cherry-picked from PR #250 while preserving current version (7.2.0) and worker scripts. Does not include translated README files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- package.json | 7 +- scripts/translate-readme/cli.ts | 83 +++++++---- scripts/translate-readme/index.ts | 234 ++++++++++++++++++++++-------- 3 files changed, 235 insertions(+), 89 deletions(-) diff --git a/package.json b/package.json index ceca4a79..a4e92640 100644 --- a/package.json +++ b/package.json @@ -47,7 +47,12 @@ "changelog:generate": "node scripts/generate-changelog.js", "usage:analyze": "node scripts/analyze-usage.js", "usage:today": "node scripts/analyze-usage.js $(date +%Y-%m-%d)", - "translate-readme": "npx tsx scripts/translate-readme/cli.ts -v README.md zh ko ja", + "translate-readme": "bun scripts/translate-readme/cli.ts -v -o docs/i18n README.md", + "translate:tier1": "npm run translate-readme -- zh ja pt-br ko es de fr", + "translate:tier2": "npm run translate-readme -- he ar ru pl cs nl tr uk", + "translate:tier3": "npm run translate-readme -- vi id th hi bn ro sv", + "translate:tier4": "npm run translate-readme -- it el hu fi da no", + "translate:all": "npm run translate:tier1 && npm run translate:tier2 && npm run translate:tier3 && npm run translate:tier4", "bug-report": "npx tsx scripts/bug-report/cli.ts" }, "dependencies": { diff --git a/scripts/translate-readme/cli.ts b/scripts/translate-readme/cli.ts index d90607e5..b92e7c3a 100644 --- a/scripts/translate-readme/cli.ts +++ b/scripts/translate-readme/cli.ts @@ -1,4 +1,4 @@ -#!/usr/bin/env npx tsx +#!/usr/bin/env bun import { translateReadme, SUPPORTED_LANGUAGES } from "./index.ts"; @@ -11,6 +11,8 @@ interface CliArgs { model?: string; maxBudget?: number; verbose: boolean; + force: boolean; + parallel: number; help: boolean; listLanguages: boolean; } @@ -39,6 +41,8 @@ OPTIONS: -m, --model Claude model to use (default: sonnet) --max-budget Maximum budget in USD -v, --verbose Show detailed progress + -f, --force Force re-translation ignoring cache + --parallel Run n translations concurrently (default: 1) -h, --help Show this help message --list-languages List all supported language codes @@ -59,40 +63,46 @@ SUPPORTED LANGUAGES: function printLanguages(): void { const LANGUAGE_NAMES: Record = { - ar: "Arabic", - bg: "Bulgarian", - cs: "Czech", - da: "Danish", - de: "German", - el: "Greek", - es: "Spanish", - et: "Estonian", - fi: "Finnish", - fr: "French", - he: "Hebrew", - hi: "Hindi", - hu: "Hungarian", - id: "Indonesian", - it: "Italian", + // Tier 1 - No-brainers + zh: "Chinese (Simplified)", ja: "Japanese", - ko: "Korean", - lt: "Lithuanian", - lv: "Latvian", - nl: "Dutch", - no: "Norwegian", - pl: "Polish", - pt: "Portuguese", "pt-br": "Brazilian Portuguese", - ro: "Romanian", + ko: "Korean", + es: "Spanish", + de: "German", + fr: "French", + // Tier 2 - Strong tech scenes + he: "Hebrew", + ar: "Arabic", ru: "Russian", - sk: "Slovak", - sl: "Slovenian", - sv: "Swedish", - th: "Thai", + pl: "Polish", + cs: "Czech", + nl: "Dutch", tr: "Turkish", uk: "Ukrainian", + // Tier 3 - Emerging/Growing fast vi: "Vietnamese", - zh: "Chinese (Simplified)", + id: "Indonesian", + th: "Thai", + hi: "Hindi", + bn: "Bengali", + ro: "Romanian", + sv: "Swedish", + // Tier 4 - Why not + it: "Italian", + el: "Greek", + hu: "Hungarian", + fi: "Finnish", + da: "Danish", + no: "Norwegian", + // Other supported + bg: "Bulgarian", + et: "Estonian", + lt: "Lithuanian", + lv: "Latvian", + pt: "Portuguese", + sk: "Slovak", + sl: "Slovenian", "zh-tw": "Chinese (Traditional)", }; @@ -112,6 +122,8 @@ function parseArgs(argv: string[]): CliArgs { languages: [], preserveCode: true, verbose: false, + force: false, + parallel: 1, help: false, listLanguages: false, }; @@ -134,6 +146,10 @@ function parseArgs(argv: string[]): CliArgs { case "--verbose": args.verbose = true; break; + case "-f": + case "--force": + args.force = true; + break; case "--no-preserve-code": args.preserveCode = false; break; @@ -152,6 +168,13 @@ function parseArgs(argv: string[]): CliArgs { case "--max-budget": args.maxBudget = parseFloat(argv[++i]); break; + case "--parallel": + args.parallel = parseInt(argv[++i], 10); + if (isNaN(args.parallel) || args.parallel < 1) { + console.error("Error: --parallel must be a positive integer"); + process.exit(1); + } + break; default: if (arg.startsWith("-")) { console.error(`Unknown option: ${arg}`); @@ -215,6 +238,8 @@ async function main(): Promise { model: args.model, maxBudgetUsd: args.maxBudget, verbose: args.verbose, + force: args.force, + parallel: args.parallel, }); // Exit with error code if any translations failed diff --git a/scripts/translate-readme/index.ts b/scripts/translate-readme/index.ts index 30c71980..a4ebf85f 100644 --- a/scripts/translate-readme/index.ts +++ b/scripts/translate-readme/index.ts @@ -1,6 +1,34 @@ import { query, type SDKMessage, type SDKResultMessage } from "@anthropic-ai/claude-agent-sdk"; import * as fs from "fs/promises"; import * as path from "path"; +import { createHash } from "crypto"; + +interface TranslationCache { + sourceHash: string; + lastUpdated: string; + translations: Record; +} + +function hashContent(content: string): string { + return createHash("sha256").update(content).digest("hex").slice(0, 16); +} + +async function readCache(cachePath: string): Promise { + try { + const data = await fs.readFile(cachePath, "utf-8"); + return JSON.parse(data); + } catch { + return null; + } +} + +async function writeCache(cachePath: string, cache: TranslationCache): Promise { + await fs.writeFile(cachePath, JSON.stringify(cache, null, 2), "utf-8"); +} export interface TranslationOptions { /** Source README file path */ @@ -19,6 +47,10 @@ export interface TranslationOptions { maxBudgetUsd?: number; /** Verbose output */ verbose?: boolean; + /** Force re-translation even if cached */ + force?: boolean; + /** Number of concurrent translations (default: 1) */ + parallel?: number; } export interface TranslationResult { @@ -27,6 +59,8 @@ export interface TranslationResult { success: boolean; error?: string; costUsd?: number; + /** Whether this was served from cache */ + cached?: boolean; } export interface TranslationJobResult { @@ -37,40 +71,46 @@ export interface TranslationJobResult { } const LANGUAGE_NAMES: Record = { - ar: "Arabic", - bg: "Bulgarian", - cs: "Czech", - da: "Danish", - de: "German", - el: "Greek", - es: "Spanish", - et: "Estonian", - fi: "Finnish", - fr: "French", - he: "Hebrew", - hi: "Hindi", - hu: "Hungarian", - id: "Indonesian", - it: "Italian", + // Tier 1 - No-brainers + zh: "Chinese (Simplified)", ja: "Japanese", - ko: "Korean", - lt: "Lithuanian", - lv: "Latvian", - nl: "Dutch", - no: "Norwegian", - pl: "Polish", - pt: "Portuguese", "pt-br": "Brazilian Portuguese", - ro: "Romanian", + ko: "Korean", + es: "Spanish", + de: "German", + fr: "French", + // Tier 2 - Strong tech scenes + he: "Hebrew", + ar: "Arabic", ru: "Russian", - sk: "Slovak", - sl: "Slovenian", - sv: "Swedish", - th: "Thai", + pl: "Polish", + cs: "Czech", + nl: "Dutch", tr: "Turkish", uk: "Ukrainian", + // Tier 3 - Emerging/Growing fast vi: "Vietnamese", - zh: "Chinese (Simplified)", + id: "Indonesian", + th: "Thai", + hi: "Hindi", + bn: "Bengali", + ro: "Romanian", + sv: "Swedish", + // Tier 4 - Why not + it: "Italian", + el: "Greek", + hu: "Hungarian", + fi: "Finnish", + da: "Danish", + no: "Norwegian", + // Other supported + bg: "Bulgarian", + et: "Estonian", + lt: "Lithuanian", + lv: "Latvian", + pt: "Portuguese", + sk: "Slovak", + sl: "Slovenian", "zh-tw": "Chinese (Traditional)", }; @@ -107,6 +147,7 @@ Guidelines: - Preserve technical accuracy - Use appropriate technical terminology for ${languageName} - Keep proper nouns (product names, company names) unchanged unless they have official translations +- Add a small note at the very top of the document (before any other content) in ${languageName}: "🌐 This is an automated translation. Community corrections are welcome!" Here is the README content to translate: @@ -114,7 +155,12 @@ Here is the README content to translate: ${content} --- -Output ONLY the translated README content, nothing else. Do not include any preamble or explanation.`; +CRITICAL OUTPUT RULES: +- Output ONLY the raw translated markdown content +- Do NOT wrap output in \`\`\`markdown code fences +- Do NOT add any preamble, explanation, or commentary +- Start directly with the translation note, then the content +- The output will be saved directly to a .md file`; let translation = ""; let costUsd = 0; @@ -182,7 +228,21 @@ Always output only the translated content without any surrounding explanation.`, process.stdout.write("\r" + " ".repeat(60) + "\r"); } - return { translation: translation.trim(), costUsd }; + // Strip markdown code fences if Claude wrapped the output + let cleaned = translation.trim(); + if (cleaned.startsWith("```markdown")) { + cleaned = cleaned.slice("```markdown".length); + } else if (cleaned.startsWith("```md")) { + cleaned = cleaned.slice("```md".length); + } else if (cleaned.startsWith("```")) { + cleaned = cleaned.slice(3); + } + if (cleaned.endsWith("```")) { + cleaned = cleaned.slice(0, -3); + } + cleaned = cleaned.trim(); + + return { translation: cleaned, costUsd }; } export async function translateReadme( @@ -197,6 +257,8 @@ export async function translateReadme( model, maxBudgetUsd, verbose = false, + force = false, + parallel = 1, } = options; // Read source file @@ -207,6 +269,12 @@ export async function translateReadme( const outDir = outputDir ? path.resolve(outputDir) : path.dirname(sourcePath); await fs.mkdir(outDir, { recursive: true }); + // Compute content hash and load cache + const sourceHash = hashContent(content); + const cachePath = path.join(outDir, ".translation-cache.json"); + const cache = await readCache(cachePath); + const isHashMatch = cache?.sourceHash === sourceHash; + const results: TranslationResult[] = []; let totalCostUsd = 0; @@ -214,24 +282,28 @@ export async function translateReadme( console.log(`📖 Source: ${sourcePath}`); console.log(`📂 Output: ${outDir}`); console.log(`🌍 Languages: ${languages.join(", ")}`); + if (parallel > 1) { + console.log(`⚡ Parallel: ${parallel} concurrent translations`); + } console.log(""); } - for (const lang of languages) { - // Check budget - if (maxBudgetUsd && totalCostUsd >= maxBudgetUsd) { - results.push({ - language: lang, - outputPath: "", - success: false, - error: "Budget exceeded", - }); - continue; - } - + // Worker function for a single language + async function translateLang(lang: string): Promise { const outputFilename = pattern.replace("{lang}", lang); const outputPath = path.join(outDir, outputFilename); + // Check cache (unless --force) + if (!force && isHashMatch && cache?.translations[lang]) { + const outputExists = await fs.access(outputPath).then(() => true).catch(() => false); + if (outputExists) { + if (verbose) { + console.log(` ✅ ${outputFilename} (cached, unchanged)`); + } + return { language: lang, outputPath, success: true, cached: true, costUsd: 0 }; + } + } + if (verbose) { console.log(`🔄 Translating to ${getLanguageName(lang)} (${lang})...`); } @@ -240,37 +312,81 @@ export async function translateReadme( const { translation, costUsd } = await translateToLanguage(content, lang, { preserveCode, model, - verbose, + verbose: verbose && parallel === 1, // Only show progress spinner for sequential }); await fs.writeFile(outputPath, translation, "utf-8"); - totalCostUsd += costUsd; - - results.push({ - language: lang, - outputPath, - success: true, - costUsd, - }); if (verbose) { console.log(` ✅ Saved to ${outputFilename} ($${costUsd.toFixed(4)})`); } + + return { language: lang, outputPath, success: true, costUsd }; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); - results.push({ - language: lang, - outputPath, - success: false, - error: errorMessage, - }); - if (verbose) { - console.log(` ❌ Failed: ${errorMessage}`); + console.log(` ❌ ${lang} failed: ${errorMessage}`); } + return { language: lang, outputPath, success: false, error: errorMessage }; } } + // Run with concurrency limit + async function runWithConcurrency(items: T[], limit: number, fn: (item: T) => Promise): Promise { + const results: TranslationResult[] = []; + const executing: Promise[] = []; + + for (const item of items) { + // Check budget before starting new translation + if (maxBudgetUsd && totalCostUsd >= maxBudgetUsd) { + results.push({ + language: String(item), + outputPath: "", + success: false, + error: "Budget exceeded", + }); + continue; + } + + const p = fn(item).then((result) => { + results.push(result); + if (result.costUsd) { + totalCostUsd += result.costUsd; + } + }); + + executing.push(p.then(() => { + executing.splice(executing.indexOf(p.then(() => {})), 1); + })); + + if (executing.length >= limit) { + await Promise.race(executing); + } + } + + await Promise.all(executing); + return results; + } + + const translationResults = await runWithConcurrency(languages, parallel, translateLang); + results.push(...translationResults); + + // Save updated cache + const newCache: TranslationCache = { + sourceHash, + lastUpdated: new Date().toISOString(), + translations: { + ...(isHashMatch ? cache?.translations : {}), + ...Object.fromEntries( + results.filter(r => r.success && !r.cached).map(r => [ + r.language, + { hash: sourceHash, translatedAt: new Date().toISOString(), costUsd: r.costUsd || 0 } + ]) + ), + }, + }; + await writeCache(cachePath, newCache); + const successful = results.filter((r) => r.success).length; const failed = results.filter((r) => !r.success).length;