#!/usr/bin/env python3
"""
Generate/normalize RimWorld DefInjected translations for ThingDef using an in-game export.

Workflow (this repo):
1) Use the in-game exporter to produce Auto_CN.xml (Chinese strings) for ThingDef.
2) Run this script to:
   - prune existing mod ThingDefs/*.xml to the export keyset (removes "extra" keys),
   - generate a new ZZZ_* file containing all "missing" keys translated to English (best-effort).

NOTE: The translation here is heuristic and intended to reduce manual work.
Any remaining Chinese text can be found via grep and fixed by hand.
"""

from __future__ import annotations

import argparse
import re
import sys
import xml.etree.ElementTree as ET
from pathlib import Path

# Matches any CJK ideograph, kana or hangul syllable; used to decide whether a
# string still needs translating.
_CJK_RE = re.compile(r"[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]")

# Chinese term -> English replacement. Applied longest-key-first (see
# _TERMS_LONGEST_FIRST) so compound terms win over their substrings.
_TERM_MAP = {
    "乌拉帝国": "Wula Empire",
    "乌拉": "Wula",
    "合成人": "synth",
    "充电站": "charging station",
    "维护舱": "maintenance pod",
    "地下维护站": "underground maintenance station",
    "轨道输送信标": "orbital transfer beacon",
    "运输舱": "transport pod",
    "物资输送舱": "supply pod",
    "物资回收舱": "recovery pod",
    "大门": "gate",
    "地板": "floor",
    "堡垒墙": "fortress wall",
    "掩体": "shelter",
    "暗物质发电机": "dark matter generator",
    "聚变发电机": "fusion generator",
    "火山炮": "volcano cannon",
    "反战车炮塔": "anti-vehicle turret",
    "激光炮塔": "laser turret",
    "迫击炮塔": "mortar turret",
    "预制件": "prefab",
    "空投": "airdrop",
    "信标": "beacon",
    "碉堡": "bunker",
    "要塞": "fortress",
    "大型": "large",
    "小型": "small",
    "前哨站": "outpost",
    "炮塔群": "turret group",
    "地堡": "bunker",
    "感应地雷": "proximity mine",
    "跃迁引擎": "teleport engine",
    "编织体": "weaver core",
    "作业通讯台": "operations comms console",
    "挖掘机": "excavator",
    "战斗挖掘机": "combat excavator",
    "陆行舰": "landstrider",
    "放射盾": "radiant shield",
    "灵能泰坦": "Psititan",
    "蛭石": "Vermiculite",
    "巡飞弹": "loitering munition",
    "猫猫": "Kitty",
    "猫猫冲锋队": "Kitty Assault Squad",
    "特战猫猫": "Special Ops Kitty",
    "猫猫劳工": "Kitty Laborer",
    "突击猫猫": "Assault Kitty",
    "兵蚁": "Ant Trooper",
    "战车": "Panzer",
    "喷火战车": "Flamethrower Panzer",
    "渡鸦": "Raven",
    "金红石": "Rutile",
    "棱晶": "Prism",
    "深渊": "Abyss",
    "鹅卵石": "Pebble",
    "磁石": "Magnetite",
    "奇怪的": "Strange",
    "空投区": "drop zone",
    "区域": "area",
    "中型": "medium",
    "微型": "mini",
    "突击护航舰": "assault escort ship",
    "桌子": "table",
    "旗帜": "flag",
    "帝国舰队": "Imperial Fleet",
    "轰炸机": "bomber",
    "蜂群无人机": "swarm drone",
    "攻击机": "striker",
    "迫击炮弹": "mortar shell",
    "迫击炮": "mortar",
    "等离子体": "plasma",
    "爆弹": "blast round",
    "铬铁": "chromite",
    "磷灰": "apatite",
    "机械乌拉": "Wula synth",
    "神人大鹅": "Legendary Goose",
    "落地中": "landing",
    "建造中": "building",
    "科研蓝图": "Techprint",
    "许可": "permit",
    "安装隐藏式天线": "Install concealed antenna",
    "帝国攻击舰队已抵达": "Imperial strike fleet has arrived",
    "帝国巡洋舰已抵达": "Imperial cruiser has arrived",
    "帝国母舰已抵达": "Imperial mothership has arrived",
    "帝国攻击舰队响应请求抵达殖民地上空!": "The Imperial strike fleet has arrived above the colony in response to your request!",
    "一艘帝国巡洋舰响应请求抵达殖民地上空!": "An Imperial cruiser has arrived above the colony in response to your request!",
    "一艘帝国母舰响应请求抵达殖民地上空!": "An Imperial mothership has arrived above the colony in response to your request!",
    "射程": "Range",
    "冲击半径": "Impact radius",
    "供电半径": "Power radius",
    "暗物质燃料": "Dark matter fuel",
    "需要填入封装的暗物质": "Requires packaged dark matter.",
    "石块": "Stone chunks",
    "需要填入石块": "Requires stone chunks.",
    "零部件": "Components",
    "磁力光束": "Magnetic beam",
    "双子魔眼": "Twin Demon Eyes",
    "魔眼": "Demon Eye",
    "月长石": "Moonstone",
    "青金石": "Lapis Lazuli",
    "火欧泊": "Fire Opal",
    "铱锇": "Iridosmium",
    "晶丛": "Crystal Cluster",
    "陨磷": "Meteoric Phosphorus",
    "横扫": "Sweep",
    "链锯": "Chainsaw",
    "槌头": "Hammerhead",
    "无法接触。": "Cannot be reached.",
}

# (term, replacement) pairs ordered longest term first. Computed once instead
# of on every lookup; sorted() is stable, so equal-length terms keep their
# _TERM_MAP insertion order.
_TERMS_LONGEST_FIRST = tuple(
    sorted(_TERM_MAP.items(), key=lambda kv: len(kv[0]), reverse=True)
)

# Full-width punctuation -> ASCII. The literal patterns in this file are
# written with ASCII "(", ")", "," and "!"; folding the full-width forms lets
# them match text that uses either variant.
_FULLWIDTH_TO_ASCII = str.maketrans(
    {
        "\uff08": "(",
        "\uff09": ")",
        "\uff0c": ",",
        "\uff01": "!",
    }
)


def _title_case_simple(text: str) -> str:
    """Upper-case the first character of each whitespace-separated word.

    Unlike ``str.title`` the remainder of each word is left untouched. Runs of
    whitespace collapse to a single space because the words are re-joined.
    """
    return " ".join(w[:1].upper() + w[1:] if w else "" for w in text.split())


def _apply_term_map(text: str) -> str:
    """Replace every ``_TERM_MAP`` term found in *text*, longest terms first."""
    out = text
    for cn, en in _TERMS_LONGEST_FIRST:
        out = out.replace(cn, en)
    return out


def translate_cn_to_en(text: str) -> str:
    """Return a best-effort English rendering of a Chinese def-injected string.

    The steps, in order:

    1. Strip carriage returns and surrounding whitespace; empty/``None`` input
       yields ``""``.
    2. Text without any CJK character is returned as-is.
    3. Full-width ``( ) , !`` are folded to ASCII so the literal patterns match.
    4. Parenthesised state suffixes (blueprint / building / landing) are mapped.
    5. Corpse labels and corpse descriptions are rewritten and returned early.
    6. Known frame-instruction sentences are replaced wholesale.
    7. Remaining text goes through the term map line by line, then
       ``synth`` is capitalised.

    Untranslated Chinese may remain in the result; callers are expected to
    detect and report it.
    """
    raw = (text or "").replace("\r", "").strip()
    if not raw:
        return ""

    # Already English-ish or code; keep.
    if not _CJK_RE.search(raw):
        return raw

    raw = raw.translate(_FULLWIDTH_TO_ASCII)

    # Blueprint labels: X(蓝图) -> X (Blueprint)
    raw = raw.replace("(蓝图)", " (Blueprint)")
    raw = raw.replace("(建造中)", " (building)")
    raw = raw.replace("(落地中)", " (landing)")

    # Corpse labels: "...尸体" -> "Corpse of ..."
    if raw.endswith("尸体") and "的尸体" not in raw:
        name = raw.removesuffix("尸体")
        name = _apply_term_map(name).strip()
        return f"Corpse of {name}"

    # Corpse descriptions: "...的尸体。" -> "The corpse of ..."
    if raw.endswith("的尸体。"):
        name = raw.removesuffix("的尸体。")
        name = _apply_term_map(name)
        return f"The corpse of {name}."

    # Common frame instruction prefix.
    raw = raw.replace(
        "清理出一块场地并准备好资源,使得乌拉帝国可以向此处投放建筑。",
        "Clear a landing zone and prepare the resources so the Wula Empire can airdrop a building here.",
    )
    raw = raw.replace(
        "清理出一块场地并准备好资源,使得乌拉帝国母舰可以向此处投放大型战争机械。",
        "Clear a landing zone and prepare the resources so the Wula Empire mothership can drop a large war machine here.",
    )
    raw = raw.replace(
        "清理出一块场地并准备好资源,使得乌拉帝国母舰可以向此处派遣一艘穿梭机。",
        "Clear a landing zone and prepare the resources so the Wula Empire mothership can dispatch a shuttle here.",
    )

    # Per-line term substitutions.
    out = "\n".join(_apply_term_map(line) for line in raw.split("\n"))

    # Quick polish: the term map emits lowercase "synth"; capitalise it.
    return out.replace("synth", "Synth")


def parse_langdata(path: Path) -> dict[str, str]:
    """Read a LanguageData XML file into ``{tag: text}``.

    Only direct children of the root are read; a child without text maps to
    ``""``. If a tag occurs more than once the last occurrence wins.
    """
    root = ET.parse(path).getroot()
    return {c.tag: (c.text or "") for c in root}


def write_langdata(path: Path, entries: list[tuple[str, str]]) -> None:
    """Write *entries* as children of a ``<LanguageData>`` root to *path*.

    Parent directories are created as needed. The file is written as UTF-8
    with an XML declaration and overwrites any existing file.
    """
    root = ET.Element("LanguageData")
    for k, v in entries:
        el = ET.SubElement(root, k)
        el.text = v
    tree = ET.ElementTree(root)
    ET.indent(tree, space=" ", level=0)
    path.parent.mkdir(parents=True, exist_ok=True)
    tree.write(path, encoding="utf-8", xml_declaration=True)


def main(argv: list[str]) -> int:
    """Command-line entry point.

    Compares the exported key set against the existing translation files in
    ``--mod-dir``, optionally prunes keys that are no longer exported, writes
    heuristic translations for the missing keys to ``--write-missing`` and
    prints a short report (including entries that still contain CJK text).

    Args:
        argv: Command-line arguments, excluding the program name.

    Returns:
        Process exit code; always ``0`` when the function returns normally.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--export", type=Path, required=True, help="Exported Auto_CN.xml for ThingDef")
    ap.add_argument("--mod-dir", type=Path, required=True, help="Mod Languages/English/DefInjected/ThingDefs directory")
    ap.add_argument(
        "--write-missing",
        type=Path,
        required=True,
        help="Output path for generated missing translations (ZZZ_* file recommended)",
    )
    ap.add_argument(
        "--prune-existing",
        action="store_true",
        help="Rewrite existing ThingDefs/*.xml to only keep keys present in export",
    )
    args = ap.parse_args(argv)

    export_root = ET.parse(args.export).getroot()
    export_items = [(c.tag, (c.text or "").replace("\r", "")) for c in export_root]
    # A dict keeps first-seen key order while the last value wins, so a key
    # repeated in the export yields exactly one entry (no duplicate elements
    # in the generated file).
    export_cn = dict(export_items)
    export_keys = list(export_cn)

    # If regenerating the missing file, do not treat the previous output as existing input.
    out_path = args.write_missing.resolve()
    existing_files = [p for p in sorted(args.mod_dir.glob("*.xml")) if p.resolve() != out_path]

    existing_by_file: dict[Path, dict[str, str]] = {}
    merged_existing: dict[str, str] = {}
    for f in existing_files:
        data = parse_langdata(f)
        existing_by_file[f] = data
        # simulate in-game merge by filename order
        merged_existing.update(data)

    missing = [k for k in export_keys if k not in merged_existing]
    extra = sorted(k for k in merged_existing if k not in export_cn)

    print(f"export_keys={len(export_keys)} present={len(export_keys)-len(missing)} missing={len(missing)} extra={len(extra)}")

    if args.prune_existing and extra:
        pruned = 0
        for f, data in existing_by_file.items():
            kept = [(k, v) for k, v in data.items() if k in export_cn]
            if len(kept) == len(data):
                continue
            # NOTE: the file is rebuilt from the parsed tags; ElementTree's
            # default parser does not keep XML comments, so any comments in
            # the original file are not carried over.
            write_langdata(f, kept)
            pruned += 1
        print(f"pruned_existing_files={pruned}")

    missing_entries: list[tuple[str, str]] = [
        (k, translate_cn_to_en(export_cn[k])) for k in missing
    ]

    write_langdata(args.write_missing, missing_entries)
    print(f"wrote_missing_file={args.write_missing} missing_entries={len(missing_entries)}")

    # Warn if any CJK remains.
    remain = [(k, v) for k, v in missing_entries if _CJK_RE.search(v or "")]
    print(f"missing_entries_with_cjk={len(remain)}")
    for k, v in remain[:20]:
        snippet = (v or "").strip().replace("\n", "\\n")
        print(f"CJK {k} -> {snippet[:120]}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))