287 lines
10 KiB
Python
287 lines
10 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Generate/normalize RimWorld DefInjected translations for ThingDef using an in-game export.
|
||
|
||
Workflow (this repo):
|
||
1) Use the in-game exporter to produce Auto_CN.xml (Chinese strings) for ThingDef.
|
||
2) Run this script to:
|
||
- prune existing mod ThingDefs/*.xml to the export keyset (removes "extra" keys),
|
||
- generate a new ZZZ_* file containing all "missing" keys translated to English (best-effort).
|
||
|
||
NOTE: The translation here is heuristic and intended to reduce manual work. Any remaining
|
||
Chinese text can be found via grep and fixed by hand.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import re
|
||
import sys
|
||
import xml.etree.ElementTree as ET
|
||
from pathlib import Path
|
||
|
||
|
||
_CJK_RE = re.compile(r"[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]")
|
||
|
||
|
||
_TERM_MAP = {
|
||
"乌拉帝国": "Wula Empire",
|
||
"乌拉": "Wula",
|
||
"合成人": "synth",
|
||
"充电站": "charging station",
|
||
"维护舱": "maintenance pod",
|
||
"地下维护站": "underground maintenance station",
|
||
"轨道输送信标": "orbital transfer beacon",
|
||
"运输舱": "transport pod",
|
||
"物资输送舱": "supply pod",
|
||
"物资回收舱": "recovery pod",
|
||
"大门": "gate",
|
||
"地板": "floor",
|
||
"堡垒墙": "fortress wall",
|
||
"掩体": "shelter",
|
||
"暗物质发电机": "dark matter generator",
|
||
"聚变发电机": "fusion generator",
|
||
"火山炮": "volcano cannon",
|
||
"反战车炮塔": "anti-vehicle turret",
|
||
"激光炮塔": "laser turret",
|
||
"迫击炮塔": "mortar turret",
|
||
"预制件": "prefab",
|
||
"空投": "airdrop",
|
||
"信标": "beacon",
|
||
"碉堡": "bunker",
|
||
"要塞": "fortress",
|
||
"大型": "large",
|
||
"小型": "small",
|
||
"前哨站": "outpost",
|
||
"炮塔群": "turret group",
|
||
"地堡": "bunker",
|
||
"感应地雷": "proximity mine",
|
||
"跃迁引擎": "teleport engine",
|
||
"编织体": "weaver core",
|
||
"作业通讯台": "operations comms console",
|
||
"挖掘机": "excavator",
|
||
"战斗挖掘机": "combat excavator",
|
||
"陆行舰": "landstrider",
|
||
"放射盾": "radiant shield",
|
||
"灵能泰坦": "Psititan",
|
||
"蛭石": "Vermiculite",
|
||
"巡飞弹": "loitering munition",
|
||
"猫猫": "Kitty",
|
||
"猫猫冲锋队": "Kitty Assault Squad",
|
||
"特战猫猫": "Special Ops Kitty",
|
||
"猫猫劳工": "Kitty Laborer",
|
||
"突击猫猫": "Assault Kitty",
|
||
"兵蚁": "Ant Trooper",
|
||
"战车": "Panzer",
|
||
"喷火战车": "Flamethrower Panzer",
|
||
"渡鸦": "Raven",
|
||
"金红石": "Rutile",
|
||
"棱晶": "Prism",
|
||
"深渊": "Abyss",
|
||
"鹅卵石": "Pebble",
|
||
"磁石": "Magnetite",
|
||
"奇怪的": "Strange",
|
||
"空投区": "drop zone",
|
||
"区域": "area",
|
||
"中型": "medium",
|
||
"微型": "mini",
|
||
"突击护航舰": "assault escort ship",
|
||
"桌子": "table",
|
||
"旗帜": "flag",
|
||
"帝国舰队": "Imperial Fleet",
|
||
"轰炸机": "bomber",
|
||
"蜂群无人机": "swarm drone",
|
||
"攻击机": "striker",
|
||
"迫击炮弹": "mortar shell",
|
||
"迫击炮": "mortar",
|
||
"等离子体": "plasma",
|
||
"爆弹": "blast round",
|
||
"铬铁": "chromite",
|
||
"磷灰": "apatite",
|
||
"机械乌拉": "Wula synth",
|
||
"神人大鹅": "Legendary Goose",
|
||
"落地中": "landing",
|
||
"建造中": "building",
|
||
"科研蓝图": "Techprint",
|
||
"许可": "permit",
|
||
"安装隐藏式天线": "Install concealed antenna",
|
||
"帝国攻击舰队已抵达": "Imperial strike fleet has arrived",
|
||
"帝国巡洋舰已抵达": "Imperial cruiser has arrived",
|
||
"帝国母舰已抵达": "Imperial mothership has arrived",
|
||
"帝国攻击舰队响应请求抵达殖民地上空!": "The Imperial strike fleet has arrived above the colony in response to your request!",
|
||
"一艘帝国巡洋舰响应请求抵达殖民地上空!": "An Imperial cruiser has arrived above the colony in response to your request!",
|
||
"一艘帝国母舰响应请求抵达殖民地上空!": "An Imperial mothership has arrived above the colony in response to your request!",
|
||
"射程": "Range",
|
||
"冲击半径": "Impact radius",
|
||
"供电半径": "Power radius",
|
||
"暗物质燃料": "Dark matter fuel",
|
||
"需要填入封装的暗物质": "Requires packaged dark matter.",
|
||
"石块": "Stone chunks",
|
||
"需要填入石块": "Requires stone chunks.",
|
||
"零部件": "Components",
|
||
"磁力光束": "Magnetic beam",
|
||
"双子魔眼": "Twin Demon Eyes",
|
||
"魔眼": "Demon Eye",
|
||
"月长石": "Moonstone",
|
||
"青金石": "Lapis Lazuli",
|
||
"火欧泊": "Fire Opal",
|
||
"铱锇": "Iridosmium",
|
||
"晶丛": "Crystal Cluster",
|
||
"陨磷": "Meteoric Phosphorus",
|
||
"横扫": "Sweep",
|
||
"链锯": "Chainsaw",
|
||
"槌头": "Hammerhead",
|
||
"无法接触。": "Cannot be reached.",
|
||
}
|
||
|
||
|
||
def _title_case_simple(text: str) -> str:
|
||
return " ".join(w[:1].upper() + w[1:] if w else "" for w in text.split())
|
||
|
||
|
||
def _apply_term_map(text: str) -> str:
|
||
out = text
|
||
for cn, en in sorted(_TERM_MAP.items(), key=lambda kv: len(kv[0]), reverse=True):
|
||
out = out.replace(cn, en)
|
||
return out
|
||
|
||
|
||
def translate_cn_to_en(text: str) -> str:
|
||
raw = (text or "").replace("\r", "").strip()
|
||
if not raw:
|
||
return ""
|
||
|
||
# Already English-ish or code; keep.
|
||
if not _CJK_RE.search(raw):
|
||
return raw
|
||
|
||
# Blueprint labels: X(蓝图) -> X (Blueprint)
|
||
raw = raw.replace("(蓝图)", " (Blueprint)")
|
||
raw = raw.replace("(建造中)", " (building)")
|
||
raw = raw.replace("(落地中)", " (landing)")
|
||
|
||
# Corpse labels: "...尸体" -> "Corpse of ..."
|
||
if raw.endswith("尸体") and "的尸体" not in raw:
|
||
name = raw.removesuffix("尸体")
|
||
name = _apply_term_map(name).strip()
|
||
return f"Corpse of {name}"
|
||
|
||
# Corpse descriptions: "...的尸体。" -> "The corpse of ..."
|
||
if raw.endswith("的尸体。"):
|
||
name = raw.removesuffix("的尸体。")
|
||
name = _apply_term_map(name)
|
||
return f"The corpse of {name}."
|
||
|
||
# Common frame instruction prefix.
|
||
raw = raw.replace(
|
||
"清理出一块场地并准备好资源,使得乌拉帝国可以向此处投放建筑。",
|
||
"Clear a landing zone and prepare the resources so the Wula Empire can airdrop a building here.",
|
||
)
|
||
raw = raw.replace(
|
||
"清理出一块场地并准备好资源,使得乌拉帝国母舰可以向此处投放大型战争机械。",
|
||
"Clear a landing zone and prepare the resources so the Wula Empire mothership can drop a large war machine here.",
|
||
)
|
||
raw = raw.replace(
|
||
"清理出一块场地并准备好资源,使得乌拉帝国母舰可以向此处派遣一艘穿梭机。",
|
||
"Clear a landing zone and prepare the resources so the Wula Empire mothership can dispatch a shuttle here.",
|
||
)
|
||
|
||
# Per-line term substitutions.
|
||
lines = raw.split("\n")
|
||
lines = [_apply_term_map(line) for line in lines]
|
||
out = "\n".join(lines)
|
||
|
||
# Quick polish for a few common lowercase nouns after mapping.
|
||
out = out.replace("Wula Empire synth", "Wula Empire synth")
|
||
out = out.replace("synth", "Synth")
|
||
out = out.replace("comms", "comms")
|
||
|
||
return out
|
||
|
||
|
||
def parse_langdata(path: Path) -> dict[str, str]:
|
||
root = ET.parse(path).getroot()
|
||
return {c.tag: (c.text or "") for c in root}
|
||
|
||
|
||
def write_langdata(path: Path, entries: list[tuple[str, str]]) -> None:
|
||
root = ET.Element("LanguageData")
|
||
for k, v in entries:
|
||
el = ET.SubElement(root, k)
|
||
el.text = v
|
||
tree = ET.ElementTree(root)
|
||
ET.indent(tree, space=" ", level=0)
|
||
path.parent.mkdir(parents=True, exist_ok=True)
|
||
tree.write(path, encoding="utf-8", xml_declaration=True)
|
||
|
||
|
||
def main(argv: list[str]) -> int:
|
||
ap = argparse.ArgumentParser()
|
||
ap.add_argument("--export", type=Path, required=True, help="Exported Auto_CN.xml for ThingDef")
|
||
ap.add_argument("--mod-dir", type=Path, required=True, help="Mod Languages/English/DefInjected/ThingDefs directory")
|
||
ap.add_argument(
|
||
"--write-missing",
|
||
type=Path,
|
||
required=True,
|
||
help="Output path for generated missing translations (ZZZ_* file recommended)",
|
||
)
|
||
ap.add_argument(
|
||
"--prune-existing",
|
||
action="store_true",
|
||
help="Rewrite existing ThingDefs/*.xml to only keep keys present in export",
|
||
)
|
||
args = ap.parse_args(argv)
|
||
|
||
export_root = ET.parse(args.export).getroot()
|
||
export_items = [(c.tag, (c.text or "").replace("\r", "")) for c in export_root]
|
||
export_keys = [k for k, _ in export_items]
|
||
export_set = set(export_keys)
|
||
export_cn = {k: v for k, v in export_items}
|
||
|
||
existing_files = sorted(args.mod_dir.glob("*.xml"))
|
||
# If regenerating the missing file, do not treat the previous output as existing input.
|
||
existing_files = [p for p in existing_files if p.resolve() != args.write_missing.resolve()]
|
||
existing_by_file: dict[Path, dict[str, str]] = {}
|
||
merged_existing: dict[str, str] = {}
|
||
|
||
for f in existing_files:
|
||
data = parse_langdata(f)
|
||
existing_by_file[f] = data
|
||
# simulate in-game merge by filename order
|
||
for k, v in data.items():
|
||
merged_existing[k] = v
|
||
|
||
missing = [k for k in export_keys if k not in merged_existing]
|
||
extra = sorted([k for k in merged_existing.keys() if k not in export_set])
|
||
|
||
print(f"export_keys={len(export_keys)} present={len(export_keys)-len(missing)} missing={len(missing)} extra={len(extra)}")
|
||
|
||
if args.prune_existing and extra:
|
||
for f, data in existing_by_file.items():
|
||
kept = [(k, v) for k, v in data.items() if k in export_set]
|
||
if len(kept) == len(data):
|
||
continue
|
||
write_langdata(f, kept)
|
||
print(f"pruned_existing_files={sum(1 for f,d in existing_by_file.items() if any(k not in export_set for k in d))}")
|
||
|
||
missing_entries: list[tuple[str, str]] = []
|
||
for k in export_keys:
|
||
if k not in merged_existing:
|
||
missing_entries.append((k, translate_cn_to_en(export_cn.get(k, ""))))
|
||
|
||
write_langdata(args.write_missing, missing_entries)
|
||
print(f"wrote_missing_file={args.write_missing} missing_entries={len(missing_entries)}")
|
||
|
||
# Warn if any CJK remains.
|
||
remain = [(k, v) for k, v in missing_entries if _CJK_RE.search(v or "")]
|
||
print(f"missing_entries_with_cjk={len(remain)}")
|
||
for k, v in remain[:20]:
|
||
snippet = (v or "").strip().replace("\n", "\\n")
|
||
print(f"CJK {k} -> {snippet[:120]}")
|
||
|
||
return 0
|
||
|
||
|
||
if __name__ == "__main__":
|
||
raise SystemExit(main(sys.argv[1:]))
|