-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtranslate_missing.py
More file actions
120 lines (102 loc) · 3.79 KB
/
translate_missing.py
File metadata and controls
120 lines (102 loc) · 3.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
"""
translate_missing.py
This script translates missing or empty keys from the English i18n YAML
file (hyrax.en.yml) into one or more target languages using a local
LibreTranslate endpoint.
Why Python?
- Preserves YAML formatting, comments, and inline/block style using ruamel.yaml
- Ruby's built-in YAML tools rewrite formatting, causing unnecessary diffs
in Git when only translation values change.
- This script is run occasionally, so introducing a small Python helper
is practical for keeping translation diffs clean.
Usage:
Set Up:
1. Make sure LibreTranslate is running locally in a separate shell:
$ pip install libretranslate
$ libretranslate
Run the script:
All at once (multiple languages):
$ python translate_missing.py config/locales/hyrax.en.yml es zh it fr pt-BR
One at a time (single language):
$ python translate_missing.py config/locales/hyrax.en.yml es
"""
import json
import os
import sys
import time
from pathlib import Path

import requests
from ruamel.yaml import YAML
# ----------------------
# Usage Issue Check
# ----------------------
if len(sys.argv) < 3:
    # The base locale file and at least one target language are required.
    # The message names this script (translate_missing.py) and goes to stderr
    # so it does not mix with normal translation output.
    print(
        "Usage: python translate_missing.py <base_file> <lang1> [<lang2> ...]",
        file=sys.stderr,
    )
    sys.exit(1)
# ----------------------
# Configuration
# ----------------------
# Path of the source-locale file as given on the command line,
# e.g. config/locales/hyrax.en.yml (kept as a plain string).
input_filepath = sys.argv[1]
filename = os.path.basename(input_filepath)
# The language code is the second-to-last dotted component of the file name:
# "hyrax.en.yml" -> "hyrax.en" -> ".en" -> "en". splitext keeps the leading
# dot, which must be stripped so the code matches the YAML top-level key and
# is a valid language code for the translation request.
source_lang = os.path.splitext(os.path.splitext(filename)[0])[1].lstrip(".")
if not source_lang:
    sys.exit(
        f"Cannot determine the source language from '{filename}'; "
        "expected a name like hyrax.en.yml"
    )
LIBRETRANSLATE_URL = "http://127.0.0.1:5000/translate"
yaml = YAML()
yaml.explicit_start = True  # Add '---' at the top
yaml.preserve_quotes = True
yaml.width = 4096  # avoid folding long lines
# ----------------------
# Load source YAML once
# ----------------------
# input_filepath is a string, so wrap it in Path to read it; decode as UTF-8
# explicitly so the result does not depend on the platform default encoding.
source_data = yaml.load(Path(input_filepath).read_text(encoding="utf-8"))
source = source_data[source_lang]
# ----------------------
# Translation helper
# ----------------------
def translate_text(text, source_lang=source_lang, target_lang="es", timeout=60):
    """Translate ``text`` via the LibreTranslate endpoint, best-effort.

    Args:
        text: Value to translate. Only non-blank strings are sent to the
            service; ``None``, blank strings and non-string YAML leaves
            (numbers, booleans, lists, ...) are returned unchanged.
        source_lang: Language code of ``text``; defaults to the language
            inferred from the input file name.
        target_lang: Language code to translate into.
        timeout: Seconds to wait for the HTTP response before giving up, so
            an unresponsive server cannot hang the script indefinitely.

    Returns:
        The translated string on success. On any request or response error
        the problem is printed and the original ``text`` is returned, so one
        failed string does not abort the whole run.
    """
    if not isinstance(text, str) or not text.strip():
        return text
    try:
        resp = requests.post(
            LIBRETRANSLATE_URL,
            data={"q": text, "source": source_lang, "target": target_lang},
            timeout=timeout,
        )
        resp.raise_for_status()
        translated = resp.json()["translatedText"]
    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
        # Network/HTTP failures, a non-JSON body, or an unexpected payload
        # shape: report and fall back to the untranslated text.
        print(f"⚠️ Translation error for '{text}': {e}")
        return text
    else:
        print(f"Translated ({target_lang}): {text} → {translated}")
        return translated
# ----------------------
# Recursive merge & translate
# ----------------------
def deep_merge_translate(source_dict, target_dict, target_lang):
    """Return a copy of ``target_dict`` with gaps filled from ``source_dict``.

    Walks ``source_dict`` recursively. Nested mappings are merged key by key;
    for every other value, the target entry is kept when it already holds a
    non-blank value and is otherwise replaced by
    ``translate_text(value, target_lang=target_lang)``.

    Args:
        source_dict: Mapping of source-language entries (may be nested).
        target_dict: Existing translations for the same subtree. ``None`` or
            any non-mapping value is treated as an empty mapping, so a bare
            ``key:`` in the target file does not crash the merge.
        target_lang: Language code passed through to ``translate_text``.

    Returns:
        A new mapping; ``target_dict`` itself is not modified. Keys present
        only in the target are kept as they are.
    """
    # A subtree that is absent, empty or not a mapping is rebuilt from scratch.
    result = target_dict.copy() if isinstance(target_dict, dict) else {}
    for key, value in source_dict.items():
        if isinstance(value, dict):
            result[key] = deep_merge_translate(value, result.get(key), target_lang)
            continue
        existing = result.get(key)
        # A missing key, an empty YAML value (loaded as None) and a blank
        # string all count as "needs translation".
        if existing is None or str(existing).strip() == "":
            result[key] = translate_text(value, target_lang=target_lang)
    return result
# ----------------------
# Main loop over target languages
# ----------------------
# The output prefix is the input path without its ".<lang>.yml" tail
# ("config/locales/hyrax.en.yml" -> "config/locales/hyrax"); it does not
# depend on the target language, so compute it once.
generalized_path_prefix = os.path.splitext(os.path.splitext(input_filepath)[0])[0]
for target_lang in sys.argv[2:]:
    print(f"\n=== Translating into {target_lang} ===")
    output_file = Path(f"{generalized_path_prefix}.{target_lang}.yml")
    print(f"\n Writing to {output_file} ")
    # Load existing target YAML if present
    target_data = None
    if output_file.exists():
        target_data = yaml.load(output_file.read_text(encoding="utf-8"))
    if isinstance(target_data, dict):
        # A bare "<lang>:" key loads as None; treat it as no translations yet.
        target = target_data.get(target_lang) or {}
    else:
        # Missing file, empty file, or a document that is not a mapping.
        target_data = {}
        target = {}
    # Merge and translate
    merged = deep_merge_translate(source, target, target_lang)
    # Write back preserving formatting: update the loaded document in place
    # so any other top-level content of the existing file is kept.
    target_data[target_lang] = merged
    # Translations contain non-ASCII text, so write UTF-8 explicitly.
    with output_file.open("w", encoding="utf-8") as f:
        yaml.dump(target_data, f)
    print(f"✅ Updated {output_file} — existing translations preserved.")