Skip to content

Commit a46a837

Browse files
authored
Add plural per-language forms in check-translations script (apache#52391)
Different languages have different plural forms. Our script should take the original English forms and convert them into the right plural forms for each language. We also noticed that the sorting order was slightly different from the one that eslint uses; the eslint sorting order is now used when generating missing keys.
1 parent 7596539 commit a46a837

File tree

1 file changed

+81
-11
lines changed

1 file changed

+81
-11
lines changed

dev/i18n/check_translations_completeness.py

Lines changed: 81 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,19 @@
4444
Path(__file__).parents[2] / "airflow-core" / "src" / "airflow" / "ui" / "public" / "i18n" / "locales"
4545
)
4646

47+
# Plural suffixes per language (expand as needed).
# Each entry lists the key suffixes a locale must provide for every plural key.
# Languages that are not listed fall back to ["_one", "_other"] at the lookup sites.
PLURAL_SUFFIXES = {
    "en": ["_one", "_other"],
    "pl": ["_one", "_few", "_many", "_other"],
    "de": ["_one", "_other"],
    # CLDR gives French a separate "many" category in addition to one/other.
    "fr": ["_one", "_many", "_other"],
    "nl": ["_one", "_other"],
    "ar": ["_zero", "_one", "_two", "_few", "_many", "_other"],
    # CLDR gives Hebrew a dual ("two") category in addition to one/other.
    "he": ["_one", "_two", "_other"],
    "ko": ["_other"],
    "zh-TW": ["_other"],
}
59+
4760

4861
class LocaleSummary(NamedTuple):
4962
"""
@@ -84,6 +97,30 @@ class LocaleKeySet(NamedTuple):
8497
keys: set[str] | None
8598

8699

100+
def get_plural_base(key: str, suffixes: list[str]) -> str | None:
    """
    Strip a plural suffix from ``key`` and return the remaining base.

    The suffixes are tried in the order given and the first one that ``key``
    ends with is removed.

    :param key: translation key to inspect
    :param suffixes: plural suffixes to look for, e.g. ``["_one", "_other"]``
    :return: ``key`` without the matched suffix, or ``None`` when no suffix matches
    """
    matched = next((candidate for candidate in suffixes if key.endswith(candidate)), None)
    if matched is None:
        return None
    return key[: -len(matched)]
105+
106+
107+
def expand_plural_keys(keys: set[str], lang: str) -> set[str]:
    """
    Expand every plural key in ``keys`` to all plural forms configured for ``lang``.

    A key is treated as plural when it ends with one of the suffixes listed for
    ``lang`` in ``PLURAL_SUFFIXES`` (``_one``/``_other`` when the language is not
    listed). For each such key, ``<base><suffix>`` is added for every configured
    suffix.

    :param keys: flattened translation keys to start from
    :param lang: locale code used to look up the plural suffixes
    :return: a new set with all of ``keys`` plus the added plural variants;
        ``keys`` itself is not modified
    """
    suffixes = PLURAL_SUFFIXES.get(lang, ["_one", "_other"])
    # Only the distinct bases are needed for the expansion. A key that consists of
    # nothing but a suffix yields an empty base and is skipped.
    plural_bases = {base for key in keys if (base := get_plural_base(key, suffixes))}
    # NOTE(review): the input keys are always kept, so a key whose suffix is not
    # configured for ``lang`` (e.g. ``_one`` when only ``_other`` is listed) is
    # still returned as-is - confirm that this is intended.
    expanded = set(keys)
    expanded.update(base + suffix for base in plural_bases for suffix in suffixes)
    return expanded
122+
123+
87124
def get_locale_files() -> list[LocaleFiles]:
88125
return [
89126
LocaleFiles(
@@ -127,33 +164,34 @@ def compare_keys(
127164
for filename in all_files:
128165
key_sets: list[LocaleKeySet] = []
129166
for lf in locale_files:
167+
keys = set()
130168
if filename in lf.files:
131169
path = LOCALES_DIR / lf.locale / filename
132170
try:
133171
data = load_json(path)
134172
keys = set(flatten_keys(data))
135173
except Exception as e:
136174
print(f"Error loading {path}: {e}")
137-
keys = set()
138-
else:
139-
keys = None
140175
key_sets.append(LocaleKeySet(locale=lf.locale, keys=keys))
141176
keys_by_locale = {ks.locale: ks.keys for ks in key_sets}
142177
en_keys = keys_by_locale.get("en", set()) or set()
178+
# Expand English keys for all required plural forms in each language
179+
expanded_en_keys = {lang: expand_plural_keys(en_keys, lang) for lang in keys_by_locale.keys()}
143180
missing_keys: dict[str, list[str]] = {}
144181
extra_keys: dict[str, list[str]] = {}
145182
missing_counts[filename] = {}
146183
for ks in key_sets:
147184
if ks.locale == "en":
148185
continue
186+
required_keys = expanded_en_keys.get(ks.locale, en_keys)
149187
if ks.keys is None:
150-
missing_keys[ks.locale] = list(en_keys)
188+
missing_keys[ks.locale] = list(required_keys)
151189
extra_keys[ks.locale] = []
152-
missing_counts[filename][ks.locale] = len(en_keys)
190+
missing_counts[filename][ks.locale] = len(required_keys)
153191
else:
154-
missing = list(en_keys - ks.keys)
192+
missing = list(required_keys - ks.keys)
155193
missing_keys[ks.locale] = missing
156-
extra_keys[ks.locale] = list(ks.keys - en_keys)
194+
extra_keys[ks.locale] = list(ks.keys - required_keys)
157195
missing_counts[filename][ks.locale] = len(missing)
158196
summary[filename] = LocaleSummary(missing_keys=missing_keys, extra_keys=extra_keys)
159197
return summary, missing_counts
@@ -429,9 +467,11 @@ def add_missing_translations(language: str, summary: dict[str, LocaleSummary], c
429467
Add missing translations for the selected language.
430468
431469
It does it by copying them from English and prefixing with 'TODO: translate:'.
470+
Ensures all required plural forms for the language are added.
432471
"""
472+
suffixes = PLURAL_SUFFIXES.get(language, ["_one", "_other"])
433473
for filename, diff in summary.items():
434-
missing_keys = diff.missing_keys.get(language, [])
474+
missing_keys = set(diff.missing_keys.get(language, []))
435475
if not missing_keys:
436476
continue
437477
en_path = LOCALES_DIR / "en" / filename
@@ -447,10 +487,23 @@ def add_missing_translations(language: str, summary: dict[str, LocaleSummary], c
447487
console.print(f"[yellow]Failed to load {language} file {language}: {e}[/yellow]")
448488
lang_data = {} # Start with an empty dict if the file doesn't exist
449489

450-
# Helper to recursively add missing keys
490+
# Helper to recursively add missing keys, including plural forms
451491
def add_keys(src, dst, prefix=""):
452492
for k, v in src.items():
453493
full_key = f"{prefix}.{k}" if prefix else k
494+
base = get_plural_base(full_key, suffixes)
495+
if base and any(full_key == base + s for s in suffixes):
496+
# Add all plural forms at the current level (not nested)
497+
for suffix in suffixes:
498+
plural_key = base + suffix
499+
key_name = plural_key.split(".")[-1]
500+
if plural_key in missing_keys:
501+
if isinstance(v, dict):
502+
dst[key_name] = {}
503+
add_keys(v, dst[key_name], plural_key)
504+
else:
505+
dst[key_name] = f"TODO: translate: {v}"
506+
continue
454507
if full_key in missing_keys:
455508
if isinstance(v, dict):
456509
dst[k] = {}
@@ -464,10 +517,27 @@ def add_keys(src, dst, prefix=""):
464517
add_keys(v, dst[k], full_key)
465518

466519
add_keys(en_data, lang_data)
467-
# Write back to file, preserving order
520+
521+
# Write back to file, preserving order and using eslint-style key sorting
522+
def eslint_key_sort(obj):
    """
    Return ``obj`` with the keys of every nested dict re-ordered for writing.

    Keys are grouped as: all-digit keys (ordered by numeric value), keys starting
    with an uppercase character, keys starting with a lowercase character, then
    everything else; within the last three groups keys are compared as strings.
    The ordering is meant to match what eslint enforces for the locale files.

    :param obj: decoded JSON value (dict, list or scalar)
    :return: a rebuilt dict/list with sorted dict keys, or ``obj`` itself for
        any other value
    """

    def sort_key(k):
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²" that int() cannot parse, which would raise ValueError.
        if k.isdecimal():
            return (0, int(k))
        if k and k[0].isupper():
            return (1, k)
        if k and k[0].islower():
            return (2, k)
        return (3, k)

    if isinstance(obj, dict):
        return {k: eslint_key_sort(obj[k]) for k in sorted(obj, key=sort_key)}
    if isinstance(obj, list):
        # json.dump(sort_keys=True) also sorted dicts nested in lists; keep doing so.
        return [eslint_key_sort(item) for item in obj]
    return obj
536+
537+
lang_data = eslint_key_sort(lang_data)
468538
lang_path.parent.mkdir(parents=True, exist_ok=True)
469539
with open(lang_path, "w", encoding="utf-8") as f:
470-
json.dump(lang_data, f, ensure_ascii=False, indent=2, sort_keys=True)
540+
json.dump(lang_data, f, ensure_ascii=False, indent=2)
471541
f.write("\n") # Ensure newline at the end of the file
472542
console.print(f"[green]Added missing translations to {lang_path}[/green]")
473543

0 commit comments

Comments
 (0)