Add deduplicate translations script (#96384)

* Add deduplicate script

* Fix forecast_solar incorrect key with space

* Fix utf-8

* Do not create references to other arbitrary other integrations

* Add commented code to only allow applying to referencing integrations

* Tweak

* Bug fix

* Add command line arg for limit reference

* never suggest to update common keys

* Output of script

* Apply suggestions from code review

Co-authored-by: Michael <35783820+mib1185@users.noreply.github.com>

---------

Co-authored-by: Michael <35783820+mib1185@users.noreply.github.com>
This commit is contained in:
Paulus Schoutsen
2023-07-13 11:52:50 -04:00
committed by GitHub
parent c54ceb2da2
commit 7859be6481
198 changed files with 1004 additions and 846 deletions

View File

@@ -0,0 +1,131 @@
"""Deduplicate translations in strings.json."""
import argparse
import json
from pathlib import Path
from homeassistant.const import Platform
from . import upload
from .develop import flatten_translations
from .util import get_base_arg_parser
def get_arguments() -> argparse.Namespace:
"""Get parsed passed in arguments."""
parser = get_base_arg_parser()
parser.add_argument(
"--limit-reference",
"--lr",
action="store_true",
help="Only allow references to same strings.json or common.",
)
return parser.parse_args()
STRINGS_PATH = "homeassistant/components/{}/strings.json"
ENTITY_COMPONENT_PREFIX = tuple(f"component::{domain}::" for domain in Platform)
def run():
"""Clean translations."""
args = get_arguments()
translations = upload.generate_upload_data()
flattened_translations = flatten_translations(translations)
flattened_translations = {
key: value
for key, value in flattened_translations.items()
# Skip existing references
if not value.startswith("[%key:")
}
primary = {}
secondary = {}
for key, value in flattened_translations.items():
if key.startswith("common::"):
primary[value] = key
elif key.startswith(ENTITY_COMPONENT_PREFIX):
primary.setdefault(value, key)
else:
secondary.setdefault(value, key)
merged = {**secondary, **primary}
# Questionable translations are ones that are duplicate but are not referenced
# by the common strings.json or strings.json from an entity component.
questionable = set(secondary.values())
suggest_new_common = set()
update_keys = {}
for key, value in flattened_translations.items():
if merged[value] == key or key.startswith("common::"):
continue
key_integration = key.split("::")[1]
key_to_reference = merged[value]
key_to_reference_integration = key_to_reference.split("::")[1]
is_common = key_to_reference.startswith("common::")
# If we want to only add references to own integrations
# but not include entity integrations
if (
args.limit_reference
and (key_integration != key_to_reference_integration and not is_common)
# Do not create self-references in entity integrations
or key_integration in Platform.__members__.values()
):
continue
if (
# We don't want integrations to reference arbitrary other integrations
key_to_reference in questionable
# Allow reference own integration
and key_to_reference_integration != key_integration
):
suggest_new_common.add(value)
continue
update_keys[key] = f"[%key:{key_to_reference}%]"
if suggest_new_common:
print("Suggested new common words:")
for key in sorted(suggest_new_common):
print(key)
components = sorted({key.split("::")[1] for key in update_keys})
strings = {}
for component in components:
comp_strings_path = Path(STRINGS_PATH.format(component))
strings[component] = json.loads(comp_strings_path.read_text(encoding="utf-8"))
for path, value in update_keys.items():
parts = path.split("::")
parts.pop(0)
component = parts.pop(0)
to_write = strings[component]
while len(parts) > 1:
try:
to_write = to_write[parts.pop(0)]
except KeyError:
print(to_write)
raise
to_write[parts.pop(0)] = value
for component in components:
comp_strings_path = Path(STRINGS_PATH.format(component))
comp_strings_path.write_text(
json.dumps(
strings[component],
indent=2,
ensure_ascii=False,
),
encoding="utf-8",
)
return 0

View File

@@ -92,6 +92,7 @@ def substitute_reference(value, flattened_translations):
def run_single(translations, flattened_translations, integration):
"""Run the script for a single integration."""
print(f"Generating translations for {integration}")
if integration not in translations["component"]:
print("Integration has no strings.json")
@@ -114,8 +115,6 @@ def run_single(translations, flattened_translations, integration):
download.write_integration_translations()
print(f"Generating translations for {integration}")
def run():
"""Run the script."""

View File

@@ -13,7 +13,15 @@ def get_base_arg_parser() -> argparse.ArgumentParser:
parser.add_argument(
"action",
type=str,
choices=["clean", "develop", "download", "frontend", "migrate", "upload"],
choices=[
"clean",
"deduplicate",
"develop",
"download",
"frontend",
"migrate",
"upload",
],
)
parser.add_argument("--debug", action="store_true", help="Enable log output")
return parser