#!/usr/bin/env python
# Copyright (C) 2022 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0

# See argparse description in main
#
# Run on all .ts files in Qt Creator from qtcreator root dir:
# for tsfile in share/qtcreator/translations/qtcreator_*.ts; do python scripts/scrubts.py $tsfile -context FooBar; done
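
# For orientation, an abridged sketch of the Qt Linguist .ts XML that this
# script scans line by line (only the elements referenced below; attributes
# and additional elements omitted):
#
#   <context>
#       <name>FooBar</name>
#       <message>
#           <location filename="..." line="..."/>
#           <source>Text as it appears in the sources</source>
#           <translation>Translated text</translation>
#       </message>
#   </context>
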
import argparse
import pathlib
import sys
from dataclasses import dataclass
from enum import Enum, auto


def rewriteLines(input, scrubbedContext, tsFilePath):
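    """Merge adjacent occurrences of scrubbedContext into one <context> block and
    drop <message> blocks that are exact textual duplicates within that context.

    Exits with an error if scrubbedContext does not occur in the file at all;
    otherwise prints a short summary and returns the rewritten list of lines."""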
    result = []
    previouslyInContext = False
    contextWasPresent = False
    messageHashes = []
    mergedContextsCount = 0
    removedDuplicatesCount = 0

    lineIter = iter(input)
    for line in lineIter:
        # Context merging
        if line.count(r"</name>") == 1:  # Any new context
            if line.count(scrubbedContext + r"</name>") == 1:  # It's the context being scrubbed
                contextWasPresent = True
                if previouslyInContext:  # Previous context was a scrubbed context, so merge them
                    mergedContextsCount += 1
                    result = result[:-2]  # Remove recent: </context>\n<context>
                    continue  # ...and skip this input line
                else:
                    previouslyInContext = True
            else:
                previouslyInContext = False

        # Message de-duplicating
        if previouslyInContext and line.count(r"<message") == 1:  # message in scrubbed context
            # Iterate through the message
            messageLines = [line]
            for messageLine in lineIter:
                messageLines.append(messageLine)
                if messageLine.count(r"</message>") == 1:  # message finished
                    break

            # Duplication check
            messageHash = hash(str(messageLines))
            if messageHash not in messageHashes:
                result = result + messageLines
                messageHashes.append(messageHash)  # Append if not a duplicate
            else:
                removedDuplicatesCount += 1

            continue

        result.append(line)

    if not contextWasPresent:
        error = f"Context \"{scrubbedContext}\" was not found in {tsFilePath}"
        sys.exit(error)

    print(f"{tsFilePath}:")
    print(f"    {removedDuplicatesCount} identical duplicate message(s) removed.")
    print(f"    {mergedContextsCount} occurrence(s) of context \"{scrubbedContext}\" merged.")

    return result


def findDistinctDuplicates(input, scrubbedContext, tsFilePath):
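    """Group the messages of scrubbedContext by their <source> (and <comment>)
    lines and print every group that still has more than one translation, each
    occurrence prefixed with tsFilePath:lineNumber for manual review."""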
    inContext = False
    inputLineNr = 0

    @dataclass
    class Translation:
        lineNr: int
        translationXml: list

    @dataclass
    class Source:
        sourceXml: list
        translations: list

    messages = {}

    lineIter = iter(input)
    for line in lineIter:
        inputLineNr += 1
        if line.count(r"</name>") == 1:  # Any new context
            inContext = (line.count(scrubbedContext + r"</name>") == 1)
            continue
        if line.count(r"<message") == 0:
            continue
        if inContext:
            sourceXml = []
            translationXml = []
            lineNr = inputLineNr

            class Position(Enum):
                MESSAGESTART = auto()
                LOCATION = auto()
                SOURCE = auto()
                COMMENT = auto()
                EXTRACOMMENT = auto()
                TRANSLATORCOMMENT = auto()
                TRANSLATION = auto()
                MESSAGEOVER = auto()

            pos = Position.MESSAGESTART

            for messageLine in lineIter:
                inputLineNr += 1
                if messageLine.count(r"<location") == 1:
                    pos = Position.LOCATION
                elif messageLine.count(r"<source") == 1:
                    pos = Position.SOURCE
                elif messageLine.count(r"<comment") == 1:
                    pos = Position.COMMENT
                elif messageLine.count(r"<extracomment") == 1:
                    pos = Position.EXTRACOMMENT
                elif messageLine.count(r"<translatorcomment") == 1:
                    pos = Position.TRANSLATORCOMMENT
                elif messageLine.count(r"<translation") == 1:
                    pos = Position.TRANSLATION
                elif messageLine.count(r"</message>") == 1:
                    pos = Position.MESSAGEOVER

                if pos == Position.SOURCE or pos == Position.COMMENT:
                    sourceXml.append(messageLine)
                elif pos == Position.TRANSLATION or pos == Position.EXTRACOMMENT or pos == Position.TRANSLATORCOMMENT:
                    translationXml.append(messageLine)
                elif pos == Position.MESSAGEOVER:
                    break

            sourceXmlHash = hash(str(sourceXml))
            translation = Translation(lineNr, translationXml)
            if sourceXmlHash in messages:
                messages[sourceXmlHash].translations.append(translation)
            else:
                messages[sourceXmlHash] = Source(sourceXml, [translation])

    for sourceId in messages:
        source = messages[sourceId]
        translationsCount = len(source.translations)
        if translationsCount > 1:
            print("\n==========================================")
            print(f"\n{translationsCount} duplicates for source:")
            for sourceXmlLine in source.sourceXml:
                print(sourceXmlLine.rstrip())
            for translation in source.translations:
                print(f"\n{tsFilePath}:{translation.lineNr}")
                for translationXmlLine in translation.translationXml:
                    print(translationXmlLine.rstrip())


def processTsFile(tsFilePath, scrubbedContext):
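    """Scrub tsFilePath in place (the file is only rewritten if something
    changed), then report the remaining distinct duplicates."""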
    with open(tsFilePath, 'r') as tsInputFile:
        lines = tsInputFile.readlines()

    result = rewriteLines(lines, scrubbedContext, tsFilePath)
    if lines != result:
        with open(tsFilePath, 'w') as tsOutputFile:
            for line in result:
                tsOutputFile.write(line)

    findDistinctDuplicates(result, scrubbedContext, tsFilePath)


def main():
    parser = argparse.ArgumentParser(
        description='''Rewrites a .ts file, removing identical duplicate messages of a specified
                    translation context and joining adjacent occurrences of that context.
                    Unlike lrelease and lconvert, this script does an exact comparison of the
                    whole <message/> xml tag when removing duplicates.
                    Subsequently, the remaining duplicate messages with identical source but
                    different translation are listed with filename:linenumber.''')
    parser.add_argument('tsfile',
                        help='The .ts file to be processed.',
                        type=pathlib.Path)
    parser.add_argument('-context',
                        help='Translation context to be scrubbed.',
                        required=True)
    args = parser.parse_args()

    processTsFile(args.tsfile, args.context)

    return 0


if __name__ == '__main__':
    sys.exit(main())