diff --git a/scripts/scrubts.py b/scripts/scrubts.py index ede6af60d43..6426189a347 100644 --- a/scripts/scrubts.py +++ b/scripts/scrubts.py @@ -10,15 +10,16 @@ import argparse import pathlib -import re import sys - +from dataclasses import dataclass def rewriteLines(input, scrubbedContext, tsFilePath): result = [] previouslyInContext = False contextWasPresent = False messageHashes = [] + mergedContextsCount = 0 + removedDuplicatesCount = 0 lineIter = iter(input) for line in lineIter: @@ -27,6 +28,7 @@ def rewriteLines(input, scrubbedContext, tsFilePath): if line.count(scrubbedContext + r"") == 1: # It the context being scrubbed contextWasPresent = True if previouslyInContext: # Previous context was a scrubbed context, so merge them + mergedContextsCount += 1 result = result[ : -2] # Remove recent: \n continue # ...and skip this input line else: @@ -35,7 +37,7 @@ def rewriteLines(input, scrubbedContext, tsFilePath): previouslyInContext = False # Message de-duplicating - if previouslyInContext and line.count(r"") == 1: # message in scrubbed context + if previouslyInContext and line.count(r"") == 1: # Any new context + inContext = (line.count(scrubbedContext + r"") == 1) + continue + if line.count(r".. (possibly multi-line) + sourceXml.append(sourceLine) + if sourceLine.count(r"") == 1: + break + sourceXmlHash = hash(str(sourceXml)) + translationXml = [] + for translationLine in lineIter: # .. (possibly multi-line) + translationXml.append(translationLine) + if translationLine.count(r"") == 1: + break + translation = Translation(lineNr + 1, translationXml) + if sourceXmlHash in messages: + messages[sourceXmlHash].translations.append(translation) + else: + messages[sourceXmlHash] = Source(sourceXml, [translation]) + + for sourceId in messages: + source = messages[sourceId] + translationsCount = len(source.translations) + if translationsCount > 1: + print (f"\n{translationsCount} duplicates for source:") + for sourceXmlLine in source.sourceXml: + print (sourceXmlLine.rstrip()) + for translation in source.translations: + print (f"\n{tsFilePath}:{translation.lineNr}") + for translationXmlLine in translation.translationXml: + print (translationXmlLine.rstrip()) + + def processTsFile(tsFilePath, scrubbedContext): with open(tsFilePath, 'r') as tsInputFile: lines = tsInputFile.readlines() result = rewriteLines(lines, scrubbedContext, tsFilePath) + if lines != result: + with open(tsFilePath, 'w') as tsOutputFile: + for line in result: + tsOutputFile.write(line) - with open(tsFilePath, 'w') as tsOutputFile: - for line in result: - tsOutputFile.write(line) + findDistinctDuplicates(result, scrubbedContext, tsFilePath) def main(): - parser = argparse.ArgumentParser(description='Rewrites a .ts file, removing duplicate messages ' - 'of a specified translation context and joining ' - 'adjacent occurrences of that context. ' - 'Unlike lrelease and lconvert, this script does ' - 'an exact comparison of the whole xml ' - 'tag.') + parser = argparse.ArgumentParser( + description='''Rewrites a .ts file, removing identical duplicate messages of a specified + translation context and joining adjacent occurrences of that context. + Unlike lrelease and lconvert, this script does an exact comparison of the + whole xml tag when removing duplicates. + Subsequently, the remaining duplicate messages with identical source but + different translation are listed with filename:linenumber.''') parser.add_argument('tsfile', help='The .ts file to be processed.', type=pathlib.Path)