| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  | #!/usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Copyright (C) 2022 The Qt Company Ltd. | 
					
						
							| 
									
										
										
										
											2023-01-04 08:52:22 +01:00
										 |  |  | # SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | # See argparse description in main | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Run on all .ts files in Qt Creator from qtcreator root dir: | 
					
						
							|  |  |  | #  for tsfile in share/qtcreator/translations/qtcreator_*.ts; do python scripts/scrubts.py $tsfile -context FooBar; done | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import argparse | 
					
						
							|  |  |  | import pathlib | 
					
						
							|  |  |  | import sys | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  | from dataclasses import dataclass | 
					
						
							| 
									
										
										
										
											2022-09-26 15:09:59 +02:00
										 |  |  | from enum import Enum, auto | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | def rewriteLines(input, scrubbedContext, tsFilePath): | 
					
						
							|  |  |  |     result = [] | 
					
						
							|  |  |  |     previouslyInContext = False | 
					
						
							|  |  |  |     contextWasPresent = False | 
					
						
							|  |  |  |     messageHashes = [] | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |     mergedContextsCount = 0 | 
					
						
							|  |  |  |     removedDuplicatesCount = 0 | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     lineIter = iter(input) | 
					
						
							|  |  |  |     for line in lineIter: | 
					
						
							|  |  |  |         # Context merging | 
					
						
							|  |  |  |         if line.count(r"</name>") == 1: # Any new context | 
					
						
							|  |  |  |             if line.count(scrubbedContext + r"</name>") == 1: # It the context being scrubbed | 
					
						
							|  |  |  |                 contextWasPresent = True | 
					
						
							|  |  |  |                 if previouslyInContext: # Previous context was a scrubbed context, so merge them | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |                     mergedContextsCount += 1 | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  |                     result = result[ : -2] # Remove recent:   </context>\n<context> | 
					
						
							|  |  |  |                     continue               # ...and skip this input line | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     previouslyInContext = True | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 previouslyInContext = False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Message de-duplicating | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |         if previouslyInContext and line.count(r"<message") == 1: # message in scrubbed context | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  |             # Iterate through message | 
					
						
							|  |  |  |             messageLines = [line] | 
					
						
							|  |  |  |             for messageLine in lineIter: | 
					
						
							|  |  |  |                 messageLines.append(messageLine) | 
					
						
							|  |  |  |                 if messageLine.count(r"</message>") == 1: # message finished | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # Duplication check | 
					
						
							|  |  |  |             messageHash = hash(str(messageLines)) | 
					
						
							|  |  |  |             if messageHash not in messageHashes: | 
					
						
							|  |  |  |                 result = result + messageLines | 
					
						
							|  |  |  |                 messageHashes.append(messageHash) # Append if not a duplicate | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 removedDuplicatesCount += 1 | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         result.append(line) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not contextWasPresent: | 
					
						
							|  |  |  |         error = f"Context \"{scrubbedContext}\" was not found in {tsFilePath}" | 
					
						
							|  |  |  |         sys.exit(error) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |     print (f"{tsFilePath}:") | 
					
						
							|  |  |  |     print (f"  {removedDuplicatesCount} identical duplicate message(s) removed.") | 
					
						
							|  |  |  |     print (f"  {mergedContextsCount} occurrence(s) of context \"{scrubbedContext}\" merged.") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  |     return result | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  | def findDistinctDuplicates(input, scrubbedContext, tsFilePath): | 
					
						
							|  |  |  |     inContext = False | 
					
						
							| 
									
										
										
										
											2022-09-09 23:54:18 +02:00
										 |  |  |     inputLineNr = 0 | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @dataclass | 
					
						
							|  |  |  |     class Translation: | 
					
						
							|  |  |  |         lineNr: int | 
					
						
							|  |  |  |         translationXml: [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @dataclass | 
					
						
							|  |  |  |     class Source: | 
					
						
							|  |  |  |         sourceXml: str | 
					
						
							|  |  |  |         translations: [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     messages = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     lineIter = iter(input) | 
					
						
							| 
									
										
										
										
											2022-09-09 23:54:18 +02:00
										 |  |  |     for line in lineIter: | 
					
						
							|  |  |  |         inputLineNr += 1 | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |         if line.count(r"</name>") == 1: # Any new context | 
					
						
							|  |  |  |             inContext = (line.count(scrubbedContext + r"</name>") == 1) | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if line.count(r"<message") == 0: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if inContext: | 
					
						
							|  |  |  |             sourceXml = [] | 
					
						
							| 
									
										
										
										
											2022-09-26 15:09:59 +02:00
										 |  |  |             translationXml = [] | 
					
						
							| 
									
										
										
										
											2022-09-09 23:54:18 +02:00
										 |  |  |             lineNr = inputLineNr | 
					
						
							| 
									
										
										
										
											2022-09-26 15:09:59 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |             class Position(Enum): | 
					
						
							|  |  |  |                 MESSAGESTART = auto() | 
					
						
							|  |  |  |                 LOCATION = auto() | 
					
						
							|  |  |  |                 SOURCE = auto() | 
					
						
							|  |  |  |                 COMMENT = auto() | 
					
						
							|  |  |  |                 EXTRACOMMENT = auto() | 
					
						
							|  |  |  |                 TRANSLATORCOMMENT = auto() | 
					
						
							|  |  |  |                 TRANSLATION = auto() | 
					
						
							|  |  |  |                 MESSAGEOVER = auto() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             pos = Position.MESSAGESTART | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             for messageLine in lineIter: | 
					
						
							| 
									
										
										
										
											2022-09-09 23:54:18 +02:00
										 |  |  |                 inputLineNr += 1 | 
					
						
							| 
									
										
										
										
											2022-09-26 15:09:59 +02:00
										 |  |  |                 if messageLine.count(r"<location") == 1: | 
					
						
							|  |  |  |                     pos = Position.LOCATION | 
					
						
							|  |  |  |                 elif messageLine.count(r"<source") == 1: | 
					
						
							|  |  |  |                     pos = Position.SOURCE | 
					
						
							|  |  |  |                 elif messageLine.count(r"<comment") == 1: | 
					
						
							|  |  |  |                     pos = Position.COMMENT | 
					
						
							|  |  |  |                 elif messageLine.count(r"<extracomment") == 1: | 
					
						
							|  |  |  |                     pos = Position.EXTRACOMMENT | 
					
						
							|  |  |  |                 elif messageLine.count(r"<translatorcomment") == 1: | 
					
						
							|  |  |  |                     pos = Position.TRANSLATORCOMMENT | 
					
						
							|  |  |  |                 elif messageLine.count(r"<translation") == 1: | 
					
						
							|  |  |  |                     pos = Position.TRANSLATION | 
					
						
							|  |  |  |                 elif messageLine.count(r"</message>") == 1: | 
					
						
							|  |  |  |                     pos = Position.MESSAGEOVER | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 if pos == Position.SOURCE or pos == Position.COMMENT: | 
					
						
							|  |  |  |                     sourceXml.append(messageLine) | 
					
						
							|  |  |  |                 elif pos == Position.TRANSLATION or pos == Position.EXTRACOMMENT or pos == Position.TRANSLATORCOMMENT: | 
					
						
							|  |  |  |                     translationXml.append(messageLine) | 
					
						
							|  |  |  |                 elif pos == Position.MESSAGEOVER: | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |                     break | 
					
						
							| 
									
										
										
										
											2022-09-26 15:09:59 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |             sourceXmlHash = hash(str(sourceXml)) | 
					
						
							| 
									
										
										
										
											2022-09-09 23:54:18 +02:00
										 |  |  |             translation = Translation(lineNr, translationXml) | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |             if sourceXmlHash in messages: | 
					
						
							|  |  |  |                 messages[sourceXmlHash].translations.append(translation) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 messages[sourceXmlHash] = Source(sourceXml, [translation]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for sourceId in messages: | 
					
						
							|  |  |  |         source = messages[sourceId] | 
					
						
							|  |  |  |         translationsCount = len(source.translations) | 
					
						
							|  |  |  |         if translationsCount > 1: | 
					
						
							| 
									
										
										
										
											2022-09-26 15:09:59 +02:00
										 |  |  |             print (f"\n==========================================") | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |             print (f"\n{translationsCount} duplicates for source:") | 
					
						
							|  |  |  |             for sourceXmlLine in source.sourceXml: | 
					
						
							| 
									
										
										
										
											2022-09-26 15:09:59 +02:00
										 |  |  |                 print (sourceXmlLine.rstrip()) | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |             for translation in source.translations: | 
					
						
							|  |  |  |                 print (f"\n{tsFilePath}:{translation.lineNr}") | 
					
						
							|  |  |  |                 for translationXmlLine in translation.translationXml: | 
					
						
							| 
									
										
										
										
											2022-09-26 15:09:59 +02:00
										 |  |  |                     print (translationXmlLine.rstrip()) | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  | def processTsFile(tsFilePath, scrubbedContext): | 
					
						
							|  |  |  |     with open(tsFilePath, 'r') as tsInputFile: | 
					
						
							|  |  |  |         lines = tsInputFile.readlines() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     result = rewriteLines(lines, scrubbedContext, tsFilePath) | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |     if lines != result: | 
					
						
							|  |  |  |         with open(tsFilePath, 'w') as tsOutputFile: | 
					
						
							|  |  |  |             for line in result: | 
					
						
							|  |  |  |                 tsOutputFile.write(line) | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |     findDistinctDuplicates(result, scrubbedContext, tsFilePath) | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main(): | 
					
						
							| 
									
										
										
										
											2022-09-09 11:35:55 +02:00
										 |  |  |     parser = argparse.ArgumentParser( | 
					
						
							|  |  |  |         description='''Rewrites a .ts file, removing identical duplicate messages of a specified
 | 
					
						
							|  |  |  |                        translation context and joining adjacent occurrences of that context. | 
					
						
							|  |  |  |                        Unlike lrelease and lconvert, this script does an exact comparison of the | 
					
						
							|  |  |  |                        whole <message/> xml tag when removing duplicates. | 
					
						
							|  |  |  |                        Subsequently, the remaining duplicate messages with identical source but | 
					
						
							|  |  |  |                        different translation are listed with filename:linenumber.''')
 | 
					
						
							| 
									
										
										
										
											2022-08-24 21:37:53 +02:00
										 |  |  |     parser.add_argument('tsfile', | 
					
						
							|  |  |  |                         help='The .ts file to be processed.', | 
					
						
							|  |  |  |                         type=pathlib.Path) | 
					
						
							|  |  |  |     parser.add_argument('-context', | 
					
						
							|  |  |  |                         help='Translation context to scrubbed.', | 
					
						
							|  |  |  |                         required=True) | 
					
						
							|  |  |  |     args = parser.parse_args() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     processTsFile(args.tsfile, args.context) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  |     sys.exit(main()) |