def get_encoding(first_bytes):
    """Return the codec name for text that begins with *first_bytes*.

    The encoding is detected from a leading Byte Order Mark (BOM). Input
    that carries no recognised BOM (including empty input) is treated as
    plain UTF-8.

    Args:
        first_bytes: Raw leading bytes of the input (a ``bytes`` object).

    Returns:
        ``'utf-8-sig'``, ``'utf-16'`` or ``'utf-32'`` when the matching BOM
        is present, otherwise ``'utf-8'``. The BOM-aware codec names are
        returned so that decoding consumes the BOM instead of leaving it
        in the decoded text.
    """
    # Order matters: BOM_UTF32_LE (FF FE 00 00) starts with BOM_UTF16_LE
    # (FF FE), so the UTF-32 marks must be tested before the UTF-16 ones;
    # otherwise a UTF-32 LE input would be misdetected as UTF-16.
    boms = (
        (codecs.BOM_UTF8, 'utf-8-sig'),
        (codecs.BOM_UTF32_LE, 'utf-32'),
        (codecs.BOM_UTF32_BE, 'utf-32'),
        (codecs.BOM_UTF16_LE, 'utf-16'),
        (codecs.BOM_UTF16_BE, 'utf-16'),
    )
    for bom, encoding in boms:
        if first_bytes.startswith(bom):
            return encoding
    return 'utf-8'
class UTFCodingTests(Py23TestCase):
    """Check PartitionTable.from_file() against UTF-8 CSV and binary fixtures.

    The fixtures are opened by relative path, so the tests expect to run
    from the directory that contains them.
    """

    def _load_table(self, path):
        """Parse the fixture at *path* with from_file() and verify the table."""
        with open(path, 'rb') as stream:
            table, _ = gen_esp32part.PartitionTable.from_file(stream)
        table.verify()
        return table

    def _assert_names(self, table, expected_names):
        """Assert that the leading entries of *table* carry *expected_names*."""
        for index, expected in enumerate(expected_names):
            self.assertEqual(table[index].name, expected)

    def _assert_matches_reference_binary(self, table):
        """Assert *table* serialises to the content of 'partitions.bin'."""
        with open('partitions.bin', 'rb') as reference:
            reference_bytes = reference.read()
        self.assertEqual(_strip_trailing_ffs(table.to_binary()), _strip_trailing_ffs(reference_bytes))

    def _assert_csv_contains(self, table, fragments):
        """Assert every entry of *fragments* occurs in the generated CSV."""
        generated = table.to_csv()
        for fragment in fragments:
            self.assertIn(fragment, generated)

    def test_utf8_bom_csv_file(self):
        table = self._load_table('partitions-utf8-bom.csv')
        # The 3 BOM bytes must not become part of the first name, and the
        # non-ASCII UTF-8 name must be preserved.
        self._assert_names(table, ['nvs', 'phy_инит_', 'factory'])
        self._assert_matches_reference_binary(table)

    def test_utf8_without_bom_csv_file(self):
        table = self._load_table('partitions-utf8_without-bom.csv')
        # The non-ASCII UTF-8 name must be preserved.
        self._assert_names(table, ['nvs', 'phy_инит_', 'factory'])
        self._assert_matches_reference_binary(table)

    def test_utf8_bin_file(self):
        table = self._load_table('partitions.bin')
        # The non-ASCII UTF-8 name must be preserved.
        self._assert_names(table, ['nvs', 'phy_инит_', 'factory'])
        self._assert_csv_contains(table, ['\nnvs,', '\nphy_инит_,', '\nfactory,'])

    def test_utf8_without_bom_bin_file(self):
        # NOTE: despite the method name, this loads 'partitions-utf8-bom.bin'.
        table = self._load_table('partitions-utf8-bom.bin')
        # If the old tool grabbed the BOM bytes for the first name then
        # we do not change the name. User needs to fix the CSV file.
        self._assert_names(table, ['\ufeffnvs', 'phy_инит_', 'factory'])
        self._assert_csv_contains(table, ['\ufeffnvs,', '\nphy_инит_,', '\nfactory,'])
app, factory, 0x10000, 1M, diff --git a/components/partition_table/test_gen_esp32part_host/partitions-utf8_without-bom.csv b/components/partition_table/test_gen_esp32part_host/partitions-utf8_without-bom.csv new file mode 100644 index 0000000000..c681174f8a --- /dev/null +++ b/components/partition_table/test_gen_esp32part_host/partitions-utf8_without-bom.csv @@ -0,0 +1,3 @@ +nvs, data, nvs, 0x9000, 24K, +phy_инит_, data, phy, 0xf000, 0x1000, +factory, app, factory, 0x10000, 1M, diff --git a/components/partition_table/test_gen_esp32part_host/partitions.bin b/components/partition_table/test_gen_esp32part_host/partitions.bin new file mode 100644 index 0000000000..588a06b63a Binary files /dev/null and b/components/partition_table/test_gen_esp32part_host/partitions.bin differ