fix(partition_table): Ignore UTF-8 BOM bytes in csv file

2025-08-01 03:34:32 +02:00 · 2025-05-07 17:22:45 +03:00
parent fe75355314
commit 41dd352149
7 changed files with 79 additions and 15 deletions
--- a/components/partition_table/gen_esp32part.py
+++ b/components/partition_table/gen_esp32part.py
@@ -11,6 +11,7 @@
 # SPDX-License-Identifier: Apache-2.0
 import argparse
 import binascii
+import codecs
 import errno
 import hashlib
 import os
@@ -167,21 +168,36 @@ def critical(msg):
    sys.stderr.write('\n')


+def get_encoding(first_bytes):
+    """Return the codec name for the BOM that starts first_bytes ('utf-8' if there is none)."""
+    # Order matters: BOM_UTF32_LE (FF FE 00 00) starts with BOM_UTF16_LE (FF FE), so the
+    # UTF-32 marks must be tested first or UTF-32 LE input is reported as 'utf-16'.
+    BOMS = (
+        (codecs.BOM_UTF8, 'utf-8-sig'),
+        (codecs.BOM_UTF32_LE, 'utf-32'),
+        (codecs.BOM_UTF32_BE, 'utf-32'),
+        (codecs.BOM_UTF16_LE, 'utf-16'),
+        (codecs.BOM_UTF16_BE, 'utf-16'),
+    )
+    matches = (encoding for bom, encoding in BOMS if first_bytes.startswith(bom))
+    return next(matches, 'utf-8')
+
+
 class PartitionTable(list):
    def __init__(self):
        super(PartitionTable, self).__init__(self)

    @classmethod
    def from_file(cls, f):
-        data = f.read()
-        data_is_binary = data[0:2] == PartitionDefinition.MAGIC_BYTES
+        bin_data = f.read()
+        data_is_binary = bin_data[0:2] == PartitionDefinition.MAGIC_BYTES
        if data_is_binary:
            status('Parsing binary partition input...')
-            return cls.from_binary(data), True
+            return cls.from_binary(bin_data), True

-        data = data.decode()
+        str_data = bin_data.decode(get_encoding(bin_data))
        status('Parsing CSV input...')
-        return cls.from_csv(data), False
+        return cls.from_csv(str_data), False

    @classmethod
    def from_csv(cls, csv_contents):
--- a/components/partition_table/parttool.py
+++ b/components/partition_table/parttool.py
@@ -62,6 +62,7 @@ class ParttoolTarget():
        self.baud = baud

        gen.offset_part_table = partition_table_offset
+        gen.quiet = True

        def parse_esptool_args(esptool_args):
            results = list()
@@ -82,17 +83,8 @@ class ParttoolTarget():
        self.esptool_erase_args = parse_esptool_args(esptool_erase_args)

        if partition_table_file:
-            partition_table = None
            with open(partition_table_file, 'rb') as f:
-                input_is_binary = (f.read(2) == gen.PartitionDefinition.MAGIC_BYTES)
-                f.seek(0)
-                if input_is_binary:
-                    partition_table = gen.PartitionTable.from_binary(f.read())
-
-            if partition_table is None:
-                with open(partition_table_file, 'r', encoding='utf-8') as f:
-                    f.seek(0)
-                    partition_table = gen.PartitionTable.from_csv(f.read())
+                partition_table, _ = gen.PartitionTable.from_file(f)
        else:
            temp_file = tempfile.NamedTemporaryFile(delete=False)
            temp_file.close()
--- a/components/partition_table/test_gen_esp32part_host/gen_esp32part_tests.py
+++ b/components/partition_table/test_gen_esp32part_host/gen_esp32part_tests.py
@@ -246,6 +246,56 @@ storage2,       data, undefined,       , 12k,
        self.assertEqual(t[7].subtype, 0x06)


+class UTFCodingTests(Py23TestCase):
+    def test_utf8_bom_csv_file(self):
+        with open('partitions-utf8-bom.csv', 'rb') as csv_txt:
+            t, _ = gen_esp32part.PartitionTable.from_file(csv_txt)
+            t.verify()
+            self.assertEqual(t[0].name, 'nvs')  # the 3 UTF-8 BOM bytes must not leak into the first name
+            self.assertEqual(t[1].name, 'phy_инит_')  # non-ASCII (UTF-8) name is preserved
+            self.assertEqual(t[2].name, 'factory')
+            with open('partitions.bin', 'rb') as bin_file:
+                binary_content = bin_file.read()
+                self.assertEqual(_strip_trailing_ffs(t.to_binary()), _strip_trailing_ffs(binary_content))
+
+    def test_utf8_without_bom_csv_file(self):
+        with open('partitions-utf8_without-bom.csv', 'rb') as csv_txt:
+            t, _ = gen_esp32part.PartitionTable.from_file(csv_txt)
+            t.verify()
+            self.assertEqual(t[0].name, 'nvs')
+            self.assertEqual(t[1].name, 'phy_инит_')  # non-ASCII (UTF-8) name is preserved
+            self.assertEqual(t[2].name, 'factory')
+            with open('partitions.bin', 'rb') as bin_file:
+                binary_content = bin_file.read()
+                self.assertEqual(_strip_trailing_ffs(t.to_binary()), _strip_trailing_ffs(binary_content))
+
+    def test_utf8_bin_file(self):
+        with open('partitions.bin', 'rb') as bin_file:
+            t, _ = gen_esp32part.PartitionTable.from_file(bin_file)
+            t.verify()
+            self.assertEqual(t[0].name, 'nvs')
+            self.assertEqual(t[1].name, 'phy_инит_')  # non-ASCII (UTF-8) name is preserved
+            self.assertEqual(t[2].name, 'factory')
+            gen = t.to_csv()
+            self.assertIn('\nnvs,', gen)
+            self.assertIn('\nphy_инит_,', gen)
+            self.assertIn('\nfactory,', gen)
+
+    def test_utf8_without_bom_bin_file(self):
+        with open('partitions-utf8-bom.bin', 'rb') as bin_file:
+            t, _ = gen_esp32part.PartitionTable.from_file(bin_file)
+            t.verify()
+            # Binary input is taken as-is: a BOM already baked into the first name (presumably by
+            # the old tool; note the '-bom' fixture despite the test name) is kept. Fix the CSV instead.
+            self.assertEqual(t[0].name, '\ufeffnvs')
+            self.assertEqual(t[1].name, 'phy_инит_')
+            self.assertEqual(t[2].name, 'factory')
+            gen = t.to_csv()
+            self.assertIn('\ufeffnvs,', gen)
+            self.assertIn('\nphy_инит_,', gen)
+            self.assertIn('\nfactory,', gen)
+
+
 class BinaryParserTests(Py23TestCase):
    def test_parse_one_entry(self):
        # type 0x30, subtype 0xee,
--- a/components/partition_table/test_gen_esp32part_host/partitions-utf8-bom.bin
+++ b/components/partition_table/test_gen_esp32part_host/partitions-utf8-bom.bin
--- a/components/partition_table/test_gen_esp32part_host/partitions-utf8-bom.csv
+++ b/components/partition_table/test_gen_esp32part_host/partitions-utf8-bom.csv
@@ -0,0 +1,3 @@
+nvs,      data, nvs,      0x9000,  24K,
+phy_инит_, data, phy,      0xf000,  0x1000,
+factory,  app,  factory,  0x10000,  1M,
--- a/components/partition_table/test_gen_esp32part_host/partitions-utf8_without-bom.csv
+++ b/components/partition_table/test_gen_esp32part_host/partitions-utf8_without-bom.csv
@@ -0,0 +1,3 @@
+nvs,      data, nvs,      0x9000,  24K,
+phy_инит_, data, phy,      0xf000,  0x1000,
+factory,  app,  factory,  0x10000,  1M,
--- a/components/partition_table/test_gen_esp32part_host/partitions.bin
+++ b/components/partition_table/test_gen_esp32part_host/partitions.bin