fix(partition_table): Ignore UTF-8 BOM bytes in csv file

This commit is contained in:
Konstantin Kondrashov
2025-05-07 17:22:45 +03:00
parent fe75355314
commit 41dd352149
7 changed files with 79 additions and 15 deletions

View File

@@ -11,6 +11,7 @@
# SPDX-License-Identifier: Apache-2.0
import argparse
import binascii
import codecs
import errno
import hashlib
import os
@@ -167,21 +168,36 @@ def critical(msg):
sys.stderr.write('\n')
def get_encoding(first_bytes):
    """Detect the text encoding of *first_bytes* from a leading BOM (Byte Order Mark).

    Returns the codec name to hand to ``bytes.decode()``: 'utf-8-sig',
    'utf-16' or 'utf-32' when *first_bytes* starts with the matching BOM,
    and plain 'utf-8' when no known BOM is found (including empty input).
    """
    # Order matters: BOM_UTF32_LE (FF FE 00 00) itself begins with
    # BOM_UTF16_LE (FF FE), so the UTF-32 marks have to be tested before the
    # UTF-16 ones or UTF-32-LE input would be reported as 'utf-16'.
    boms = (
        (codecs.BOM_UTF8, 'utf-8-sig'),
        (codecs.BOM_UTF32_LE, 'utf-32'),
        (codecs.BOM_UTF32_BE, 'utf-32'),
        (codecs.BOM_UTF16_LE, 'utf-16'),
        (codecs.BOM_UTF16_BE, 'utf-16'),
    )
    for bom, encoding in boms:
        if first_bytes.startswith(bom):
            return encoding
    return 'utf-8'
class PartitionTable(list):
def __init__(self):
super(PartitionTable, self).__init__(self)
@classmethod
def from_file(cls, f):
    """Build a partition table from an open file object.

    The stream is read exactly once. If the content starts with
    PartitionDefinition.MAGIC_BYTES it is passed to from_binary();
    otherwise it is decoded with the codec chosen by get_encoding() and
    passed to from_csv().

    f is expected to return bytes from read(), since the result is
    compared against MAGIC_BYTES and then decode()d.

    Returns a tuple (table, input_is_binary).
    """
    bin_data = f.read()
    data_is_binary = bin_data[0:2] == PartitionDefinition.MAGIC_BYTES
    if data_is_binary:
        status('Parsing binary partition input...')
        return cls.from_binary(bin_data), True

    # Pick the codec from the raw bytes so a leading BOM is consumed by the
    # decoder instead of ending up inside the first CSV field.
    str_data = bin_data.decode(get_encoding(bin_data))
    status('Parsing CSV input...')
    return cls.from_csv(str_data), False
@classmethod
def from_csv(cls, csv_contents):

View File

@@ -62,6 +62,7 @@ class ParttoolTarget():
self.baud = baud
gen.offset_part_table = partition_table_offset
gen.quiet = True
def parse_esptool_args(esptool_args):
results = list()
@@ -82,17 +83,8 @@ class ParttoolTarget():
self.esptool_erase_args = parse_esptool_args(esptool_erase_args)
if partition_table_file:
partition_table = None
with open(partition_table_file, 'rb') as f:
input_is_binary = (f.read(2) == gen.PartitionDefinition.MAGIC_BYTES)
f.seek(0)
if input_is_binary:
partition_table = gen.PartitionTable.from_binary(f.read())
if partition_table is None:
with open(partition_table_file, 'r', encoding='utf-8') as f:
f.seek(0)
partition_table = gen.PartitionTable.from_csv(f.read())
partition_table, _ = gen.PartitionTable.from_file(f)
else:
temp_file = tempfile.NamedTemporaryFile(delete=False)
temp_file.close()

View File

@@ -246,6 +246,56 @@ storage2, data, undefined, , 12k,
self.assertEqual(t[7].subtype, 0x06)
class UTFCodingTests(Py23TestCase):
    """Partition names must survive UTF-8 input, with or without a BOM."""

    def test_utf8_bom_csv_file(self):
        """CSV with a leading UTF-8 BOM: the BOM must not leak into the first name."""
        with open('partitions-utf8-bom.csv', 'rb') as source:
            table, _ = gen_esp32part.PartitionTable.from_file(source)
        table.verify()
        # First entry: the 3 BOM bytes are stripped; second: non-ASCII name kept.
        for index, expected in enumerate(('nvs', 'phy_инит_', 'factory')):
            self.assertEqual(table[index].name, expected)
        with open('partitions.bin', 'rb') as reference:
            expected_binary = reference.read()
        self.assertEqual(_strip_trailing_ffs(table.to_binary()), _strip_trailing_ffs(expected_binary))

    def test_utf8_without_bom_csv_file(self):
        """CSV in plain UTF-8 (no BOM) yields the same table and binary image."""
        with open('partitions-utf8_without-bom.csv', 'rb') as source:
            table, _ = gen_esp32part.PartitionTable.from_file(source)
        table.verify()
        for index, expected in enumerate(('nvs', 'phy_инит_', 'factory')):
            self.assertEqual(table[index].name, expected)
        with open('partitions.bin', 'rb') as reference:
            expected_binary = reference.read()
        self.assertEqual(_strip_trailing_ffs(table.to_binary()), _strip_trailing_ffs(expected_binary))

    def test_utf8_bin_file(self):
        """Binary table with a non-ASCII name round-trips through to_csv()."""
        with open('partitions.bin', 'rb') as source:
            table, _ = gen_esp32part.PartitionTable.from_file(source)
        table.verify()
        for index, expected in enumerate(('nvs', 'phy_инит_', 'factory')):
            self.assertEqual(table[index].name, expected)
        csv_text = table.to_csv()
        for fragment in ('\nnvs,', '\nphy_инит_,', '\nfactory,'):
            self.assertIn(fragment, csv_text)

    def test_utf8_without_bom_bin_file(self):
        """Binary table whose first name already contains a BOM is left untouched."""
        # NOTE(review): the method name says "without_bom" but the fixture is
        # 'partitions-utf8-bom.bin' and the BOM is expected in the name below.
        with open('partitions-utf8-bom.bin', 'rb') as source:
            table, _ = gen_esp32part.PartitionTable.from_file(source)
        table.verify()
        # A name that already carries the BOM in the binary is not rewritten;
        # correcting it is left to the user via the CSV file.
        for index, expected in enumerate(('\ufeffnvs', 'phy_инит_', 'factory')):
            self.assertEqual(table[index].name, expected)
        csv_text = table.to_csv()
        for fragment in ('\ufeffnvs,', '\nphy_инит_,', '\nfactory,'):
            self.assertIn(fragment, csv_text)
class BinaryParserTests(Py23TestCase):
def test_parse_one_entry(self):
# type 0x30, subtype 0xee,

View File

@@ -0,0 +1,3 @@
nvs, data, nvs, 0x9000, 24K,
phy_инит_, data, phy, 0xf000, 0x1000,
factory, app, factory, 0x10000, 1M,
1 nvs data nvs 0x9000 24K
2 phy_инит_ data phy 0xf000 0x1000
3 factory app factory 0x10000 1M

View File

@@ -0,0 +1,3 @@
nvs, data, nvs, 0x9000, 24K,
phy_инит_, data, phy, 0xf000, 0x1000,
factory, app, factory, 0x10000, 1M,
1 nvs data nvs 0x9000 24K
2 phy_инит_ data phy 0xf000 0x1000
3 factory app factory 0x10000 1M