diff --git a/components/esp_rom/CMakeLists.txt b/components/esp_rom/CMakeLists.txt index d02e827c9b..24fad05064 100644 --- a/components/esp_rom/CMakeLists.txt +++ b/components/esp_rom/CMakeLists.txt @@ -154,6 +154,10 @@ if(BOOTLOADER_BUILD) rom_linker_script("libc-funcs") else() rom_linker_script("libc") + if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + rom_linker_script("libc-suboptimal_for_misaligned_mem") + endif() if(CONFIG_LIBC_NEWLIB) rom_linker_script("newlib") endif() @@ -305,6 +309,9 @@ else() # Regular app build if(CONFIG_ESP_ROM_HAS_NEWLIB AND NOT target STREQUAL "esp32" AND NOT target STREQUAL "esp32s2") # ESP32 and S2 are a bit different, keep them as special cases in the target specific include section rom_linker_script("libc") + if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + rom_linker_script("libc-suboptimal_for_misaligned_mem") + endif() if(CONFIG_LIBC_NEWLIB) rom_linker_script("newlib") endif() diff --git a/components/esp_rom/esp32c2/Kconfig.soc_caps.in b/components/esp_rom/esp32c2/Kconfig.soc_caps.in index ff2127ca41..155b271d04 100644 --- a/components/esp_rom/esp32c2/Kconfig.soc_caps.in +++ b/components/esp_rom/esp32c2/Kconfig.soc_caps.in @@ -102,3 +102,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC config ESP_ROM_CONSOLE_OUTPUT_SECONDARY bool default y + +config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + bool + default y diff --git a/components/esp_rom/esp32c2/esp_rom_caps.h b/components/esp_rom/esp32c2/esp_rom_caps.h index b0753579ba..765a7e7049 100644 --- a/components/esp_rom/esp32c2/esp_rom_caps.h +++ b/components/esp_rom/esp32c2/esp_rom_caps.h @@ -31,3 +31,4 @@ #define ESP_ROM_HAS_VPRINTF_FUNC (1) // ROM has ets_vprintf #define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart) #define ESP_ROM_CONSOLE_OUTPUT_SECONDARY (1) // The console output functions will also output 
to the USB-serial secondary console +#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access. diff --git a/components/esp_rom/esp32c2/ld/esp32c2.rom.libc-suboptimal_for_misaligned_mem.ld b/components/esp_rom/esp32c2/ld/esp32c2.rom.libc-suboptimal_for_misaligned_mem.ld new file mode 100644 index 0000000000..481d7e82d4 --- /dev/null +++ b/components/esp_rom/esp32c2/ld/esp32c2.rom.libc-suboptimal_for_misaligned_mem.ld @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* These functions are not well optimized for misaligned memory access. + * See details in the https://sourceware.org/pipermail/newlib/2025/021470.html + * patch series. */ +memcpy = 0x4000048c; +memmove = 0x40000490; +memcmp = 0x40000494; +strcpy = 0x40000498; +strncpy = 0x4000049c; +strcmp = 0x400004a0; +strncmp = 0x400004a4; diff --git a/components/esp_rom/esp32c2/ld/esp32c2.rom.libc.ld b/components/esp_rom/esp32c2/ld/esp32c2.rom.libc.ld index 4ea20f83b0..920da09982 100644 --- a/components/esp_rom/esp32c2/ld/esp32c2.rom.libc.ld +++ b/components/esp_rom/esp32c2/ld/esp32c2.rom.libc.ld @@ -1,17 +1,10 @@ /* - * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ esp_rom_newlib_init_common_mutexes = 0x40000484; memset = 0x40000488; -memcpy = 0x4000048c; -memmove = 0x40000490; -memcmp = 0x40000494; -strcpy = 0x40000498; -strncpy = 0x4000049c; -strcmp = 0x400004a0; -strncmp = 0x400004a4; strlen = 0x400004a8; strstr = 0x400004ac; bzero = 0x400004b0; diff --git a/components/esp_rom/esp32c3/Kconfig.soc_caps.in b/components/esp_rom/esp32c3/Kconfig.soc_caps.in index 9bb1b5dbe0..ef95398bc5 100644 --- a/components/esp_rom/esp32c3/Kconfig.soc_caps.in +++ b/components/esp_rom/esp32c3/Kconfig.soc_caps.in @@ -98,3 +98,7 
@@ config ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB config ESP_ROM_CONSOLE_OUTPUT_SECONDARY bool default y + +config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + bool + default y diff --git a/components/esp_rom/esp32c3/esp_rom_caps.h b/components/esp_rom/esp32c3/esp_rom_caps.h index 68247f19c0..db5ee8451f 100644 --- a/components/esp_rom/esp32c3/esp_rom_caps.h +++ b/components/esp_rom/esp32c3/esp_rom_caps.h @@ -30,3 +30,4 @@ #define ESP_ROM_HAS_VERSION (1) // ROM has version/eco information #define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep. #define ESP_ROM_CONSOLE_OUTPUT_SECONDARY (1) // The console output functions will also output to the USB-serial secondary console +#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access. diff --git a/components/esp_rom/esp32c3/ld/esp32c3.rom.libc-suboptimal_for_misaligned_mem.ld b/components/esp_rom/esp32c3/ld/esp32c3.rom.libc-suboptimal_for_misaligned_mem.ld new file mode 100644 index 0000000000..61aa184d24 --- /dev/null +++ b/components/esp_rom/esp32c3/ld/esp32c3.rom.libc-suboptimal_for_misaligned_mem.ld @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* These functions are not well optimized for misaligned memory access. + * See details in the https://sourceware.org/pipermail/newlib/2025/021470.html + * patch series. 
*/ +memcpy = 0x40000358; +memmove = 0x4000035c; +memcmp = 0x40000360; +strcpy = 0x40000364; +strncpy = 0x40000368; +strcmp = 0x4000036c; +strncmp = 0x40000370; diff --git a/components/esp_rom/esp32c3/ld/esp32c3.rom.libc.ld b/components/esp_rom/esp32c3/ld/esp32c3.rom.libc.ld index 8f14615d94..a7476cac1d 100644 --- a/components/esp_rom/esp32c3/ld/esp32c3.rom.libc.ld +++ b/components/esp_rom/esp32c3/ld/esp32c3.rom.libc.ld @@ -1,17 +1,10 @@ /* - * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ esp_rom_newlib_init_common_mutexes = 0x40000350; memset = 0x40000354; -memcpy = 0x40000358; -memmove = 0x4000035c; -memcmp = 0x40000360; -strcpy = 0x40000364; -strncpy = 0x40000368; -strcmp = 0x4000036c; -strncmp = 0x40000370; strlen = 0x40000374; strstr = 0x40000378; bzero = 0x4000037c; diff --git a/components/esp_rom/esp32c5/Kconfig.soc_caps.in b/components/esp_rom/esp32c5/Kconfig.soc_caps.in index ab1f135282..bfdb1fc951 100644 --- a/components/esp_rom/esp32c5/Kconfig.soc_caps.in +++ b/components/esp_rom/esp32c5/Kconfig.soc_caps.in @@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC config ESP_ROM_CLIC_INT_THRESH_PATCH bool default y + +config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + bool + default y diff --git a/components/esp_rom/esp32c5/esp_rom_caps.h b/components/esp_rom/esp32c5/esp_rom_caps.h index f9fb986b11..d704bc8f75 100644 --- a/components/esp_rom/esp32c5/esp_rom_caps.h +++ b/components/esp_rom/esp32c5/esp_rom_caps.h @@ -32,3 +32,4 @@ #define ESP_ROM_USB_OTG_NUM (-1) // No USB_OTG CDC in the ROM, set -1 for Kconfig usage. 
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart) #define ESP_ROM_CLIC_INT_THRESH_PATCH (1) // ROM version of esprv_intc_int_set_threshold incorrectly assumes lowest MINTTHRESH is 0x1F, should be 0xF +#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access. diff --git a/components/esp_rom/esp32c5/ld/esp32c5.rom.libc-suboptimal_for_misaligned_mem.ld b/components/esp_rom/esp32c5/ld/esp32c5.rom.libc-suboptimal_for_misaligned_mem.ld new file mode 100644 index 0000000000..badf3224cc --- /dev/null +++ b/components/esp_rom/esp32c5/ld/esp32c5.rom.libc-suboptimal_for_misaligned_mem.ld @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* These functions are not well optimized for misaligned memory access. + * See details in the https://sourceware.org/pipermail/newlib/2025/021470.html + * patch series. 
*/ +memcpy = 0x400004bc; +memmove = 0x400004c0; +memcmp = 0x400004c4; +strcpy = 0x400004c8; +strncpy = 0x400004cc; +strcmp = 0x400004d0; +strncmp = 0x400004d4; diff --git a/components/esp_rom/esp32c5/ld/esp32c5.rom.libc.ld b/components/esp_rom/esp32c5/ld/esp32c5.rom.libc.ld index a7764d3bd3..59498deb4e 100644 --- a/components/esp_rom/esp32c5/ld/esp32c5.rom.libc.ld +++ b/components/esp_rom/esp32c5/ld/esp32c5.rom.libc.ld @@ -1,17 +1,10 @@ /* - * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ esp_rom_newlib_init_common_mutexes = 0x400004b4; memset = 0x400004b8; -memcpy = 0x400004bc; -memmove = 0x400004c0; -memcmp = 0x400004c4; -strcpy = 0x400004c8; -strncpy = 0x400004cc; -strcmp = 0x400004d0; -strncmp = 0x400004d4; strlen = 0x400004d8; strstr = 0x400004dc; bzero = 0x400004e0; diff --git a/components/esp_rom/esp32c6/Kconfig.soc_caps.in b/components/esp_rom/esp32c6/Kconfig.soc_caps.in index b902f21855..c770cf4703 100644 --- a/components/esp_rom/esp32c6/Kconfig.soc_caps.in +++ b/components/esp_rom/esp32c6/Kconfig.soc_caps.in @@ -114,3 +114,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC config ESP_ROM_NO_USB_SERIAL_OUTPUT_API bool default y + +config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + bool + default y diff --git a/components/esp_rom/esp32c6/esp_rom_caps.h b/components/esp_rom/esp32c6/esp_rom_caps.h index c3a7470d7c..b11c6db554 100644 --- a/components/esp_rom/esp32c6/esp_rom_caps.h +++ b/components/esp_rom/esp32c6/esp_rom_caps.h @@ -34,3 +34,4 @@ #define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep. 
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart) #define ESP_ROM_NO_USB_SERIAL_OUTPUT_API (1) // ROM does not export the usb-serial-jtag write char function +#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access. diff --git a/components/esp_rom/esp32c6/ld/esp32c6.rom.libc-suboptimal_for_misaligned_mem.ld b/components/esp_rom/esp32c6/ld/esp32c6.rom.libc-suboptimal_for_misaligned_mem.ld new file mode 100644 index 0000000000..0cd20d199b --- /dev/null +++ b/components/esp_rom/esp32c6/ld/esp32c6.rom.libc-suboptimal_for_misaligned_mem.ld @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* These functions are not well optimized for misaligned memory access. + * See details in the https://sourceware.org/pipermail/newlib/2025/021470.html + * patch series. */ +memcpy = 0x400004ac; +memmove = 0x400004b0; +memcmp = 0x400004b4; +strcpy = 0x400004b8; +strncpy = 0x400004bc; +strcmp = 0x400004c0; +strncmp = 0x400004c4; diff --git a/components/esp_rom/esp32c6/ld/esp32c6.rom.libc.ld b/components/esp_rom/esp32c6/ld/esp32c6.rom.libc.ld index abc58cb644..16c5b60710 100644 --- a/components/esp_rom/esp32c6/ld/esp32c6.rom.libc.ld +++ b/components/esp_rom/esp32c6/ld/esp32c6.rom.libc.ld @@ -1,17 +1,10 @@ /* - * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ esp_rom_newlib_init_common_mutexes = 0x400004a4; memset = 0x400004a8; -memcpy = 0x400004ac; -memmove = 0x400004b0; -memcmp = 0x400004b4; -strcpy = 0x400004b8; -strncpy = 0x400004bc; -strcmp = 0x400004c0; -strncmp = 0x400004c4; strlen = 0x400004c8; strstr = 0x400004cc; bzero = 0x400004d0; diff --git a/components/esp_rom/esp32c61/Kconfig.soc_caps.in 
b/components/esp_rom/esp32c61/Kconfig.soc_caps.in index 32f8056569..506b867ed3 100644 --- a/components/esp_rom/esp32c61/Kconfig.soc_caps.in +++ b/components/esp_rom/esp32c61/Kconfig.soc_caps.in @@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC config ESP_ROM_CACHE_WB_INVLD_LOW_RANGE bool default y + +config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + bool + default y diff --git a/components/esp_rom/esp32c61/esp_rom_caps.h b/components/esp_rom/esp32c61/esp_rom_caps.h index 1d62f0f462..7d0d22d405 100644 --- a/components/esp_rom/esp32c61/esp_rom_caps.h +++ b/components/esp_rom/esp32c61/esp_rom_caps.h @@ -32,3 +32,4 @@ #define ESP_ROM_USB_OTG_NUM (-1) // No USB_OTG CDC in the ROM, set -1 for Kconfig usage. #define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart) #define ESP_ROM_CACHE_WB_INVLD_LOW_RANGE (1) // ROM `Cache_WriteBack_Addr` and `Cache_Invalidate_Addr` can only access low vaddr parts +#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access. diff --git a/components/esp_rom/esp32c61/ld/esp32c61.rom.libc-suboptimal_for_misaligned_mem.ld b/components/esp_rom/esp32c61/ld/esp32c61.rom.libc-suboptimal_for_misaligned_mem.ld new file mode 100644 index 0000000000..badf3224cc --- /dev/null +++ b/components/esp_rom/esp32c61/ld/esp32c61.rom.libc-suboptimal_for_misaligned_mem.ld @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* These functions are not well optimized for misaligned memory access. + * See details in the https://sourceware.org/pipermail/newlib/2025/021470.html + * patch series. 
*/ +memcpy = 0x400004bc; +memmove = 0x400004c0; +memcmp = 0x400004c4; +strcpy = 0x400004c8; +strncpy = 0x400004cc; +strcmp = 0x400004d0; +strncmp = 0x400004d4; diff --git a/components/esp_rom/esp32c61/ld/esp32c61.rom.libc.ld b/components/esp_rom/esp32c61/ld/esp32c61.rom.libc.ld index 8fd3bbab77..771fa2f084 100644 --- a/components/esp_rom/esp32c61/ld/esp32c61.rom.libc.ld +++ b/components/esp_rom/esp32c61/ld/esp32c61.rom.libc.ld @@ -1,17 +1,10 @@ /* - * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ esp_rom_newlib_init_common_mutexes = 0x400004b4; memset = 0x400004b8; -memcpy = 0x400004bc; -memmove = 0x400004c0; -memcmp = 0x400004c4; -strcpy = 0x400004c8; -strncpy = 0x400004cc; -strcmp = 0x400004d0; -strncmp = 0x400004d4; strlen = 0x400004d8; strstr = 0x400004dc; bzero = 0x400004e0; diff --git a/components/esp_rom/esp32h2/Kconfig.soc_caps.in b/components/esp_rom/esp32h2/Kconfig.soc_caps.in index 502615e0e4..9f48e30fb5 100644 --- a/components/esp_rom/esp32h2/Kconfig.soc_caps.in +++ b/components/esp_rom/esp32h2/Kconfig.soc_caps.in @@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC config ESP_ROM_NO_USB_SERIAL_OUTPUT_API bool default y + +config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + bool + default y diff --git a/components/esp_rom/esp32h2/esp_rom_caps.h b/components/esp_rom/esp32h2/esp_rom_caps.h index 84356d1ab2..fd5a8ed299 100644 --- a/components/esp_rom/esp32h2/esp_rom_caps.h +++ b/components/esp_rom/esp32h2/esp_rom_caps.h @@ -32,3 +32,4 @@ #define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep. 
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart) #define ESP_ROM_NO_USB_SERIAL_OUTPUT_API (1) // ROM does not export the usb-serial-jtag write char function +#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access. diff --git a/components/esp_rom/esp32h2/ld/esp32h2.rom.libc-suboptimal_for_misaligned_mem.ld b/components/esp_rom/esp32h2/ld/esp32h2.rom.libc-suboptimal_for_misaligned_mem.ld new file mode 100644 index 0000000000..db1df8d6ef --- /dev/null +++ b/components/esp_rom/esp32h2/ld/esp32h2.rom.libc-suboptimal_for_misaligned_mem.ld @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* These functions are not well optimized for misaligned memory access. + * See details in the https://sourceware.org/pipermail/newlib/2025/021470.html + * patch series. */ +memcpy = 0x400004a4; +memmove = 0x400004a8; +memcmp = 0x400004ac; +strcpy = 0x400004b0; +strncpy = 0x400004b4; +strcmp = 0x400004b8; +strncmp = 0x400004bc; diff --git a/components/esp_rom/esp32h2/ld/esp32h2.rom.libc.ld b/components/esp_rom/esp32h2/ld/esp32h2.rom.libc.ld index 488fa5678d..d6680a5d7f 100644 --- a/components/esp_rom/esp32h2/ld/esp32h2.rom.libc.ld +++ b/components/esp_rom/esp32h2/ld/esp32h2.rom.libc.ld @@ -1,17 +1,10 @@ /* - * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ esp_rom_newlib_init_common_mutexes = 0x4000049c; memset = 0x400004a0; -memcpy = 0x400004a4; -memmove = 0x400004a8; -memcmp = 0x400004ac; -strcpy = 0x400004b0; -strncpy = 0x400004b4; -strcmp = 0x400004b8; -strncmp = 0x400004bc; strlen = 0x400004c0; strstr = 0x400004c4; bzero = 0x400004c8; diff --git a/components/esp_rom/esp32p4/Kconfig.soc_caps.in 
b/components/esp_rom/esp32p4/Kconfig.soc_caps.in index 7e28eedc62..04f2b6fa4d 100644 --- a/components/esp_rom/esp32p4/Kconfig.soc_caps.in +++ b/components/esp_rom/esp32p4/Kconfig.soc_caps.in @@ -82,3 +82,7 @@ config ESP_ROM_CLIC_INT_TYPE_PATCH config ESP_ROM_HAS_OUTPUT_PUTC_FUNC bool default y + +config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + bool + default y diff --git a/components/esp_rom/esp32p4/esp_rom_caps.h b/components/esp_rom/esp32p4/esp_rom_caps.h index 3ef8fb4a30..902628236f 100644 --- a/components/esp_rom/esp32p4/esp_rom_caps.h +++ b/components/esp_rom/esp32p4/esp_rom_caps.h @@ -26,3 +26,4 @@ #define ESP_ROM_HAS_VERSION (1) // ROM has version/eco information #define ESP_ROM_CLIC_INT_TYPE_PATCH (1) // ROM api esprv_intc_int_set_type configuring edge type interrupt is invalid #define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart) +#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access. diff --git a/components/esp_rom/esp32p4/ld/esp32p4.rom.libc-suboptimal_for_misaligned_mem.ld b/components/esp_rom/esp32p4/ld/esp32p4.rom.libc-suboptimal_for_misaligned_mem.ld new file mode 100644 index 0000000000..9742e62f16 --- /dev/null +++ b/components/esp_rom/esp32p4/ld/esp32p4.rom.libc-suboptimal_for_misaligned_mem.ld @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* These functions are not well optimized for misaligned memory access. + * See details in the https://sourceware.org/pipermail/newlib/2025/021470.html + * patch series. 
*/ +memcpy = 0x4fc0026c; +memmove = 0x4fc00270; +memcmp = 0x4fc00274; +strcpy = 0x4fc00278; +strncpy = 0x4fc0027c; +strcmp = 0x4fc00280; +strncmp = 0x4fc00284; diff --git a/components/esp_rom/esp32p4/ld/esp32p4.rom.libc.ld b/components/esp_rom/esp32p4/ld/esp32p4.rom.libc.ld index 8f0911ef0f..65d90e205f 100644 --- a/components/esp_rom/esp32p4/ld/esp32p4.rom.libc.ld +++ b/components/esp_rom/esp32p4/ld/esp32p4.rom.libc.ld @@ -1,17 +1,10 @@ /* - * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ esp_rom_newlib_init_common_mutexes = 0x4fc00264; memset = 0x4fc00268; -memcpy = 0x4fc0026c; -memmove = 0x4fc00270; -memcmp = 0x4fc00274; -strcpy = 0x4fc00278; -strncpy = 0x4fc0027c; -strcmp = 0x4fc00280; -strncmp = 0x4fc00284; strlen = 0x4fc00288; strstr = 0x4fc0028c; bzero = 0x4fc00290; diff --git a/components/esp_rom/test_apps/rom_tests/main/CMakeLists.txt b/components/esp_rom/test_apps/rom_tests/main/CMakeLists.txt index 148bc64d7a..f7ca0448b7 100644 --- a/components/esp_rom/test_apps/rom_tests/main/CMakeLists.txt +++ b/components/esp_rom/test_apps/rom_tests/main/CMakeLists.txt @@ -1,3 +1,12 @@ +if(CONFIG_IDF_TARGET_ARCH_RISCV AND NOT DEFINED CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY) + message(WARNING + "You probably added a new chip support. Please do the next steps:\n" + " 1) Check if ROM functions implementation is optimized on misaligned memory operations.\n" + " 2) Define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY to esp_rom_caps.h. 
If it is zero:\n" + " 2.1) Move some functions out from *.rom.libc.ld file (see *.rom.libc-suboptimal_for_misaligned_mem.ld).\n" + "Find a related test in the newlib component to use as a reference.") +endif() + set(srcs "test_app_main.c" "test_libgcc.c" "test_printf.c") diff --git a/components/newlib/CMakeLists.txt b/components/newlib/CMakeLists.txt index 0b1db2dfb1..0d23f0f103 100644 --- a/components/newlib/CMakeLists.txt +++ b/components/newlib/CMakeLists.txt @@ -37,6 +37,25 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND) list(APPEND srcs "src/port/xtensa/stdatomic_s32c1i.c") endif() +if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + list(APPEND srcs + "src/string/memcmp.c" + "src/string/memmove.c" + "src/string/strncmp.c" + "src/string/strncpy.c" + "src/port/riscv/memcpy.c" + "src/port/riscv/strcpy.c" + "src/port/riscv/strcmp.S") + # "-u" on each esp_libc_include_*_impl hook symbol forces the linker to pull in the file that defines it. + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl") +endif() + if(CONFIG_LIBC_NEWLIB) list(APPEND srcs "src/flockfile.c" diff --git a/components/newlib/Kconfig b/components/newlib/Kconfig index 9cbfd8ec4b..9a30784c21 100644 --- a/components/newlib/Kconfig +++ b/components/newlib/Kconfig @@ -143,6 +143,23 @@ menu "LibC" select ESP_TIME_FUNCS_USE_NONE endchoice + config LIBC_OPTIMIZED_MISALIGNED_ACCESS + bool "Use performance-optimized memXXX/strXXX functions on misaligned memory access" + default n + depends on ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY + help + Enables performance-optimized implementations of memory and string functions + when handling misaligned memory. + + This increases the image size by ~1000 bytes. 
+ + Optimized functions include: + - memcpy + - memcmp + - memmove + - str[n]cpy + - str[n]cmp + endmenu # LibC config STDATOMIC_S32C1I_SPIRAM_WORKAROUND diff --git a/components/newlib/src/port/riscv/memcpy.c b/components/newlib/src/port/riscv/memcpy.c new file mode 100644 index 0000000000..bbee1d3f02 --- /dev/null +++ b/components/newlib/src/port/riscv/memcpy.c @@ -0,0 +1,104 @@ +/* + * SPDX-FileCopyrightText: 2017 SiFive Inc. + * + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +/* Copyright (c) 2017 SiFive Inc. All rights reserved. + + This copyrighted material is made available to anyone wishing to use, + modify, copy, or redistribute it subject to the terms and conditions + of the FreeBSD License. This program is distributed in the hope that + it will be useful, but WITHOUT ANY WARRANTY expressed or implied, + including the implied warranties of MERCHANTABILITY or FITNESS FOR + A PARTICULAR PURPOSE. A copy of this license is available at + http://www.opensource.org/licenses. 
+*/ + +#include <string.h> +#include <stdint.h> +#include "esp_attr.h" +#include "../../string/local.h" + +#define unlikely(X) __builtin_expect (!!(X), 0) + +IRAM_ATTR +void * +__attribute__((optimize("-Os"))) +__inhibit_loop_to_libcall +memcpy(void *__restrict aa, const void *__restrict bb, size_t n) /* copies n bytes from bb to aa; every return path returns aa */ +{ +#define BODY(a, b, t) { \ + t tt = *b; \ + a++, b++; \ + *(a - 1) = tt; \ + } + + char *a = (char *)aa; + const char *b = (const char *)bb; + char *end = a + n; + uintptr_t msk = sizeof(long) - 1; /* address bits that are zero on a long-aligned pointer */ +#if __riscv_misaligned_slow || __riscv_misaligned_fast + if (n < sizeof(long)) +#else + if (unlikely((((uintptr_t)a & msk) != ((uintptr_t)b & msk)) + || n < sizeof(long))) +#endif + { +small: /* byte-wise copy of whatever is left between a and end */ + if (__builtin_expect(a < end, 1)) + while (a < end) { + BODY(a, b, char); + } + return aa; + } + + if (unlikely(((uintptr_t)a & msk) != 0)) + while ((uintptr_t)a & msk) { + BODY(a, b, char); + } + + long *la = (long *)a; + const long *lb = (const long *)b; + long *lend = (long *)((uintptr_t)end & ~msk); /* end rounded down to a long boundary */ + + if (unlikely(lend - la > 8)) { + while (lend - la > 8) { /* nine longs per pass: all loads are issued before the stores */ + long b0 = *lb++; + long b1 = *lb++; + long b2 = *lb++; + long b3 = *lb++; + long b4 = *lb++; + long b5 = *lb++; + long b6 = *lb++; + long b7 = *lb++; + long b8 = *lb++; + *la++ = b0; + *la++ = b1; + *la++ = b2; + *la++ = b3; + *la++ = b4; + *la++ = b5; + *la++ = b6; + *la++ = b7; + *la++ = b8; + } + } + + while (la < lend) { + BODY(la, lb, long); + } + + a = (char *)la; + b = (const char *)lb; + if (unlikely(a < end)) { + goto small; + } + return aa; +} + +// Hook to force the linker to include this file +void esp_libc_include_memcpy_impl(void) +{ +} diff --git a/components/newlib/src/port/riscv/strcmp.S b/components/newlib/src/port/riscv/strcmp.S new file mode 100644 index 0000000000..c67edec6ad --- /dev/null +++ b/components/newlib/src/port/riscv/strcmp.S @@ -0,0 +1,195 @@ +/* + * SPDX-FileCopyrightText: 2017 SiFive Inc. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +/* Copyright (c) 2017 SiFive Inc. All rights reserved. + + This copyrighted material is made available to anyone wishing to use, + modify, copy, or redistribute it subject to the terms and conditions + of the FreeBSD License. This program is distributed in the hope that + it will be useful, but WITHOUT ANY WARRANTY expressed or implied, + including the implied warranties of MERCHANTABILITY or FITNESS FOR + A PARTICULAR PURPOSE. A copy of this license is available at + http://www.opensource.org/licenses. +*/ + +#include <sys/asm.h> + +.text +.globl strcmp +.type strcmp, @function +strcmp: /* a0 and a1 point at the two strings; the result is returned in a0 */ + li t2, -1 /* all-ones: check_one_word's t0 equals this only when the loaded word has no zero byte */ + +#if !(__riscv_misaligned_slow || __riscv_misaligned_fast) + or a4, a0, a1 + and a4, a4, SZREG-1 + bnez a4, .Lmisaligned +#endif + +#if SZREG == 4 + li a5, 0x7f7f7f7f +#else + ld a5, mask +#endif + + .macro check_one_word i n + REG_L a2, \i*SZREG(a0) + REG_L a3, \i*SZREG(a1) + + and t0, a2, a5 + or t1, a2, a5 + add t0, t0, a5 + or t0, t0, t1 + + bne t0, t2, .Lnull\i + .if \i+1-\n + bne a2, a3, .Lmismatch + .else + add a0, a0, \n*SZREG + add a1, a1, \n*SZREG + beq a2, a3, .Lloop + # fall through to .Lmismatch + .endif + .endm + + .macro foundnull i n + .ifne \i + .Lnull\i: + add a0, a0, \i*SZREG + add a1, a1, \i*SZREG + .ifeq \i-1 + .Lnull0: + .endif + bne a2, a3, .Lmisaligned + li a0, 0 + ret + .endif + .endm + +.Lloop: + # examine full words at a time, favoring strings of a couple dozen chars +#if __riscv_xlen == 32 + check_one_word 0 5 + check_one_word 1 5 + check_one_word 2 5 + check_one_word 3 5 + check_one_word 4 5 +#else + check_one_word 0 3 + check_one_word 1 3 + check_one_word 2 3 +#endif + # backwards branch to .Lloop contained above + +.Lmismatch: + # words don't match, but a2 has no null byte. 
+ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +#if __riscv_xlen == 64 + sll a4, a2, 48 + sll a5, a3, 48 + bne a4, a5, .Lmismatch_upper + sll a4, a2, 32 + sll a5, a3, 32 + bne a4, a5, .Lmismatch_upper +#endif + sll a4, a2, 16 + sll a5, a3, 16 + bne a4, a5, .Lmismatch_upper + + srl a4, a2, 8*SZREG-16 + srl a5, a3, 8*SZREG-16 + sub a0, a4, a5 + and a1, a0, 0xff + bnez a1, 1f + ret + +.Lmismatch_upper: + srl a4, a4, 8*SZREG-16 + srl a5, a5, 8*SZREG-16 + sub a0, a4, a5 + and a1, a0, 0xff + bnez a1, 1f + ret + +1:and a4, a4, 0xff + and a5, a5, 0xff + sub a0, a4, a5 + ret + +#else + +#if __riscv_xlen == 64 + srl a4, a2, 48 + srl a5, a3, 48 + bne a4, a5, .Lmismatch_lower + srl a4, a2, 32 + srl a5, a3, 32 + bne a4, a5, .Lmismatch_lower +#endif + srl a4, a2, 16 + srl a5, a3, 16 + bne a4, a5, .Lmismatch_lower + + srl a4, a2, 8 + srl a5, a3, 8 + bne a4, a5, 1f + and a4, a2, 0xff + and a5, a3, 0xff +1:sub a0, a4, a5 + ret + +.Lmismatch_lower: + srl a2, a4, 8 + srl a3, a5, 8 + bne a2, a3, 1f + and a2, a4, 0xff + and a3, a5, 0xff +1:sub a0, a2, a3 + ret + +#endif + +.Lmisaligned: + # misaligned + lbu a2, 0(a0) + lbu a3, 0(a1) + add a0, a0, 1 + add a1, a1, 1 + bne a2, a3, 1f + bnez a2, .Lmisaligned + +1: + sub a0, a2, a3 + ret + + # cases in which a null byte was detected +#if __riscv_xlen == 32 + foundnull 0 5 + foundnull 1 5 + foundnull 2 5 + foundnull 3 5 + foundnull 4 5 +#else + foundnull 0 3 + foundnull 1 3 + foundnull 2 3 +#endif +.size strcmp, .-strcmp + +#if SZREG == 8 +.section .srodata.cst8,"aM",@progbits,8 +.align 3 +mask: +.dword 0x7f7f7f7f7f7f7f7f +#endif + +.text +.globl esp_libc_include_strcmp_impl +.type esp_libc_include_strcmp_impl, @function +esp_libc_include_strcmp_impl: diff --git a/components/newlib/src/port/riscv/strcpy.c b/components/newlib/src/port/riscv/strcpy.c new file mode 100644 index 0000000000..361a04baa1 --- /dev/null +++ b/components/newlib/src/port/riscv/strcpy.c @@ -0,0 +1,104 @@ +/* + * SPDX-FileCopyrightText: 2017 SiFive Inc. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +/* Copyright (c) 2017 SiFive Inc. All rights reserved. + + This copyrighted material is made available to anyone wishing to use, + modify, copy, or redistribute it subject to the terms and conditions + of the FreeBSD License. This program is distributed in the hope that + it will be useful, but WITHOUT ANY WARRANTY expressed or implied, + including the implied warranties of MERCHANTABILITY or FITNESS FOR + A PARTICULAR PURPOSE. A copy of this license is available at + http://www.opensource.org/licenses. +*/ + +#include <string.h> +#include <stdint.h> + +__attribute__((always_inline)) +static inline +unsigned long __newlib__libc_detect_null(unsigned long w) /* nonzero exactly when at least one byte of w is zero */ +{ + unsigned long mask = 0x7f7f7f7f; + if (sizeof(long) == 8) { + mask = ((mask << 16) << 16) | mask; + } + return ~(((w & mask) + mask) | w | mask); +} + +__attribute__((optimize("-Os"))) +char *strcpy(char *dst, const char *src) /* copies src, including its terminating zero, to dst; returns the original dst */ +{ + char *dst0 = dst; + +#if !(__riscv_misaligned_slow || __riscv_misaligned_fast) + int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof(long) - 1); + if (__builtin_expect(!misaligned, 1)) +#endif + { + long *ldst = (long *)dst; + const long *lsrc = (const long *)src; + + while (!__newlib__libc_detect_null(*lsrc)) { + *ldst++ = *lsrc++; + } + + dst = (char *)ldst; + src = (const char *)lsrc; + + char c0 = src[0]; + char c1 = src[1]; + char c2 = src[2]; + if (!(*dst++ = c0)) { + return dst0; + } + if (!(*dst++ = c1)) { + return dst0; + } + char c3 = src[3]; + if (!(*dst++ = c2)) { + return dst0; + } + if (sizeof(long) == 4) { + goto out; + } + char c4 = src[4]; + if (!(*dst++ = c3)) { + return dst0; + } + char c5 = src[5]; + if (!(*dst++ = c4)) { + return dst0; + } + char c6 = src[6]; + if (!(*dst++ = c5)) { + return dst0; + } + if (!(*dst++ = c6)) { + return dst0; + } + +out: + *dst++ = 0; + return dst0; + } + + char ch; + do { + ch = *src; + src++; + 
dst++; + *(dst - 1) = ch; + } while (ch); + + return dst0; +} + +// Hook to force the linker to include this file +void esp_libc_include_strcpy_impl(void) +{ +} diff --git a/components/newlib/src/string/local.h b/components/newlib/src/string/local.h new file mode 100644 index 0000000000..bfc1260efc --- /dev/null +++ b/components/newlib/src/string/local.h @@ -0,0 +1,62 @@ +/* + * SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +#include <_ansi.h> +#include <limits.h> +/* + Taken from glibc: + Add the compiler optimization to inhibit loop transformation to library + calls. This is used to avoid recursive calls in memset and memmove + default implementations. +*/ +# define __inhibit_loop_to_libcall \ + __attribute__ ((__optimize__ ("-fno-tree-loop-distribute-patterns"))) + +/* Nonzero if X is not aligned on a "long" boundary. + * This macro is used to skip a few bytes to find an aligned pointer. + * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled, + * to avoid small performance penalties (if they are not zero). */ +#define UNALIGNED_X(X) ((long)(X) & (sizeof (long) - 1)) + +#define _HAVE_HW_MISALIGNED_ACCESS (__riscv_misaligned_fast || __riscv_misaligned_slow) + +#if _HAVE_HW_MISALIGNED_ACCESS +/* Hardware performs unaligned operations with little + * to no penalty compared to byte-to-byte copy. */ +#define UNALIGNED_X_Y(X, Y) (0) +#else /* _HAVE_HW_MISALIGNED_ACCESS */ +/* Nonzero if either X or Y is not aligned on a "long" boundary. */ +#define UNALIGNED_X_Y(X, Y) \ + (((long)(X) & (sizeof (long) - 1)) | ((long)(Y) & (sizeof (long) - 1))) +#endif /* _HAVE_HW_MISALIGNED_ACCESS */ + +/* How many bytes are copied each iteration of the word copy loop. */ +#define LITTLE_BLOCK_SIZE (sizeof (long)) + +/* How many bytes are copied each iteration of the 4X unrolled loop. 
*/ +#define BIG_BLOCK_SIZE (sizeof (long) << 2) + +/* Threshold for punting to the little block byte copier. */ +#define TOO_SMALL_LITTLE_BLOCK(LEN) ((LEN) < LITTLE_BLOCK_SIZE) + +/* Threshold for punting to the big block byte copier. */ +#define TOO_SMALL_BIG_BLOCK(LEN) ((LEN) < BIG_BLOCK_SIZE) + +/* Macros for detecting endchar. */ +#if LONG_MAX == 2147483647L +#define DETECT_NULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080) +#else +#if LONG_MAX == 9223372036854775807L +/* Nonzero if X (a long int) contains a NULL byte. */ +#define DETECT_NULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080) +#else +#error long int is not a 32bit or 64bit type. +#endif +#endif + +/* Returns nonzero if (long)X contains the byte used to fill (long)MASK. */ +#define DETECT_CHAR(X, MASK) (DETECT_NULL(X ^ MASK)) diff --git a/components/newlib/src/string/memcmp.c b/components/newlib/src/string/memcmp.c new file mode 100644 index 0000000000..0a26ca8cd4 --- /dev/null +++ b/components/newlib/src/string/memcmp.c @@ -0,0 +1,59 @@ +/* + * SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +#include +#include "local.h" + +__attribute__((optimize("-Os"))) +int +memcmp(const void *m1, + const void *m2, + size_t n) +{ + unsigned char *s1 = (unsigned char *) m1; + unsigned char *s2 = (unsigned char *) m2; + unsigned long *a1; + unsigned long *a2; + + /* If the size is too small, or either pointer is unaligned, + then we punt to the byte compare loop. Hopefully this will + not turn up in inner loops. */ + if (!TOO_SMALL_LITTLE_BLOCK(n) && !UNALIGNED_X_Y(s1, s2)) { + /* Otherwise, load and compare the blocks of memory one + word at a time. 
*/ + a1 = (unsigned long*) s1; + a2 = (unsigned long*) s2; + while (!TOO_SMALL_LITTLE_BLOCK(n)) { + if (*a1 != *a2) { + break; + } + a1++; + a2++; + n -= LITTLE_BLOCK_SIZE; + } + + /* check m mod LITTLE_BLOCK_SIZE remaining characters */ + + s1 = (unsigned char*)a1; + s2 = (unsigned char*)a2; + } + + while (n--) { + if (*s1 != *s2) { + return *s1 - *s2; + } + s1++; + s2++; + } + + return 0; +} + +// Hook to force the linker to include this file +void esp_libc_include_memcmp_impl(void) +{ +} diff --git a/components/newlib/src/string/memmove.c b/components/newlib/src/string/memmove.c new file mode 100644 index 0000000000..57071ddc09 --- /dev/null +++ b/components/newlib/src/string/memmove.c @@ -0,0 +1,88 @@ +/* + * SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +#include +#include <_ansi.h> +#include +#include +#include "local.h" + +__attribute__((optimize("-Os"))) +void * +__inhibit_loop_to_libcall +memmove(void *dst_void, + const void *src_void, + size_t length) +{ + char *dst = dst_void; + const char *src = src_void; + long *aligned_dst; + const long *aligned_src; + + if (src < dst && dst < src + length) { + /* Destructive overlap...have to copy backwards */ + src += length; + dst += length; + + if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { + aligned_dst = (long*)dst; + aligned_src = (long*)src; + + /* Copy one long word at a time if possible. */ + while (!TOO_SMALL_LITTLE_BLOCK(length)) { + *--aligned_dst = *--aligned_src; + length -= LITTLE_BLOCK_SIZE; + } + + /* Pick up any residual with a byte copier. */ + dst = (char*)aligned_dst; + src = (char*)aligned_src; + } + + while (length--) { + *--dst = *--src; + } + } else { + /* Use optimizing algorithm for a non-destructive copy to closely + match memcpy. If the size is small or either SRC or DST is unaligned, + then punt into the byte copy loop. 
/*
 * Compare at most `n` characters of the strings `s1` and `s2`.
 *
 * Returns 0 when `n` is 0, when the first `n` characters are equal, or when
 * both strings end (equal NUL) before `n` characters were compared; otherwise
 * the difference of the first mismatching characters read as unsigned char
 * (s1 character minus s2 character).
 */
__attribute__((optimize("-Os")))
int
strncmp(const char *s1,
        const char *s2,
        size_t n)
{
    if (n == 0) {
        return 0;
    }

    /* Word-wise comparison only when UNALIGNED_X_Y() reports no alignment
     * problem for the two pointers. */
    if (!UNALIGNED_X_Y(s1, s2)) {
        unsigned long *w1 = (unsigned long*)s1;
        unsigned long *w2 = (unsigned long*)s2;

        for (; n >= sizeof(long) && *w1 == *w2; w1++, w2++) {
            n -= sizeof(long);

            /* The words are equal: done if the budget is used up or the
             * word contains the terminator. */
            if (n == 0 || DETECT_NULL(*w1)) {
                return 0;
            }
        }

        /* Either the words differ or fewer than sizeof(long) characters
         * remain: continue bytewise from here. */
        s1 = (char*)w1;
        s2 = (char*)w2;
    }

    for (; n-- > 0 && *s1 == *s2; s1++, s2++) {
        /* The characters are equal: done if the budget is used up or the
         * terminator was reached. */
        if (n == 0 || *s1 == '\0') {
            return 0;
        }
    }
    return (*(unsigned char *) s1) - (*(unsigned char *) s2);
}

/* Hook to force the linker to include this file */
void esp_libc_include_strncmp_impl(void)
{
}
*/ + while (!TOO_SMALL_LITTLE_BLOCK(count) && !DETECT_NULL(*aligned_src)) { + count -= sizeof(long int); + *aligned_dst++ = *aligned_src++; + } + + dst = (char*)aligned_dst; + src = (char*)aligned_src; + } + + while (count > 0) { + --count; + if ((*dst++ = *src++) == '\0') { + break; + } + } + + while (count-- > 0) { + *dst++ = '\0'; + } + + return dst0; +} + +// Hook to force the linker to include this file +void esp_libc_include_strncpy_impl(void) +{ +} diff --git a/components/newlib/test_apps/newlib/main/CMakeLists.txt b/components/newlib/test_apps/newlib/main/CMakeLists.txt index 65505baadf..8ac3b52270 100644 --- a/components/newlib/test_apps/newlib/main/CMakeLists.txt +++ b/components/newlib/test_apps/newlib/main/CMakeLists.txt @@ -16,6 +16,15 @@ if(CONFIG_LIBC_NEWLIB) "test_file.c") endif() +if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + list(APPEND srcs "test_misaligned_mem_performance.c") +endif() + idf_component_register(SRCS "${srcs}" PRIV_REQUIRES unity vfs cmock driver esp_timer spi_flash test_utils pthread esp_psram WHOLE_ARCHIVE) + +if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + set_source_files_properties(test_misaligned_mem_performance.c + PROPERTIES COMPILE_FLAGS "-Wno-incompatible-pointer-types -Wno-strict-prototypes") +endif() diff --git a/components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c b/components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c new file mode 100644 index 0000000000..ced9b74f0a --- /dev/null +++ b/components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c @@ -0,0 +1,108 @@ +/* + * SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Unlicense OR CC0-1.0 + */ +#include +#include +#include "esp_heap_caps.h" +#include "hal/cpu_ll.h" +#include "unity.h" + +#define MAX_MEMTEST_SIZE 4096 + +uint32_t test_function_dest_src_size(void (*foo)(), bool pass_size) +{ + uint32_t ccount1, ccount2; + char* test_des = heap_caps_aligned_alloc(32, 
MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + char* test_src = heap_caps_aligned_alloc(32, MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + + TEST_ASSERT_NOT_NULL(test_des); + TEST_ASSERT_NOT_NULL(test_src); + + /* Prepare arrays for X-cmp functions to make the algorithm go through whole buffers. */ + memset(test_src, 'a', MAX_MEMTEST_SIZE); + test_src[MAX_MEMTEST_SIZE - 1] = 0; + memset(test_des, 'a', MAX_MEMTEST_SIZE); + test_des[MAX_MEMTEST_SIZE - 1] = 'b'; + test_des[MAX_MEMTEST_SIZE - 1] = 0; + + ccount1 = esp_cpu_get_cycle_count(); + if (pass_size) { + foo(test_des + 1, test_src + 2, MAX_MEMTEST_SIZE - 2); + } else { + foo(test_des + 1, test_src + 2); + } + ccount2 = esp_cpu_get_cycle_count(); + + heap_caps_free(test_des); + heap_caps_free(test_src); + + return ccount2 - ccount1; +} + +TEST_CASE("memcpy", "[misaligned_mem]") +{ + uint32_t ccount = test_function_dest_src_size(memcpy, true); + /* esp32c2: 4128 cycles instead 28676. */ + TEST_ASSERT_LESS_THAN(5000, ccount); +} + +TEST_CASE("memcmp", "[misaligned_mem]") +{ + uint32_t ccount = test_function_dest_src_size(memcmp, true); + /* esp32c2: 14259 cycles instead 49147. */ + TEST_ASSERT_LESS_THAN(16000, ccount); +} + +TEST_CASE("memmove", "[misaligned_mem]") +{ + uint32_t ccount = test_function_dest_src_size(memmove, true); + /* esp32c2: 8086 cycles instead 33896. */ + TEST_ASSERT_LESS_THAN(10000, ccount); +} + +TEST_CASE("memmove - overlapping", "[misaligned_mem]") +{ + uint32_t ccount1, ccount2; + char* buf = heap_caps_aligned_alloc(32, MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + + TEST_ASSERT_NOT_NULL(buf); + + ccount1 = esp_cpu_get_cycle_count(); + memmove(buf + 5, buf + 2, MAX_MEMTEST_SIZE - 5); + ccount2 = esp_cpu_get_cycle_count(); + + heap_caps_free(buf); + + /* esp32c2: 11503 cycles instead 45024. 
*/ + TEST_ASSERT_LESS_THAN(15000, ccount2 - ccount1); +} + +TEST_CASE("strcpy", "[misaligned_mem]") +{ + uint32_t ccount = test_function_dest_src_size(strcpy, false); + /* esp32c2: 17313 cycles instead 32771. */ + TEST_ASSERT_LESS_THAN(20000, ccount); +} + +TEST_CASE("strcmp", "[misaligned_mem]") +{ + uint32_t ccount = test_function_dest_src_size(strcmp, false); + /* esp32c2: 13191 cycles instead 32775. */ + TEST_ASSERT_LESS_THAN(15000, ccount); +} + +TEST_CASE("strncpy", "[misaligned_mem]") +{ + uint32_t ccount = test_function_dest_src_size(strncpy, true); + /* esp32c2: 21475 cycles instead 36859. */ + TEST_ASSERT_LESS_THAN(25000, ccount); +} + +TEST_CASE("strncmp", "[misaligned_mem]") +{ + uint32_t ccount = test_function_dest_src_size(strncmp, true); + /* esp32c2: 24369 cycles instead 49141. */ + TEST_ASSERT_LESS_THAN(27000, ccount); +} diff --git a/components/newlib/test_apps/newlib/pytest_newlib.py b/components/newlib/test_apps/newlib/pytest_newlib.py index 36c142a74d..96d5e4ac1c 100644 --- a/components/newlib/test_apps/newlib/pytest_newlib.py +++ b/components/newlib/test_apps/newlib/pytest_newlib.py @@ -16,6 +16,7 @@ from pytest_embedded_idf.utils import idf_parametrize ('psram_esp32', 'esp32'), ('release_esp32', 'esp32'), ('release_esp32c2', 'esp32c2'), + ('misaligned_mem', 'esp32c3'), ], indirect=['config', 'target'], ) diff --git a/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem b/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem new file mode 100644 index 0000000000..17a61e401a --- /dev/null +++ b/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem @@ -0,0 +1 @@ +CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS=y diff --git a/docs/en/api-guides/performance/speed.rst b/docs/en/api-guides/performance/speed.rst index 4180dcb7dd..17b9b658af 100644 --- a/docs/en/api-guides/performance/speed.rst +++ b/docs/en/api-guides/performance/speed.rst @@ -87,6 +87,7 @@ The following optimizations improve the execution of nearly all code, 
including :SOC_CPU_HAS_FPU: - Avoid using floating point arithmetic ``float``. Even though {IDF_TARGET_NAME} has a single precision hardware floating point unit, floating point calculations are always slower than integer calculations. If possible then use fixed point representations, a different method of integer representation, or convert part of the calculation to be integer only before switching to floating point. :not SOC_CPU_HAS_FPU: - Avoid using floating point arithmetic ``float``. On {IDF_TARGET_NAME} these calculations are emulated in software and are very slow. If possible, use fixed point representations, a different method of integer representation, or convert part of the calculation to be integer only before switching to floating point. - Avoid using double precision floating point arithmetic ``double``. These calculations are emulated in software and are very slow. If possible then use an integer-based representation, or single-precision floating point. + :CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY: - Avoid misaligned 4-byte memory accesses in performance-critical code sections. For potential performance improvements, consider enabling :ref:`CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS`. Note that properly aligned memory operations will always execute at full speed without performance penalties. Enabling this option requires approximately 190 additional bytes of IRAM and approximately 870 additional bytes of flash memory. .. only:: esp32s2 or esp32s3 or esp32p4 diff --git a/tools/ci/check_copyright_config.yaml b/tools/ci/check_copyright_config.yaml index c7f4a3d4ef..342e05e72c 100644 --- a/tools/ci/check_copyright_config.yaml +++ b/tools/ci/check_copyright_config.yaml @@ -79,8 +79,7 @@ newlib_component: - 'components/newlib/platform_include/**' - 'components/newlib/port/**' - 'components/newlib/priv_include/**' - - 'components/newlib/*.c' - - 'components/newlib/*.h' + - 'components/newlib/src/**' allowed_licenses: - Apache-2.0 - BSD-2-Clause-FreeBSD AND Apache-2.0