forked from espressif/esp-idf
Merge branch 'feat/newlib-optimized-misaligned-memory' into 'master'
feat(newlib): riscv: add CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS config option See merge request espressif/esp-idf!36704
This commit is contained in:
@@ -154,6 +154,10 @@ if(BOOTLOADER_BUILD)
|
||||
rom_linker_script("libc-funcs")
|
||||
else()
|
||||
rom_linker_script("libc")
|
||||
if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
|
||||
rom_linker_script("libc-suboptimal_for_misaligned_mem")
|
||||
endif()
|
||||
if(CONFIG_LIBC_NEWLIB)
|
||||
rom_linker_script("newlib")
|
||||
endif()
|
||||
@@ -305,6 +309,9 @@ else() # Regular app build
|
||||
if(CONFIG_ESP_ROM_HAS_NEWLIB AND NOT target STREQUAL "esp32" AND NOT target STREQUAL "esp32s2")
|
||||
# ESP32 and S2 are a bit different, keep them as special cases in the target specific include section
|
||||
rom_linker_script("libc")
|
||||
if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
|
||||
rom_linker_script("libc-suboptimal_for_misaligned_mem")
|
||||
endif()
|
||||
if(CONFIG_LIBC_NEWLIB)
|
||||
rom_linker_script("newlib")
|
||||
endif()
|
||||
|
@@ -102,3 +102,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
|
||||
config ESP_ROM_CONSOLE_OUTPUT_SECONDARY
|
||||
bool
|
||||
default y
|
||||
|
||||
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
bool
|
||||
default y
|
||||
|
@@ -31,3 +31,4 @@
|
||||
#define ESP_ROM_HAS_VPRINTF_FUNC (1) // ROM has ets_vprintf
|
||||
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
|
||||
#define ESP_ROM_CONSOLE_OUTPUT_SECONDARY (1) // The console output functions will also output to the USB-serial secondary console
|
||||
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.
|
||||
|
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
/* These functions are not well optimized for misaligned memory access.
|
||||
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
|
||||
* patch series. */
|
||||
memcpy = 0x4000048c;
|
||||
memmove = 0x40000490;
|
||||
memcmp = 0x40000494;
|
||||
strcpy = 0x40000498;
|
||||
strncpy = 0x4000049c;
|
||||
strcmp = 0x400004a0;
|
||||
strncmp = 0x400004a4;
|
@@ -1,17 +1,10 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
esp_rom_newlib_init_common_mutexes = 0x40000484;
|
||||
memset = 0x40000488;
|
||||
memcpy = 0x4000048c;
|
||||
memmove = 0x40000490;
|
||||
memcmp = 0x40000494;
|
||||
strcpy = 0x40000498;
|
||||
strncpy = 0x4000049c;
|
||||
strcmp = 0x400004a0;
|
||||
strncmp = 0x400004a4;
|
||||
strlen = 0x400004a8;
|
||||
strstr = 0x400004ac;
|
||||
bzero = 0x400004b0;
|
||||
|
@@ -98,3 +98,7 @@ config ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB
|
||||
config ESP_ROM_CONSOLE_OUTPUT_SECONDARY
|
||||
bool
|
||||
default y
|
||||
|
||||
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
bool
|
||||
default y
|
||||
|
@@ -30,3 +30,4 @@
|
||||
#define ESP_ROM_HAS_VERSION (1) // ROM has version/eco information
|
||||
#define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep.
|
||||
#define ESP_ROM_CONSOLE_OUTPUT_SECONDARY (1) // The console output functions will also output to the USB-serial secondary console
|
||||
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.
|
||||
|
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
/* These functions are not well optimized for misaligned memory access.
|
||||
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
|
||||
* patch series. */
|
||||
memcpy = 0x40000358;
|
||||
memmove = 0x4000035c;
|
||||
memcmp = 0x40000360;
|
||||
strcpy = 0x40000364;
|
||||
strncpy = 0x40000368;
|
||||
strcmp = 0x4000036c;
|
||||
strncmp = 0x40000370;
|
@@ -1,17 +1,10 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
esp_rom_newlib_init_common_mutexes = 0x40000350;
|
||||
memset = 0x40000354;
|
||||
memcpy = 0x40000358;
|
||||
memmove = 0x4000035c;
|
||||
memcmp = 0x40000360;
|
||||
strcpy = 0x40000364;
|
||||
strncpy = 0x40000368;
|
||||
strcmp = 0x4000036c;
|
||||
strncmp = 0x40000370;
|
||||
strlen = 0x40000374;
|
||||
strstr = 0x40000378;
|
||||
bzero = 0x4000037c;
|
||||
|
@@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
|
||||
config ESP_ROM_CLIC_INT_THRESH_PATCH
|
||||
bool
|
||||
default y
|
||||
|
||||
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
bool
|
||||
default y
|
||||
|
@@ -32,3 +32,4 @@
|
||||
#define ESP_ROM_USB_OTG_NUM (-1) // No USB_OTG CDC in the ROM, set -1 for Kconfig usage.
|
||||
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
|
||||
#define ESP_ROM_CLIC_INT_THRESH_PATCH (1) // ROM version of esprv_intc_int_set_threshold incorrectly assumes lowest MINTTHRESH is 0x1F, should be 0xF
|
||||
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.
|
||||
|
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
/* These functions are not well optimized for misaligned memory access.
|
||||
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
|
||||
* patch series. */
|
||||
memcpy = 0x400004bc;
|
||||
memmove = 0x400004c0;
|
||||
memcmp = 0x400004c4;
|
||||
strcpy = 0x400004c8;
|
||||
strncpy = 0x400004cc;
|
||||
strcmp = 0x400004d0;
|
||||
strncmp = 0x400004d4;
|
@@ -1,17 +1,10 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
esp_rom_newlib_init_common_mutexes = 0x400004b4;
|
||||
memset = 0x400004b8;
|
||||
memcpy = 0x400004bc;
|
||||
memmove = 0x400004c0;
|
||||
memcmp = 0x400004c4;
|
||||
strcpy = 0x400004c8;
|
||||
strncpy = 0x400004cc;
|
||||
strcmp = 0x400004d0;
|
||||
strncmp = 0x400004d4;
|
||||
strlen = 0x400004d8;
|
||||
strstr = 0x400004dc;
|
||||
bzero = 0x400004e0;
|
||||
|
@@ -114,3 +114,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
|
||||
config ESP_ROM_NO_USB_SERIAL_OUTPUT_API
|
||||
bool
|
||||
default y
|
||||
|
||||
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
bool
|
||||
default y
|
||||
|
@@ -34,3 +34,4 @@
|
||||
#define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep.
|
||||
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
|
||||
#define ESP_ROM_NO_USB_SERIAL_OUTPUT_API (1) // ROM does not export the usb-serial-jtag write char function
|
||||
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.
|
||||
|
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
/* These functions are not well optimized for misaligned memory access.
|
||||
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
|
||||
* patch series. */
|
||||
memcpy = 0x400004ac;
|
||||
memmove = 0x400004b0;
|
||||
memcmp = 0x400004b4;
|
||||
strcpy = 0x400004b8;
|
||||
strncpy = 0x400004bc;
|
||||
strcmp = 0x400004c0;
|
||||
strncmp = 0x400004c4;
|
@@ -1,17 +1,10 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
esp_rom_newlib_init_common_mutexes = 0x400004a4;
|
||||
memset = 0x400004a8;
|
||||
memcpy = 0x400004ac;
|
||||
memmove = 0x400004b0;
|
||||
memcmp = 0x400004b4;
|
||||
strcpy = 0x400004b8;
|
||||
strncpy = 0x400004bc;
|
||||
strcmp = 0x400004c0;
|
||||
strncmp = 0x400004c4;
|
||||
strlen = 0x400004c8;
|
||||
strstr = 0x400004cc;
|
||||
bzero = 0x400004d0;
|
||||
|
@@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
|
||||
config ESP_ROM_CACHE_WB_INVLD_LOW_RANGE
|
||||
bool
|
||||
default y
|
||||
|
||||
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
bool
|
||||
default y
|
||||
|
@@ -32,3 +32,4 @@
|
||||
#define ESP_ROM_USB_OTG_NUM (-1) // No USB_OTG CDC in the ROM, set -1 for Kconfig usage.
|
||||
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
|
||||
#define ESP_ROM_CACHE_WB_INVLD_LOW_RANGE (1) // ROM `Cache_WriteBack_Addr` and `Cache_Invalidate_Addr` can only access low vaddr parts
|
||||
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.
|
||||
|
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
/* These functions are not well optimized for misaligned memory access.
|
||||
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
|
||||
* patch series. */
|
||||
memcpy = 0x400004bc;
|
||||
memmove = 0x400004c0;
|
||||
memcmp = 0x400004c4;
|
||||
strcpy = 0x400004c8;
|
||||
strncpy = 0x400004cc;
|
||||
strcmp = 0x400004d0;
|
||||
strncmp = 0x400004d4;
|
@@ -1,17 +1,10 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
esp_rom_newlib_init_common_mutexes = 0x400004b4;
|
||||
memset = 0x400004b8;
|
||||
memcpy = 0x400004bc;
|
||||
memmove = 0x400004c0;
|
||||
memcmp = 0x400004c4;
|
||||
strcpy = 0x400004c8;
|
||||
strncpy = 0x400004cc;
|
||||
strcmp = 0x400004d0;
|
||||
strncmp = 0x400004d4;
|
||||
strlen = 0x400004d8;
|
||||
strstr = 0x400004dc;
|
||||
bzero = 0x400004e0;
|
||||
|
@@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
|
||||
config ESP_ROM_NO_USB_SERIAL_OUTPUT_API
|
||||
bool
|
||||
default y
|
||||
|
||||
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
bool
|
||||
default y
|
||||
|
@@ -32,3 +32,4 @@
|
||||
#define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep.
|
||||
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
|
||||
#define ESP_ROM_NO_USB_SERIAL_OUTPUT_API (1) // ROM does not export the usb-serial-jtag write char function
|
||||
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.
|
||||
|
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
/* These functions are not well optimized for misaligned memory access.
|
||||
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
|
||||
* patch series. */
|
||||
memcpy = 0x400004a4;
|
||||
memmove = 0x400004a8;
|
||||
memcmp = 0x400004ac;
|
||||
strcpy = 0x400004b0;
|
||||
strncpy = 0x400004b4;
|
||||
strcmp = 0x400004b8;
|
||||
strncmp = 0x400004bc;
|
@@ -1,17 +1,10 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
esp_rom_newlib_init_common_mutexes = 0x4000049c;
|
||||
memset = 0x400004a0;
|
||||
memcpy = 0x400004a4;
|
||||
memmove = 0x400004a8;
|
||||
memcmp = 0x400004ac;
|
||||
strcpy = 0x400004b0;
|
||||
strncpy = 0x400004b4;
|
||||
strcmp = 0x400004b8;
|
||||
strncmp = 0x400004bc;
|
||||
strlen = 0x400004c0;
|
||||
strstr = 0x400004c4;
|
||||
bzero = 0x400004c8;
|
||||
|
@@ -82,3 +82,7 @@ config ESP_ROM_CLIC_INT_TYPE_PATCH
|
||||
config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
|
||||
bool
|
||||
default y
|
||||
|
||||
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
bool
|
||||
default y
|
||||
|
@@ -26,3 +26,4 @@
|
||||
#define ESP_ROM_HAS_VERSION (1) // ROM has version/eco information
|
||||
#define ESP_ROM_CLIC_INT_TYPE_PATCH (1) // ROM api esprv_intc_int_set_type configuring edge type interrupt is invalid
|
||||
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
|
||||
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.
|
||||
|
@@ -0,0 +1,15 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
/* These functions are not well optimized for misaligned memory access.
|
||||
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
|
||||
* patch series. */
|
||||
memcpy = 0x4fc0026c;
|
||||
memmove = 0x4fc00270;
|
||||
memcmp = 0x4fc00274;
|
||||
strcpy = 0x4fc00278;
|
||||
strncpy = 0x4fc0027c;
|
||||
strcmp = 0x4fc00280;
|
||||
strncmp = 0x4fc00284;
|
@@ -1,17 +1,10 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
|
||||
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
esp_rom_newlib_init_common_mutexes = 0x4fc00264;
|
||||
memset = 0x4fc00268;
|
||||
memcpy = 0x4fc0026c;
|
||||
memmove = 0x4fc00270;
|
||||
memcmp = 0x4fc00274;
|
||||
strcpy = 0x4fc00278;
|
||||
strncpy = 0x4fc0027c;
|
||||
strcmp = 0x4fc00280;
|
||||
strncmp = 0x4fc00284;
|
||||
strlen = 0x4fc00288;
|
||||
strstr = 0x4fc0028c;
|
||||
bzero = 0x4fc00290;
|
||||
|
@@ -1,3 +1,12 @@
|
||||
# Configure-time reminder for new RISC-V targets: when
# CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY is not defined at
# all, print a warning that lists the porting steps below.
if(CONFIG_IDF_TARGET_ARCH_RISCV AND NOT DEFINED CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY)
|
||||
message(WARNING
|
||||
"You probably added a new chip support. Please do the next steps:\n"
|
||||
" 1) Check if ROM functions implementation is optimized on misaligned memory operations.\n"
|
||||
" 2) Define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY to esp_rom_caps.h. If it is zero:\n"
|
||||
" 2.1) Move some functions out from *.rom.libc.ld file (see *.rom.libc-suboptimal_for_misaligned_mem.ld).\n"
|
||||
"Find a related test in the newlib component to use as a reference.")
|
||||
endif()
|
||||
|
||||
set(srcs "test_app_main.c"
|
||||
"test_libgcc.c"
|
||||
"test_printf.c")
|
||||
|
@@ -37,6 +37,23 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND)
|
||||
list(APPEND srcs "src/port/xtensa/stdatomic_s32c1i.c")
|
||||
endif()
|
||||
|
||||
# Build the mem/str replacements that are used instead of the ROM versions
# when CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS is enabled.
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
    list(APPEND srcs
        "src/string/memcmp.c"
        "src/string/memmove.c"
        "src/string/strncmp.c"
        "src/string/strncpy.c"
        "src/port/riscv/memcpy.c"
        "src/port/riscv/strcpy.c"
        "src/port/riscv/strcmp.S")
    # "-u <symbol>" makes the linker treat the hook symbol as undefined, which
    # forces it to pull in the object file that defines the hook (and with it
    # the optimized function). memcpy.c defines esp_libc_include_memcpy_impl
    # for exactly this purpose, so it needs a flag like the other sources.
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcpy_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl")
endif()
|
||||
|
||||
if(CONFIG_LIBC_NEWLIB)
|
||||
list(APPEND srcs
|
||||
"src/flockfile.c"
|
||||
|
@@ -143,6 +143,23 @@ menu "LibC"
|
||||
select ESP_TIME_FUNCS_USE_NONE
|
||||
endchoice
|
||||
|
||||
config LIBC_OPTIMIZED_MISALIGNED_ACCESS
|
||||
bool "Use performance-optimized memXXX/strXXX functions on misaligned memory access"
|
||||
default n
|
||||
depends on ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
|
||||
help
|
||||
Enables performance-optimized implementations of memory and string functions
|
||||
when handling misaligned memory.
|
||||
|
||||
This increases the image size by ~1000 bytes.
|
||||
|
||||
Optimized functions include:
|
||||
- memcpy
|
||||
- memcmp
|
||||
- memmove
|
||||
- str[n]cpy
|
||||
- str[n]cmp
|
||||
|
||||
endmenu # LibC
|
||||
|
||||
config STDATOMIC_S32C1I_SPIRAM_WORKAROUND
|
||||
|
104
components/newlib/src/port/riscv/memcpy.c
Normal file
104
components/newlib/src/port/riscv/memcpy.c
Normal file
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2017 SiFive Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
/* Copyright (c) 2017 SiFive Inc. All rights reserved.
|
||||
|
||||
This copyrighted material is made available to anyone wishing to use,
|
||||
modify, copy, or redistribute it subject to the terms and conditions
|
||||
of the FreeBSD License. This program is distributed in the hope that
|
||||
it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
|
||||
including the implied warranties of MERCHANTABILITY or FITNESS FOR
|
||||
A PARTICULAR PURPOSE. A copy of this license is available at
|
||||
http://www.opensource.org/licenses.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "esp_attr.h"
|
||||
#include "../../string/local.h"
|
||||
|
||||
#define unlikely(X) __builtin_expect (!!(X), 0)
|
||||
|
||||
/*
 * memcpy - copy n bytes from bb to aa and return aa.
 *
 * Strategy, as implemented below:
 *   1. Short copies (n < sizeof(long)) are done byte by byte. When neither
 *      __riscv_misaligned_slow nor __riscv_misaligned_fast is set, copies
 *      whose source and destination differ in their offset within a long
 *      also take the byte path.
 *   2. Otherwise the destination is advanced byte by byte to a long boundary,
 *   3. then whole longs are copied (nine per iteration while more than eight
 *      remain, then one at a time),
 *   4. and any tail bytes go back through the byte loop at `small`.
 *
 * __inhibit_loop_to_libcall (string/local.h) applies
 * -fno-tree-loop-distribute-patterns to this function.
 * NOTE(review): IRAM_ATTR comes from esp_attr.h; presumably it places the
 * function in internal RAM - confirm against that header.
 */
IRAM_ATTR
|
||||
void *
|
||||
__attribute__((optimize("-Os")))
|
||||
__inhibit_loop_to_libcall
|
||||
memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
|
||||
{
|
||||
/* Copy one element of type t: the load is issued first, both pointers are
 * advanced, and only then is the value stored through the old destination. */
#define BODY(a, b, t) { \
|
||||
t tt = *b; \
|
||||
a++, b++; \
|
||||
*(a - 1) = tt; \
|
||||
}
|
||||
|
||||
char *a = (char *)aa;
|
||||
const char *b = (const char *)bb;
|
||||
char *end = a + n;
|
||||
/* Mask selecting the offset of an address within a long. */
uintptr_t msk = sizeof(long) - 1;
|
||||
#if __riscv_misaligned_slow || __riscv_misaligned_fast
|
||||
/* Misaligned word access is available: only very short copies go bytewise. */
if (n < sizeof(long))
|
||||
#else
|
||||
/* Source and destination can only be word-aligned together when they share
 * the same offset within a long; otherwise fall back to the byte loop. */
if (unlikely((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
|
||||
|| n < sizeof(long)))
|
||||
#endif
|
||||
{
|
||||
/* Byte loop; also the target of the tail-copy goto further down. */
small:
|
||||
if (__builtin_expect(a < end, 1))
|
||||
while (a < end) {
|
||||
BODY(a, b, char);
|
||||
}
|
||||
return aa;
|
||||
}
|
||||
|
||||
/* Advance byte by byte until the destination is long-aligned. */
if (unlikely(((uintptr_t)a & msk) != 0))
|
||||
while ((uintptr_t)a & msk) {
|
||||
BODY(a, b, char);
|
||||
}
|
||||
|
||||
long *la = (long *)a;
|
||||
const long *lb = (const long *)b;
|
||||
/* End of the destination rounded down to a long boundary. */
long *lend = (long *)((uintptr_t)end & ~msk);
|
||||
|
||||
/* Bulk path: nine loads followed by nine stores per iteration. */
if (unlikely(lend - la > 8)) {
|
||||
while (lend - la > 8) {
|
||||
long b0 = *lb++;
|
||||
long b1 = *lb++;
|
||||
long b2 = *lb++;
|
||||
long b3 = *lb++;
|
||||
long b4 = *lb++;
|
||||
long b5 = *lb++;
|
||||
long b6 = *lb++;
|
||||
long b7 = *lb++;
|
||||
long b8 = *lb++;
|
||||
*la++ = b0;
|
||||
*la++ = b1;
|
||||
*la++ = b2;
|
||||
*la++ = b3;
|
||||
*la++ = b4;
|
||||
*la++ = b5;
|
||||
*la++ = b6;
|
||||
*la++ = b7;
|
||||
*la++ = b8;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remaining whole longs, one at a time. */
while (la < lend) {
|
||||
BODY(la, lb, long);
|
||||
}
|
||||
|
||||
a = (char *)la;
|
||||
b = (const char *)lb;
|
||||
/* Fewer than sizeof(long) bytes may be left; finish them in the byte loop. */
if (unlikely(a < end)) {
|
||||
goto small;
|
||||
}
|
||||
return aa;
|
||||
}
|
||||
|
||||
// Hook to force the linker to include this file: an intentionally empty
// function whose symbol can be requested with the linker's "-u" option.
void esp_libc_include_memcpy_impl(void)
|
||||
{
|
||||
}
|
195
components/newlib/src/port/riscv/strcmp.S
Normal file
195
components/newlib/src/port/riscv/strcmp.S
Normal file
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2017 SiFive Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
/* Copyright (c) 2017 SiFive Inc. All rights reserved.
|
||||
|
||||
This copyrighted material is made available to anyone wishing to use,
|
||||
modify, copy, or redistribute it subject to the terms and conditions
|
||||
of the FreeBSD License. This program is distributed in the hope that
|
||||
it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
|
||||
including the implied warranties of MERCHANTABILITY or FITNESS FOR
|
||||
A PARTICULAR PURPOSE. A copy of this license is available at
|
||||
http://www.opensource.org/licenses.
|
||||
*/
|
||||
|
||||
#include <sys/asm.h>
|
||||
|
||||
/*
 * strcmp: a0 and a1 hold the two string pointers (both are dereferenced
 * below); the result is returned in a0.
 *
 * Aligned strings are compared a register-sized word at a time. Each word of
 * the first string is tested for a zero byte using the 0x7f.. mask in a5 and
 * the all-ones value in t2. Everything else is handled by the byte loop at
 * .Lmisaligned.
 */
.text
|
||||
.globl strcmp
|
||||
.type strcmp, @function
|
||||
strcmp:
|
||||
/* t2 = all ones: the value check_one_word expects for "no zero byte". */
li t2, -1
|
||||
|
||||
#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
|
||||
/* If either pointer is not SZREG-aligned, use the byte loop. */
or a4, a0, a1
|
||||
and a4, a4, SZREG-1
|
||||
bnez a4, .Lmisaligned
|
||||
#endif
|
||||
|
||||
/* a5 = 0x7f in every byte of the register. */
#if SZREG == 4
|
||||
li a5, 0x7f7f7f7f
|
||||
#else
|
||||
ld a5, mask
|
||||
#endif
|
||||
|
||||
/*
 * check_one_word i n: load word number i of both strings into a2 / a3.
 * t0 = ((a2 & a5) + a5) | (a2 | a5) is all ones exactly when a2 contains no
 * zero byte; otherwise branch to .Lnull<i>.
 * For every word but the last (i + 1 != n) a difference branches to
 * .Lmismatch. For the last word both pointers are advanced by n words and the
 * code loops while the words are equal.
 */
.macro check_one_word i n
|
||||
REG_L a2, \i*SZREG(a0)
|
||||
REG_L a3, \i*SZREG(a1)
|
||||
|
||||
and t0, a2, a5
|
||||
or t1, a2, a5
|
||||
add t0, t0, a5
|
||||
or t0, t0, t1
|
||||
|
||||
bne t0, t2, .Lnull\i
|
||||
.if \i+1-\n
|
||||
bne a2, a3, .Lmismatch
|
||||
.else
|
||||
add a0, a0, \n*SZREG
|
||||
add a1, a1, \n*SZREG
|
||||
beq a2, a3, .Lloop
|
||||
# fall through to .Lmismatch
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
 * foundnull i n: emits the .Lnull<i> landing pad for i != 0. It advances both
 * pointers to word i, then either returns 0 (words equal, so the strings
 * matched up to the terminator) or finishes in the byte loop.
 * .Lnull0 is emitted inside the i == 1 expansion, after the pointer
 * adjustment, so word 0 shares the same tail without moving the pointers.
 */
.macro foundnull i n
|
||||
.ifne \i
|
||||
.Lnull\i:
|
||||
add a0, a0, \i*SZREG
|
||||
add a1, a1, \i*SZREG
|
||||
.ifeq \i-1
|
||||
.Lnull0:
|
||||
.endif
|
||||
bne a2, a3, .Lmisaligned
|
||||
li a0, 0
|
||||
ret
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.Lloop:
|
||||
# examine full words at a time, favoring strings of a couple dozen chars
|
||||
#if __riscv_xlen == 32
|
||||
check_one_word 0 5
|
||||
check_one_word 1 5
|
||||
check_one_word 2 5
|
||||
check_one_word 3 5
|
||||
check_one_word 4 5
|
||||
#else
|
||||
check_one_word 0 3
|
||||
check_one_word 1 3
|
||||
check_one_word 2 3
|
||||
#endif
|
||||
# backwards branch to .Lloop contained above
|
||||
|
||||
.Lmismatch:
|
||||
# words don't match, but a2 has no null byte.
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
|
||||
/*
 * Little endian: the lowest-addressed bytes are the least significant ones.
 * Shifting left moves progressively larger low parts of both words to the
 * top, so the first unequal comparison identifies the lowest 16-bit chunk
 * that differs.
 */
#if __riscv_xlen == 64
|
||||
sll a4, a2, 48
|
||||
sll a5, a3, 48
|
||||
bne a4, a5, .Lmismatch_upper
|
||||
sll a4, a2, 32
|
||||
sll a5, a3, 32
|
||||
bne a4, a5, .Lmismatch_upper
|
||||
#endif
|
||||
sll a4, a2, 16
|
||||
sll a5, a3, 16
|
||||
bne a4, a5, .Lmismatch_upper
|
||||
|
||||
/* All lower chunks are equal: the difference is in the top 16 bits. */
srl a4, a2, 8*SZREG-16
|
||||
srl a5, a3, 8*SZREG-16
|
||||
sub a0, a4, a5
|
||||
/* A nonzero low byte of the difference means the lower-addressed byte of the
 * chunk differs; label 1 then compares just those bytes. Otherwise a0
 * already carries the sign of the higher byte difference. */
and a1, a0, 0xff
|
||||
bnez a1, 1f
|
||||
ret
|
||||
|
||||
.Lmismatch_upper:
|
||||
/* The differing chunk sits in the top 16 bits of a4 / a5; bring it down. */
srl a4, a4, 8*SZREG-16
|
||||
srl a5, a5, 8*SZREG-16
|
||||
sub a0, a4, a5
|
||||
and a1, a0, 0xff
|
||||
bnez a1, 1f
|
||||
ret
|
||||
|
||||
1:and a4, a4, 0xff
|
||||
and a5, a5, 0xff
|
||||
sub a0, a4, a5
|
||||
ret
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Big endian: the lowest-addressed bytes are the most significant ones, so
 * the words are compared from the top down using right shifts.
 */
#if __riscv_xlen == 64
|
||||
srl a4, a2, 48
|
||||
srl a5, a3, 48
|
||||
bne a4, a5, .Lmismatch_lower
|
||||
srl a4, a2, 32
|
||||
srl a5, a3, 32
|
||||
bne a4, a5, .Lmismatch_lower
|
||||
#endif
|
||||
srl a4, a2, 16
|
||||
srl a5, a3, 16
|
||||
bne a4, a5, .Lmismatch_lower
|
||||
|
||||
srl a4, a2, 8
|
||||
srl a5, a3, 8
|
||||
bne a4, a5, 1f
|
||||
and a4, a2, 0xff
|
||||
and a5, a3, 0xff
|
||||
1:sub a0, a4, a5
|
||||
ret
|
||||
|
||||
.Lmismatch_lower:
|
||||
srl a2, a4, 8
|
||||
srl a3, a5, 8
|
||||
bne a2, a3, 1f
|
||||
and a2, a4, 0xff
|
||||
and a3, a5, 0xff
|
||||
1:sub a0, a2, a3
|
||||
ret
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Byte loop: used for unaligned inputs and to resolve a word that contains
 * the terminator. It stops at the first differing byte or after a matching
 * zero byte and returns the difference of the last byte pair loaded.
 */
.Lmisaligned:
|
||||
# misaligned
|
||||
lbu a2, 0(a0)
|
||||
lbu a3, 0(a1)
|
||||
add a0, a0, 1
|
||||
add a1, a1, 1
|
||||
bne a2, a3, 1f
|
||||
bnez a2, .Lmisaligned
|
||||
|
||||
1:
|
||||
sub a0, a2, a3
|
||||
ret
|
||||
|
||||
# cases in which a null byte was detected
|
||||
#if __riscv_xlen == 32
|
||||
foundnull 0 5
|
||||
foundnull 1 5
|
||||
foundnull 2 5
|
||||
foundnull 3 5
|
||||
foundnull 4 5
|
||||
#else
|
||||
foundnull 0 3
|
||||
foundnull 1 3
|
||||
foundnull 2 3
|
||||
#endif
|
||||
.size strcmp, .-strcmp
|
||||
|
||||
/* 64-bit registers: the 0x7f.. mask is loaded from this read-only constant. */
#if SZREG == 8
|
||||
.section .srodata.cst8,"aM",@progbits,8
|
||||
.align 3
|
||||
mask:
|
||||
.dword 0x7f7f7f7f7f7f7f7f
|
||||
#endif
|
||||
|
||||
/*
 * Empty hook symbol. The newlib component's CMakeLists passes
 * "-u esp_libc_include_strcmp_impl" to the linker so that this object file,
 * and with it strcmp, is pulled into the link. The label has no body and is
 * not meant to be called.
 */
.text
|
||||
.globl esp_libc_include_strcmp_impl
|
||||
.type esp_libc_include_strcmp_impl, @function
|
||||
esp_libc_include_strcmp_impl:
|
104
components/newlib/src/port/riscv/strcpy.c
Normal file
104
components/newlib/src/port/riscv/strcpy.c
Normal file
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2017 SiFive Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
/* Copyright (c) 2017 SiFive Inc. All rights reserved.
|
||||
|
||||
This copyrighted material is made available to anyone wishing to use,
|
||||
modify, copy, or redistribute it subject to the terms and conditions
|
||||
of the FreeBSD License. This program is distributed in the hope that
|
||||
it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
|
||||
including the implied warranties of MERCHANTABILITY or FITNESS FOR
|
||||
A PARTICULAR PURPOSE. A copy of this license is available at
|
||||
http://www.opensource.org/licenses.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Return nonzero if any byte of w is zero.
 *
 * With mask = 0x7f in every byte, ((w & mask) + mask) | w | mask is 0xff in
 * each byte that is nonzero and 0x7f in each byte that is zero, so the
 * complement has bit 7 set exactly in the zero bytes.
 */
__attribute__((always_inline))
|
||||
static inline
|
||||
unsigned long __newlib__libc_detect_null(unsigned long w)
|
||||
{
|
||||
unsigned long mask = 0x7f7f7f7f;
|
||||
/* Extend the mask to 64 bits; two 16-bit shifts keep the shift count
 * below the width of a 32-bit long. */
if (sizeof(long) == 8) {
|
||||
mask = ((mask << 16) << 16) | mask;
|
||||
}
|
||||
return ~(((w & mask) + mask) | w | mask);
|
||||
}
|
||||
|
||||
/*
 * strcpy - copy the string src, including its terminator, to dst and
 * return dst.
 *
 * Word path: whole longs are copied while the source word contains no zero
 * byte, then the final word is finished byte by byte. When neither
 * __riscv_misaligned_slow nor __riscv_misaligned_fast is set, the word path
 * is taken only if both pointers are long-aligned; otherwise the plain byte
 * loop at the end of the function is used.
 */
__attribute__((optimize("-Os")))
|
||||
char *strcpy(char *dst, const char *src)
|
||||
{
|
||||
char *dst0 = dst;
|
||||
|
||||
#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
|
||||
int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof(long) - 1);
|
||||
if (__builtin_expect(!misaligned, 1))
|
||||
#endif
|
||||
{
|
||||
long *ldst = (long *)dst;
|
||||
const long *lsrc = (const long *)src;
|
||||
|
||||
/* Copy full words until the source word holds the terminator. */
while (!__newlib__libc_detect_null(*lsrc)) {
|
||||
*ldst++ = *lsrc++;
|
||||
}
|
||||
|
||||
dst = (char *)ldst;
|
||||
src = (const char *)lsrc;
|
||||
|
||||
/* The word at src contains a zero byte. Store its bytes one at a time,
 * returning as soon as the terminator has been written; each load is
 * issued ahead of the store that consumes it. */
char c0 = src[0];
|
||||
char c1 = src[1];
|
||||
char c2 = src[2];
|
||||
if (!(*dst++ = c0)) {
|
||||
return dst0;
|
||||
}
|
||||
if (!(*dst++ = c1)) {
|
||||
return dst0;
|
||||
}
|
||||
char c3 = src[3];
|
||||
if (!(*dst++ = c2)) {
|
||||
return dst0;
|
||||
}
|
||||
/* 4-byte long: the first three bytes were nonzero, so the fourth byte
 * is the terminator and is written at `out`. */
if (sizeof(long) == 4) {
|
||||
goto out;
|
||||
}
|
||||
char c4 = src[4];
|
||||
if (!(*dst++ = c3)) {
|
||||
return dst0;
|
||||
}
|
||||
char c5 = src[5];
|
||||
if (!(*dst++ = c4)) {
|
||||
return dst0;
|
||||
}
|
||||
char c6 = src[6];
|
||||
if (!(*dst++ = c5)) {
|
||||
return dst0;
|
||||
}
|
||||
if (!(*dst++ = c6)) {
|
||||
return dst0;
|
||||
}
|
||||
|
||||
/* Every earlier byte of the word was nonzero: the last byte is the
 * terminator. */
out:
|
||||
*dst++ = 0;
|
||||
return dst0;
|
||||
}
|
||||
|
||||
/* Byte-by-byte copy for unaligned pointers; the terminator is copied too. */
char ch;
|
||||
do {
|
||||
ch = *src;
|
||||
src++;
|
||||
dst++;
|
||||
*(dst - 1) = ch;
|
||||
} while (ch);
|
||||
|
||||
return dst0;
|
||||
}
|
||||
|
||||
// Hook to force the linker to include this file: an intentionally empty
// function whose symbol can be requested with the linker's "-u" option.
void esp_libc_include_strcpy_impl(void)
|
||||
{
|
||||
}
|
62
components/newlib/src/string/local.h
Normal file
62
components/newlib/src/string/local.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <_ansi.h>
|
||||
#include <limits.h>
|
||||
/*
   Taken from glibc:
   Add the compiler optimization to inhibit loop transformation to library
   calls.  This is used to avoid recursive calls in memset and memmove
   default implementations.
*/
# define __inhibit_loop_to_libcall \
  __attribute__ ((__optimize__ ("-fno-tree-loop-distribute-patterns")))

/* Nonzero if X is not aligned on a "long" boundary.
 * This macro is used to skip a few bytes to find an aligned pointer.
 * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
 * to avoid small performance penalties (if they are not zero).
 * The argument is parenthesized so that expression arguments are cast and
 * masked as a whole. */
#define UNALIGNED_X(X) ((long)(X) & (sizeof (long) - 1))

#define _HAVE_HW_MISALIGNED_ACCESS (__riscv_misaligned_fast || __riscv_misaligned_slow)

#if _HAVE_HW_MISALIGNED_ACCESS
/* Hardware performs unaligned operations with little
 * to no penalty compared to byte-to-byte copy. */
#define UNALIGNED_X_Y(X, Y) (0)
#else /* _HAVE_HW_MISALIGNED_ACCESS */
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED_X_Y(X, Y) \
  (((long)(X) & (sizeof (long) - 1)) | ((long)(Y) & (sizeof (long) - 1)))
#endif /* _HAVE_HW_MISALIGNED_ACCESS */

/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLE_BLOCK_SIZE (sizeof (long))

/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIG_BLOCK_SIZE (sizeof (long) << 2)

/* Threshold for punting to the little block byte copier. */
#define TOO_SMALL_LITTLE_BLOCK(LEN) ((LEN) < LITTLE_BLOCK_SIZE)

/* Threshold for punting to the big block byte copier. */
#define TOO_SMALL_BIG_BLOCK(LEN) ((LEN) < BIG_BLOCK_SIZE)

/* Macros for detecting endchar. */
#if LONG_MAX == 2147483647L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif

/* Returns nonzero if (long)X contains the byte used to fill (long)MASK.
 * Both arguments are parenthesized so that the XOR applies to the complete
 * argument expressions. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL((X) ^ (MASK)))
|
59
components/newlib/src/string/memcmp.c
Normal file
59
components/newlib/src/string/memcmp.c
Normal file
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <string.h>
|
||||
#include "local.h"
|
||||
|
||||
__attribute__((optimize("-Os")))
|
||||
int
|
||||
memcmp(const void *m1,
|
||||
const void *m2,
|
||||
size_t n)
|
||||
{
|
||||
unsigned char *s1 = (unsigned char *) m1;
|
||||
unsigned char *s2 = (unsigned char *) m2;
|
||||
unsigned long *a1;
|
||||
unsigned long *a2;
|
||||
|
||||
/* If the size is too small, or either pointer is unaligned,
|
||||
then we punt to the byte compare loop. Hopefully this will
|
||||
not turn up in inner loops. */
|
||||
if (!TOO_SMALL_LITTLE_BLOCK(n) && !UNALIGNED_X_Y(s1, s2)) {
|
||||
/* Otherwise, load and compare the blocks of memory one
|
||||
word at a time. */
|
||||
a1 = (unsigned long*) s1;
|
||||
a2 = (unsigned long*) s2;
|
||||
while (!TOO_SMALL_LITTLE_BLOCK(n)) {
|
||||
if (*a1 != *a2) {
|
||||
break;
|
||||
}
|
||||
a1++;
|
||||
a2++;
|
||||
n -= LITTLE_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* check m mod LITTLE_BLOCK_SIZE remaining characters */
|
||||
|
||||
s1 = (unsigned char*)a1;
|
||||
s2 = (unsigned char*)a2;
|
||||
}
|
||||
|
||||
while (n--) {
|
||||
if (*s1 != *s2) {
|
||||
return *s1 - *s2;
|
||||
}
|
||||
s1++;
|
||||
s2++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Intentionally empty. This symbol exists only as a hook: referencing it
 * forces the linker to include this file, and with it the memcmp()
 * defined here.
 */
void esp_libc_include_memcmp_impl(void)
{
}
|
88
components/newlib/src/string/memmove.c
Normal file
88
components/newlib/src/string/memmove.c
Normal file
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <_ansi.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include "local.h"
|
||||
|
||||
__attribute__((optimize("-Os")))
|
||||
void *
|
||||
__inhibit_loop_to_libcall
|
||||
memmove(void *dst_void,
|
||||
const void *src_void,
|
||||
size_t length)
|
||||
{
|
||||
char *dst = dst_void;
|
||||
const char *src = src_void;
|
||||
long *aligned_dst;
|
||||
const long *aligned_src;
|
||||
|
||||
if (src < dst && dst < src + length) {
|
||||
/* Destructive overlap...have to copy backwards */
|
||||
src += length;
|
||||
dst += length;
|
||||
|
||||
if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) {
|
||||
aligned_dst = (long*)dst;
|
||||
aligned_src = (long*)src;
|
||||
|
||||
/* Copy one long word at a time if possible. */
|
||||
while (!TOO_SMALL_LITTLE_BLOCK(length)) {
|
||||
*--aligned_dst = *--aligned_src;
|
||||
length -= LITTLE_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* Pick up any residual with a byte copier. */
|
||||
dst = (char*)aligned_dst;
|
||||
src = (char*)aligned_src;
|
||||
}
|
||||
|
||||
while (length--) {
|
||||
*--dst = *--src;
|
||||
}
|
||||
} else {
|
||||
/* Use optimizing algorithm for a non-destructive copy to closely
|
||||
match memcpy. If the size is small or either SRC or DST is unaligned,
|
||||
then punt into the byte copy loop. This should be rare. */
|
||||
if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) {
|
||||
aligned_dst = (long*)dst;
|
||||
aligned_src = (long*)src;
|
||||
|
||||
/* Copy 4X long words at a time if possible. */
|
||||
while (!TOO_SMALL_BIG_BLOCK(length)) {
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
length -= BIG_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* Copy one long word at a time if possible. */
|
||||
while (!TOO_SMALL_LITTLE_BLOCK(length)) {
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
length -= LITTLE_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* Pick up any residual with a byte copier. */
|
||||
dst = (char*)aligned_dst;
|
||||
src = (char*)aligned_src;
|
||||
}
|
||||
|
||||
while (length--) {
|
||||
*dst++ = *src++;
|
||||
}
|
||||
}
|
||||
|
||||
return dst_void;
|
||||
}
|
||||
|
||||
/*
 * Intentionally empty. This symbol exists only as a hook: referencing it
 * forces the linker to include this file, and with it the memmove()
 * defined here.
 */
void esp_libc_include_memmove_impl(void)
{
}
|
63
components/newlib/src/string/strncmp.c
Normal file
63
components/newlib/src/string/strncmp.c
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "local.h"
|
||||
|
||||
/*
 * Compare at most n characters of the strings s1 and s2.
 *
 * Returns 0 if the strings are equal up to their terminator or up to n
 * characters, whichever comes first; otherwise the difference between the
 * first pair of differing characters, each read as unsigned char.
 *
 * When UNALIGNED_X_Y() reports nothing for the two pointers, equal data is
 * skipped one "unsigned long" at a time; DETECT_NULL() spots a terminator
 * inside an equal word. The byte loop handles everything else.
 */
__attribute__((optimize("-Os")))
int
strncmp(const char *s1,
        const char *s2,
        size_t n)
{
    if (n == 0) {
        return 0;
    }

    if (!UNALIGNED_X_Y(s1, s2)) {
        const unsigned long *w1 = (const unsigned long *)s1;
        const unsigned long *w2 = (const unsigned long *)s2;

        while (n >= sizeof(long) && *w1 == *w2) {
            n -= sizeof(long);

            /* The words are equal, so running out of characters or finding
               a terminator in this word means the strings compare equal. */
            if (n == 0) {
                return 0;
            }
            if (DETECT_NULL(*w1)) {
                return 0;
            }

            w1++;
            w2++;
        }

        /* Either the current words differ or fewer than sizeof(long)
           characters remain: settle the rest bytewise. */
        s1 = (const char *)w1;
        s2 = (const char *)w2;
    }

    while (n > 0 && *s1 == *s2) {
        n--;

        /* Equal characters: done if that was the last one allowed or the
           terminator. */
        if (n == 0 || *s1 == '\0') {
            return 0;
        }

        s1++;
        s2++;
    }

    return *(const unsigned char *)s1 - *(const unsigned char *)s2;
}
|
||||
|
||||
/*
 * Intentionally empty. This symbol exists only as a hook: referencing it
 * forces the linker to include this file, and with it the strncmp()
 * defined here.
 */
void esp_libc_include_strncmp_impl(void)
{
}
|
56
components/newlib/src/string/strncpy.c
Normal file
56
components/newlib/src/string/strncpy.c
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "local.h"
|
||||
|
||||
/*
 * Copy at most count characters of the string src0 into dst0 and return
 * dst0.
 *
 * If src0 is shorter than count, the rest of dst0 up to count characters is
 * filled with '\0'. If no terminator is found within the first count
 * characters of src0, dst0 is left without one.
 *
 * When UNALIGNED_X_Y() reports nothing for the two pointers and at least one
 * word fits in count, whole "long" words are copied for as long as
 * DETECT_NULL() finds no zero byte in the next source word.
 */
__attribute__((optimize("-Os")))
char *
strncpy(char *__restrict dst0,
        const char *__restrict src0,
        size_t count)
{
    char *out = dst0;
    const char *in = src0;

    if (!UNALIGNED_X_Y(in, out) && !TOO_SMALL_LITTLE_BLOCK(count)) {
        long *out_word = (long *)out;
        const long *in_word = (const long *)in;

        /* Stop before a word that holds the terminator, so the byte loop
           below can locate it exactly. */
        while (!TOO_SMALL_LITTLE_BLOCK(count) && !DETECT_NULL(*in_word)) {
            *out_word++ = *in_word++;
            count -= sizeof(long int);
        }

        out = (char *)out_word;
        in = (const char *)in_word;
    }

    /* Copy characters up to and including the terminator, or until count
       is used up. */
    while (count > 0) {
        const char c = *in++;

        *out++ = c;
        count--;
        if (c == '\0') {
            break;
        }
    }

    /* Zero-fill whatever is left of the count characters. */
    for (; count > 0; count--) {
        *out++ = '\0';
    }

    return dst0;
}
|
||||
|
||||
/*
 * Intentionally empty. This symbol exists only as a hook: referencing it
 * forces the linker to include this file, and with it the strncpy()
 * defined here.
 */
void esp_libc_include_strncpy_impl(void)
{
}
|
@@ -16,6 +16,15 @@ if(CONFIG_LIBC_NEWLIB)
|
||||
"test_file.c")
|
||||
endif()
|
||||
|
||||
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
|
||||
list(APPEND srcs "test_misaligned_mem_performance.c")
|
||||
endif()
|
||||
|
||||
idf_component_register(SRCS "${srcs}"
|
||||
PRIV_REQUIRES unity vfs cmock driver esp_timer spi_flash test_utils pthread esp_psram
|
||||
WHOLE_ARCHIVE)
|
||||
|
||||
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
|
||||
set_source_files_properties(test_misaligned_mem_performance.c
|
||||
PROPERTIES COMPILE_FLAGS "-Wno-incompatible-pointer-types -Wno-strict-prototypes")
|
||||
endif()
|
||||
|
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*
|
||||
* SPDX-License-Identifier: Unlicense OR CC0-1.0
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "esp_heap_caps.h"
|
||||
#include "hal/cpu_ll.h"
|
||||
#include "unity.h"
|
||||
|
||||
#define MAX_MEMTEST_SIZE 4096
|
||||
|
||||
uint32_t test_function_dest_src_size(void (*foo)(), bool pass_size)
|
||||
{
|
||||
uint32_t ccount1, ccount2;
|
||||
char* test_des = heap_caps_aligned_alloc(32, MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
|
||||
char* test_src = heap_caps_aligned_alloc(32, MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
|
||||
|
||||
TEST_ASSERT_NOT_NULL(test_des);
|
||||
TEST_ASSERT_NOT_NULL(test_src);
|
||||
|
||||
/* Prepare arrays for X-cmp functions to make the algorithm go through whole buffers. */
|
||||
memset(test_src, 'a', MAX_MEMTEST_SIZE);
|
||||
test_src[MAX_MEMTEST_SIZE - 1] = 0;
|
||||
memset(test_des, 'a', MAX_MEMTEST_SIZE);
|
||||
test_des[MAX_MEMTEST_SIZE - 1] = 'b';
|
||||
test_des[MAX_MEMTEST_SIZE - 1] = 0;
|
||||
|
||||
ccount1 = esp_cpu_get_cycle_count();
|
||||
if (pass_size) {
|
||||
foo(test_des + 1, test_src + 2, MAX_MEMTEST_SIZE - 2);
|
||||
} else {
|
||||
foo(test_des + 1, test_src + 2);
|
||||
}
|
||||
ccount2 = esp_cpu_get_cycle_count();
|
||||
|
||||
heap_caps_free(test_des);
|
||||
heap_caps_free(test_src);
|
||||
|
||||
return ccount2 - ccount1;
|
||||
}
|
||||
|
||||
TEST_CASE("memcpy", "[misaligned_mem]")
{
    /* esp32c2 reference figures: 4128 cycles instead of 28676. */
    const uint32_t cycles = test_function_dest_src_size(memcpy, true);

    TEST_ASSERT_LESS_THAN(5000, cycles);
}
|
||||
|
||||
TEST_CASE("memcmp", "[misaligned_mem]")
{
    /* esp32c2 reference figures: 14259 cycles instead of 49147. */
    const uint32_t cycles = test_function_dest_src_size(memcmp, true);

    TEST_ASSERT_LESS_THAN(16000, cycles);
}
|
||||
|
||||
TEST_CASE("memmove", "[misaligned_mem]")
{
    /* esp32c2 reference figures: 8086 cycles instead of 33896. */
    const uint32_t cycles = test_function_dest_src_size(memmove, true);

    TEST_ASSERT_LESS_THAN(10000, cycles);
}
|
||||
|
||||
TEST_CASE("memmove - overlapping", "[misaligned_mem]")
{
    char *region = heap_caps_aligned_alloc(32, MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);

    TEST_ASSERT_NOT_NULL(region);

    /* The destination starts 3 bytes after the source inside the same
       buffer, so the two regions overlap and the destination lies within
       the source range. */
    const uint32_t started = esp_cpu_get_cycle_count();
    memmove(region + 5, region + 2, MAX_MEMTEST_SIZE - 5);
    const uint32_t elapsed = esp_cpu_get_cycle_count() - started;

    heap_caps_free(region);

    /* esp32c2 reference figures: 11503 cycles instead of 45024. */
    TEST_ASSERT_LESS_THAN(15000, elapsed);
}
|
||||
|
||||
TEST_CASE("strcpy", "[misaligned_mem]")
{
    /* esp32c2 reference figures: 17313 cycles instead of 32771. */
    const uint32_t cycles = test_function_dest_src_size(strcpy, false);

    TEST_ASSERT_LESS_THAN(20000, cycles);
}
|
||||
|
||||
TEST_CASE("strcmp", "[misaligned_mem]")
{
    /* esp32c2 reference figures: 13191 cycles instead of 32775. */
    const uint32_t cycles = test_function_dest_src_size(strcmp, false);

    TEST_ASSERT_LESS_THAN(15000, cycles);
}
|
||||
|
||||
TEST_CASE("strncpy", "[misaligned_mem]")
{
    /* esp32c2 reference figures: 21475 cycles instead of 36859. */
    const uint32_t cycles = test_function_dest_src_size(strncpy, true);

    TEST_ASSERT_LESS_THAN(25000, cycles);
}
|
||||
|
||||
TEST_CASE("strncmp", "[misaligned_mem]")
{
    /* esp32c2 reference figures: 24369 cycles instead of 49141. */
    const uint32_t cycles = test_function_dest_src_size(strncmp, true);

    TEST_ASSERT_LESS_THAN(27000, cycles);
}
|
@@ -16,6 +16,7 @@ from pytest_embedded_idf.utils import idf_parametrize
|
||||
('psram_esp32', 'esp32'),
|
||||
('release_esp32', 'esp32'),
|
||||
('release_esp32c2', 'esp32c2'),
|
||||
('misaligned_mem', 'esp32c3'),
|
||||
],
|
||||
indirect=['config', 'target'],
|
||||
)
|
||||
|
@@ -0,0 +1 @@
|
||||
CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS=y
|
@@ -87,6 +87,7 @@ The following optimizations improve the execution of nearly all code, including
|
||||
:SOC_CPU_HAS_FPU: - Avoid using floating point arithmetic ``float``. Even though {IDF_TARGET_NAME} has a single precision hardware floating point unit, floating point calculations are always slower than integer calculations. If possible then use fixed point representations, a different method of integer representation, or convert part of the calculation to be integer only before switching to floating point.
|
||||
:not SOC_CPU_HAS_FPU: - Avoid using floating point arithmetic ``float``. On {IDF_TARGET_NAME} these calculations are emulated in software and are very slow. If possible, use fixed point representations, a different method of integer representation, or convert part of the calculation to be integer only before switching to floating point.
|
||||
- Avoid using double precision floating point arithmetic ``double``. These calculations are emulated in software and are very slow. If possible then use an integer-based representation, or single-precision floating point.
|
||||
:CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY: - Avoid misaligned 4-byte memory accesses in performance-critical code sections. For potential performance improvements, consider enabling :ref:`CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS`. Note that properly aligned memory operations always execute at full speed without performance penalties. Enabling this option requires approximately 190 additional bytes of IRAM and 870 additional bytes of flash memory.
|
||||
|
||||
|
||||
.. only:: esp32s2 or esp32s3 or esp32p4
|
||||
|
@@ -79,8 +79,7 @@ newlib_component:
|
||||
- 'components/newlib/platform_include/**'
|
||||
- 'components/newlib/port/**'
|
||||
- 'components/newlib/priv_include/**'
|
||||
- 'components/newlib/*.c'
|
||||
- 'components/newlib/*.h'
|
||||
- 'components/newlib/src/**'
|
||||
allowed_licenses:
|
||||
- Apache-2.0
|
||||
- BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
|
Reference in New Issue
Block a user