Merge branch 'feat/newlib-optimized-misaligned-memory' into 'master'

feat(newlib): riscv: add CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS config option

See merge request espressif/esp-idf!36704
This commit is contained in:
Alexey Lapshin
2025-03-27 16:32:01 +08:00
46 changed files with 1049 additions and 58 deletions

View File

@@ -154,6 +154,10 @@ if(BOOTLOADER_BUILD)
rom_linker_script("libc-funcs")
else()
rom_linker_script("libc")
# Add the ROM linker script that carries the mem/str symbols known to be slow on
# misaligned buffers, but only when the ROM has that limitation and the
# optimized replacements (CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) are disabled.
# With the option enabled the script is left out, so those symbols are not
# taken from this ROM script.
if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
rom_linker_script("libc-suboptimal_for_misaligned_mem")
endif()
if(CONFIG_LIBC_NEWLIB)
rom_linker_script("newlib")
endif()
@@ -305,6 +309,9 @@ else() # Regular app build
if(CONFIG_ESP_ROM_HAS_NEWLIB AND NOT target STREQUAL "esp32" AND NOT target STREQUAL "esp32s2")
# ESP32 and S2 are a bit different, keep them as special cases in the target specific include section
rom_linker_script("libc")
# Same guard as used next to the other rom_linker_script("libc") call: the
# "libc-suboptimal_for_misaligned_mem" script is added only while the ROM
# implementations are the ones in use, i.e. the ROM is flagged as suboptimal
# and CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS is not set.
if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
rom_linker_script("libc-suboptimal_for_misaligned_mem")
endif()
if(CONFIG_LIBC_NEWLIB)
rom_linker_script("newlib")
endif()

View File

@@ -102,3 +102,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
config ESP_ROM_CONSOLE_OUTPUT_SECONDARY
bool
default y
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
bool
default y

View File

@@ -31,3 +31,4 @@
#define ESP_ROM_HAS_VPRINTF_FUNC (1) // ROM has ets_vprintf
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
#define ESP_ROM_CONSOLE_OUTPUT_SECONDARY (1) // The console output functions will also output to the USB-serial secondary console
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.

View File

@@ -0,0 +1,15 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
/* These functions are not well optimized for misaligned memory access.
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
* patch series. */
memcpy = 0x4000048c;
memmove = 0x40000490;
memcmp = 0x40000494;
strcpy = 0x40000498;
strncpy = 0x4000049c;
strcmp = 0x400004a0;
strncmp = 0x400004a4;

View File

@@ -1,17 +1,10 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
esp_rom_newlib_init_common_mutexes = 0x40000484;
memset = 0x40000488;
memcpy = 0x4000048c;
memmove = 0x40000490;
memcmp = 0x40000494;
strcpy = 0x40000498;
strncpy = 0x4000049c;
strcmp = 0x400004a0;
strncmp = 0x400004a4;
strlen = 0x400004a8;
strstr = 0x400004ac;
bzero = 0x400004b0;

View File

@@ -98,3 +98,7 @@ config ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB
config ESP_ROM_CONSOLE_OUTPUT_SECONDARY
bool
default y
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
bool
default y

View File

@@ -30,3 +30,4 @@
#define ESP_ROM_HAS_VERSION (1) // ROM has version/eco information
#define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep.
#define ESP_ROM_CONSOLE_OUTPUT_SECONDARY (1) // The console output functions will also output to the USB-serial secondary console
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.

View File

@@ -0,0 +1,15 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
/* These functions are not well optimized for misaligned memory access.
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
* patch series. */
memcpy = 0x40000358;
memmove = 0x4000035c;
memcmp = 0x40000360;
strcpy = 0x40000364;
strncpy = 0x40000368;
strcmp = 0x4000036c;
strncmp = 0x40000370;

View File

@@ -1,17 +1,10 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
esp_rom_newlib_init_common_mutexes = 0x40000350;
memset = 0x40000354;
memcpy = 0x40000358;
memmove = 0x4000035c;
memcmp = 0x40000360;
strcpy = 0x40000364;
strncpy = 0x40000368;
strcmp = 0x4000036c;
strncmp = 0x40000370;
strlen = 0x40000374;
strstr = 0x40000378;
bzero = 0x4000037c;

View File

@@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
config ESP_ROM_CLIC_INT_THRESH_PATCH
bool
default y
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
bool
default y

View File

@@ -32,3 +32,4 @@
#define ESP_ROM_USB_OTG_NUM (-1) // No USB_OTG CDC in the ROM, set -1 for Kconfig usage.
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
#define ESP_ROM_CLIC_INT_THRESH_PATCH (1) // ROM version of esprv_intc_int_set_threshold incorrectly assumes lowest MINTTHRESH is 0x1F, should be 0xF
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.

View File

@@ -0,0 +1,15 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
/* These functions are not well optimized for misaligned memory access.
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
* patch series. */
memcpy = 0x400004bc;
memmove = 0x400004c0;
memcmp = 0x400004c4;
strcpy = 0x400004c8;
strncpy = 0x400004cc;
strcmp = 0x400004d0;
strncmp = 0x400004d4;

View File

@@ -1,17 +1,10 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
esp_rom_newlib_init_common_mutexes = 0x400004b4;
memset = 0x400004b8;
memcpy = 0x400004bc;
memmove = 0x400004c0;
memcmp = 0x400004c4;
strcpy = 0x400004c8;
strncpy = 0x400004cc;
strcmp = 0x400004d0;
strncmp = 0x400004d4;
strlen = 0x400004d8;
strstr = 0x400004dc;
bzero = 0x400004e0;

View File

@@ -114,3 +114,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
config ESP_ROM_NO_USB_SERIAL_OUTPUT_API
bool
default y
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
bool
default y

View File

@@ -34,3 +34,4 @@
#define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep.
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
#define ESP_ROM_NO_USB_SERIAL_OUTPUT_API (1) // ROM does not export the usb-serial-jtag write char function
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.

View File

@@ -0,0 +1,15 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
/* These functions are not well optimized for misaligned memory access.
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
* patch series. */
memcpy = 0x400004ac;
memmove = 0x400004b0;
memcmp = 0x400004b4;
strcpy = 0x400004b8;
strncpy = 0x400004bc;
strcmp = 0x400004c0;
strncmp = 0x400004c4;

View File

@@ -1,17 +1,10 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
esp_rom_newlib_init_common_mutexes = 0x400004a4;
memset = 0x400004a8;
memcpy = 0x400004ac;
memmove = 0x400004b0;
memcmp = 0x400004b4;
strcpy = 0x400004b8;
strncpy = 0x400004bc;
strcmp = 0x400004c0;
strncmp = 0x400004c4;
strlen = 0x400004c8;
strstr = 0x400004cc;
bzero = 0x400004d0;

View File

@@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
config ESP_ROM_CACHE_WB_INVLD_LOW_RANGE
bool
default y
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
bool
default y

View File

@@ -32,3 +32,4 @@
#define ESP_ROM_USB_OTG_NUM (-1) // No USB_OTG CDC in the ROM, set -1 for Kconfig usage.
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
#define ESP_ROM_CACHE_WB_INVLD_LOW_RANGE (1) // ROM `Cache_WriteBack_Addr` and `Cache_Invalidate_Addr` can only access low vaddr parts
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.

View File

@@ -0,0 +1,15 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
/* These functions are not well optimized for misaligned memory access.
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
* patch series. */
memcpy = 0x400004bc;
memmove = 0x400004c0;
memcmp = 0x400004c4;
strcpy = 0x400004c8;
strncpy = 0x400004cc;
strcmp = 0x400004d0;
strncmp = 0x400004d4;

View File

@@ -1,17 +1,10 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
esp_rom_newlib_init_common_mutexes = 0x400004b4;
memset = 0x400004b8;
memcpy = 0x400004bc;
memmove = 0x400004c0;
memcmp = 0x400004c4;
strcpy = 0x400004c8;
strncpy = 0x400004cc;
strcmp = 0x400004d0;
strncmp = 0x400004d4;
strlen = 0x400004d8;
strstr = 0x400004dc;
bzero = 0x400004e0;

View File

@@ -106,3 +106,7 @@ config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
config ESP_ROM_NO_USB_SERIAL_OUTPUT_API
bool
default y
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
bool
default y

View File

@@ -32,3 +32,4 @@
#define ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB (1) // ROM supports the HP core to jump to the RTC memory to execute stub code after waking up from deepsleep.
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
#define ESP_ROM_NO_USB_SERIAL_OUTPUT_API (1) // ROM does not export the usb-serial-jtag write char function
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.

View File

@@ -0,0 +1,15 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
/* These functions are not well optimized for misaligned memory access.
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
* patch series. */
memcpy = 0x400004a4;
memmove = 0x400004a8;
memcmp = 0x400004ac;
strcpy = 0x400004b0;
strncpy = 0x400004b4;
strcmp = 0x400004b8;
strncmp = 0x400004bc;

View File

@@ -1,17 +1,10 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
esp_rom_newlib_init_common_mutexes = 0x4000049c;
memset = 0x400004a0;
memcpy = 0x400004a4;
memmove = 0x400004a8;
memcmp = 0x400004ac;
strcpy = 0x400004b0;
strncpy = 0x400004b4;
strcmp = 0x400004b8;
strncmp = 0x400004bc;
strlen = 0x400004c0;
strstr = 0x400004c4;
bzero = 0x400004c8;

View File

@@ -82,3 +82,7 @@ config ESP_ROM_CLIC_INT_TYPE_PATCH
config ESP_ROM_HAS_OUTPUT_PUTC_FUNC
bool
default y
config ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
bool
default y

View File

@@ -26,3 +26,4 @@
#define ESP_ROM_HAS_VERSION (1) // ROM has version/eco information
#define ESP_ROM_CLIC_INT_TYPE_PATCH (1) // ROM api esprv_intc_int_set_type configuring edge type interrupt is invalid
#define ESP_ROM_HAS_OUTPUT_PUTC_FUNC (1) // ROM has esp_rom_output_putc (or ets_write_char_uart)
#define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY (1) // ROM mem/str functions are not optimized well for misaligned memory access.

View File

@@ -0,0 +1,15 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
/* These functions are not well optimized for misaligned memory access.
* See details in the https://sourceware.org/pipermail/newlib/2025/021470.html
* patch series. */
memcpy = 0x4fc0026c;
memmove = 0x4fc00270;
memcmp = 0x4fc00274;
strcpy = 0x4fc00278;
strncpy = 0x4fc0027c;
strcmp = 0x4fc00280;
strncmp = 0x4fc00284;

View File

@@ -1,17 +1,10 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
esp_rom_newlib_init_common_mutexes = 0x4fc00264;
memset = 0x4fc00268;
memcpy = 0x4fc0026c;
memmove = 0x4fc00270;
memcmp = 0x4fc00274;
strcpy = 0x4fc00278;
strncpy = 0x4fc0027c;
strcmp = 0x4fc00280;
strncmp = 0x4fc00284;
strlen = 0x4fc00288;
strstr = 0x4fc0028c;
bzero = 0x4fc00290;

View File

@@ -1,3 +1,12 @@
# Configure-time reminder for RISC-V targets that do not declare the
# ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY capability at all
# (typically a freshly added chip). The linker-script split is needed when the
# capability is set (non-zero): only then must the slow functions live in their
# own *.rom.libc-suboptimal_for_misaligned_mem.ld so they can be left out.
if(CONFIG_IDF_TARGET_ARCH_RISCV AND NOT DEFINED CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY)
    message(WARNING
        "You probably added a new chip support. Please do the next steps:\n"
        " 1) Check if ROM functions implementation is optimized on misaligned memory operations.\n"
        " 2) Define ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY to esp_rom_caps.h. If it is non-zero:\n"
        " 2.1) Move some functions out from *.rom.libc.ld file (see *.rom.libc-suboptimal_for_misaligned_mem.ld).\n"
        "Find a related test in the newlib component to use as a reference.")
endif()
set(srcs "test_app_main.c"
"test_libgcc.c"
"test_printf.c")

View File

@@ -37,6 +37,23 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND)
list(APPEND srcs "src/port/xtensa/stdatomic_s32c1i.c")
endif()
# Build the in-tree replacements for the mem/str functions whose ROM versions
# are slow on misaligned buffers.
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
    list(APPEND srcs
        "src/string/memcmp.c"
        "src/string/memmove.c"
        "src/string/strncmp.c"
        "src/string/strncpy.c"
        "src/port/riscv/memcpy.c"
        "src/port/riscv/strcpy.c"
        "src/port/riscv/strcmp.S")

    # Every source above defines an empty esp_libc_include_<func>_impl hook.
    # Requesting the hook as an undefined symbol (-u) forces the linker to pull
    # in the object that defines it, and with it the replacement function.
    # One entry per source file: memcpy was previously missing from this list.
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcpy_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl")
    list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl")
endif()
if(CONFIG_LIBC_NEWLIB)
list(APPEND srcs
"src/flockfile.c"

View File

@@ -143,6 +143,23 @@ menu "LibC"
select ESP_TIME_FUNCS_USE_NONE
endchoice
# Opt-in replacement of the ROM mem/str routines that are slow on misaligned
# buffers. Only offered on chips whose ROM declares that limitation.
config LIBC_OPTIMIZED_MISALIGNED_ACCESS
    bool "Use performance-optimized memXXX/strXXX functions on misaligned memory access"
    default n
    depends on ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY
    help
        Enables performance-optimized implementations of memory and string functions
        when handling misaligned memory.

        This increases the image size by ~1000 bytes.

        Optimized functions include:

        - memcpy
        - memcmp
        - memmove
        - str[n]cpy
        - str[n]cmp
endmenu # LibC
config STDATOMIC_S32C1I_SPIRAM_WORKAROUND

View File

@@ -0,0 +1,104 @@
/*
* SPDX-FileCopyrightText: 2017 SiFive Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
/* Copyright (c) 2017 SiFive Inc. All rights reserved.
This copyrighted material is made available to anyone wishing to use,
modify, copy, or redistribute it subject to the terms and conditions
of the FreeBSD License. This program is distributed in the hope that
it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
including the implied warranties of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. A copy of this license is available at
http://www.opensource.org/licenses.
*/
#include <string.h>
#include <stdint.h>
#include "esp_attr.h"
#include "../../string/local.h"
#define unlikely(X) __builtin_expect (!!(X), 0)
/*
 * Copy n bytes from bb to aa and return aa.
 *
 * Strategy: short copies (and, on targets without hardware misaligned access,
 * copies whose source and destination have different offsets within a long)
 * are done byte by byte. Otherwise the destination is first advanced to a
 * long boundary with byte copies, then whole longs are copied (nine per
 * iteration while more than eight remain, then one at a time), and any
 * trailing bytes are finished by the byte loop.
 *
 * __inhibit_loop_to_libcall keeps the compiler from turning the loops below
 * back into library calls.
 * NOTE(review): IRAM_ATTR presumably places the function in internal RAM;
 * confirm against esp_attr.h.
 */
IRAM_ATTR
void *
__attribute__((optimize("-Os")))
__inhibit_loop_to_libcall
memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
{
/* BODY copies one element of type t from *b to *a and advances both pointers.
 * The load is completed before the pointers move and the store is issued. */
#define BODY(a, b, t) { \
t tt = *b; \
a++, b++; \
*(a - 1) = tt; \
}
char *a = (char *)aa;
const char *b = (const char *)bb;
char *end = a + n;
/* Low-bit mask used to test and round addresses to a long boundary. */
uintptr_t msk = sizeof(long) - 1;
#if __riscv_misaligned_slow || __riscv_misaligned_fast
/* Misaligned accesses are available: only tiny copies take the byte path. */
if (n < sizeof(long))
#else
/* No misaligned access: word copies are possible only when both pointers
 * share the same offset within a long. */
if (unlikely((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
|| n < sizeof(long)))
#endif
{
/* Byte-by-byte copy up to end; also reached via goto for the tail bytes. */
small:
if (__builtin_expect(a < end, 1))
while (a < end) {
BODY(a, b, char);
}
return aa;
}
/* Bring the destination up to a long boundary with single-byte copies. */
if (unlikely(((uintptr_t)a & msk) != 0))
while ((uintptr_t)a & msk) {
BODY(a, b, char);
}
long *la = (long *)a;
const long *lb = (const long *)b;
/* Last long-aligned address not beyond end. */
long *lend = (long *)((uintptr_t)end & ~msk);
/* Bulk phase: nine longs are loaded, then nine are stored, per iteration. */
if (unlikely(lend - la > 8)) {
while (lend - la > 8) {
long b0 = *lb++;
long b1 = *lb++;
long b2 = *lb++;
long b3 = *lb++;
long b4 = *lb++;
long b5 = *lb++;
long b6 = *lb++;
long b7 = *lb++;
long b8 = *lb++;
*la++ = b0;
*la++ = b1;
*la++ = b2;
*la++ = b3;
*la++ = b4;
*la++ = b5;
*la++ = b6;
*la++ = b7;
*la++ = b8;
}
}
/* Remaining whole longs, one per iteration. */
while (la < lend) {
BODY(la, lb, long);
}
a = (char *)la;
b = (const char *)lb;
/* Fewer than sizeof(long) bytes may be left; finish them in the byte loop. */
if (unlikely(a < end)) {
goto small;
}
return aa;
}
// Hook to force the linker to include this file
/* Intentionally empty: the symbol exists only so that it can be requested
 * from the linker command line. */
void esp_libc_include_memcpy_impl(void)
{
}

View File

@@ -0,0 +1,195 @@
/*
* SPDX-FileCopyrightText: 2017 SiFive Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
/* Copyright (c) 2017 SiFive Inc. All rights reserved.
This copyrighted material is made available to anyone wishing to use,
modify, copy, or redistribute it subject to the terms and conditions
of the FreeBSD License. This program is distributed in the hope that
it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
including the implied warranties of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. A copy of this license is available at
http://www.opensource.org/licenses.
*/
#include <sys/asm.h>
/*
 * int strcmp(const char *s1 /* a0 * /, const char *s2 /* a1 * /)
 *
 * Compares two NUL-terminated strings and returns the result in a0.
 * Whole registers (SZREG bytes) are compared at a time in an unrolled loop;
 * a byte loop (.Lmisaligned) is used when word access is not possible and to
 * resolve a word that contains the terminator but differs.
 */
.text
.globl strcmp
.type strcmp, @function
strcmp:
/* t2 = all ones; the NUL-detection result below is compared against it. */
li t2, -1
#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
/* Without misaligned access support, take the byte loop unless both
   pointers are SZREG-aligned. */
or a4, a0, a1
and a4, a4, SZREG-1
bnez a4, .Lmisaligned
#endif
/* a5 = 0x7f in every byte lane, used by the NUL detection. */
#if SZREG == 4
li a5, 0x7f7f7f7f
#else
ld a5, mask
#endif
/*
 * check_one_word i n: compare word number i of an n-word unrolled group.
 * a2/a3 receive the words from s1/s2. t0 = ((a2 & a5) + a5) | (a2 | a5) is
 * all ones only when no byte of a2 is zero; otherwise control goes to
 * .Lnull<i>. For every word but the last of the group a difference jumps to
 * .Lmismatch. For the last word both pointers advance by n words and the loop
 * repeats while the words are equal, else execution falls into .Lmismatch.
 */
.macro check_one_word i n
REG_L a2, \i*SZREG(a0)
REG_L a3, \i*SZREG(a1)
and t0, a2, a5
or t1, a2, a5
add t0, t0, a5
or t0, t0, t1
bne t0, t2, .Lnull\i
.if \i+1-\n
bne a2, a3, .Lmismatch
.else
add a0, a0, \n*SZREG
add a1, a1, \n*SZREG
beq a2, a3, .Lloop
# fall through to .Lmismatch
.endif
.endm
/*
 * foundnull i n: emits the .Lnull<i> handler for i != 0, which first advances
 * both pointers by i words so they address the word holding the terminator.
 * .Lnull0 is emitted inside the i == 1 expansion, after the pointer
 * adjustment, so word 0 shares the tail without moving the pointers.
 * Tail: if the two words differ, re-compare them bytewise via .Lmisaligned;
 * if they are identical the strings are equal and 0 is returned.
 */
.macro foundnull i n
.ifne \i
.Lnull\i:
add a0, a0, \i*SZREG
add a1, a1, \i*SZREG
.ifeq \i-1
.Lnull0:
.endif
bne a2, a3, .Lmisaligned
li a0, 0
ret
.endif
.endm
.Lloop:
# examine full words at a time, favoring strings of a couple dozen chars
#if __riscv_xlen == 32
check_one_word 0 5
check_one_word 1 5
check_one_word 2 5
check_one_word 3 5
check_one_word 4 5
#else
check_one_word 0 3
check_one_word 1 3
check_one_word 2 3
#endif
# backwards branch to .Lloop contained above
.Lmismatch:
# words don't match, but a2 has no null byte.
/* The differing words are narrowed to a 16-bit piece and then to a single
   byte; the returned value is the difference of the selected pieces. */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#if __riscv_xlen == 64
sll a4, a2, 48
sll a5, a3, 48
bne a4, a5, .Lmismatch_upper
sll a4, a2, 32
sll a5, a3, 32
bne a4, a5, .Lmismatch_upper
#endif
sll a4, a2, 16
sll a5, a3, 16
bne a4, a5, .Lmismatch_upper
/* Lower pieces are equal: compare the top 16 bits of each word. */
srl a4, a2, 8*SZREG-16
srl a5, a3, 8*SZREG-16
sub a0, a4, a5
and a1, a0, 0xff
bnez a1, 1f
ret
.Lmismatch_upper:
/* a4/a5 hold the differing 16-bit piece in their top bits; move it down. */
srl a4, a4, 8*SZREG-16
srl a5, a5, 8*SZREG-16
sub a0, a4, a5
and a1, a0, 0xff
bnez a1, 1f
ret
/* The low bytes of the 16-bit pieces differ: return their difference only. */
1:and a4, a4, 0xff
and a5, a5, 0xff
sub a0, a4, a5
ret
#else
#if __riscv_xlen == 64
srl a4, a2, 48
srl a5, a3, 48
bne a4, a5, .Lmismatch_lower
srl a4, a2, 32
srl a5, a3, 32
bne a4, a5, .Lmismatch_lower
#endif
srl a4, a2, 16
srl a5, a3, 16
bne a4, a5, .Lmismatch_lower
srl a4, a2, 8
srl a5, a3, 8
bne a4, a5, 1f
and a4, a2, 0xff
and a5, a3, 0xff
1:sub a0, a4, a5
ret
.Lmismatch_lower:
srl a2, a4, 8
srl a3, a5, 8
bne a2, a3, 1f
and a2, a4, 0xff
and a3, a5, 0xff
1:sub a0, a2, a3
ret
#endif
/* Byte-at-a-time comparison: loop while the bytes are equal and non-zero,
   then return the difference of the last pair read. */
.Lmisaligned:
# misaligned
lbu a2, 0(a0)
lbu a3, 0(a1)
add a0, a0, 1
add a1, a1, 1
bne a2, a3, 1f
bnez a2, .Lmisaligned
1:
sub a0, a2, a3
ret
# cases in which a null byte was detected
#if __riscv_xlen == 32
foundnull 0 5
foundnull 1 5
foundnull 2 5
foundnull 3 5
foundnull 4 5
#else
foundnull 0 3
foundnull 1 3
foundnull 2 3
#endif
.size strcmp, .-strcmp
/* 64-bit builds load the 0x7f-per-byte NUL-detection constant from memory
   (see "ld a5, mask" in strcmp); it is emitted here as an 8-byte constant. */
#if SZREG == 8
.section .srodata.cst8,"aM",@progbits,8
.align 3
mask:
.dword 0x7f7f7f7f7f7f7f7f
#endif
/* Global symbol used only as a link-time hook so that this object file gets
   pulled into the link. No instructions follow the label in the visible
   source. NOTE(review): it is presumably never called; confirm. */
.text
.globl esp_libc_include_strcmp_impl
.type esp_libc_include_strcmp_impl, @function
esp_libc_include_strcmp_impl:

View File

@@ -0,0 +1,104 @@
/*
* SPDX-FileCopyrightText: 2017 SiFive Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
/* Copyright (c) 2017 SiFive Inc. All rights reserved.
This copyrighted material is made available to anyone wishing to use,
modify, copy, or redistribute it subject to the terms and conditions
of the FreeBSD License. This program is distributed in the hope that
it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
including the implied warranties of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. A copy of this license is available at
http://www.opensource.org/licenses.
*/
#include <string.h>
#include <stdint.h>
/*
 * Returns non-zero when at least one byte of w is zero, and zero otherwise.
 * For every byte lane the result has bit 7 set exactly when that byte of w
 * is 0x00.
 */
__attribute__((always_inline))
static inline
unsigned long __newlib__libc_detect_null(unsigned long w)
{
    /* 0x7f in each byte lane; widened to all eight lanes when long is 64-bit.
     * The shift is split in two so that it stays valid for a 32-bit long. */
    const unsigned long low7 = (sizeof(long) == 8)
                               ? ((((unsigned long)0x7f7f7f7f << 16) << 16) | 0x7f7f7f7f)
                               : (unsigned long)0x7f7f7f7f;

    /* Adding 0x7f to the low seven bits carries into bit 7 for any non-zero
     * low part, without ever carrying into the neighbouring byte. */
    const unsigned long carried = (w & low7) + low7;

    /* OR-ing in w covers bytes whose only set bit is bit 7; after the final
     * inversion only lanes that were entirely zero keep bit 7 set. */
    return ~(carried | w | low7);
}
/*
 * Copy the NUL-terminated string src (terminator included) to dst and
 * return the original dst.
 *
 * Word path: whole longs are copied while the source word contains no zero
 * byte; the final word is then copied byte by byte up to the terminator.
 * When the compiler does not advertise misaligned access support, the word
 * path is taken only if both pointers are long-aligned and the byte loop at
 * the end handles every other case. When misaligned access is advertised the
 * word path is unconditional and always returns, so the byte loop is not
 * reached.
 */
__attribute__((optimize("-Os")))
char *strcpy(char *dst, const char *src)
{
char *dst0 = dst;
#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof(long) - 1);
if (__builtin_expect(!misaligned, 1))
#endif
{
long *ldst = (long *)dst;
const long *lsrc = (const long *)src;
/* Copy full words until the source word holds a zero byte. */
while (!__newlib__libc_detect_null(*lsrc)) {
*ldst++ = *lsrc++;
}
dst = (char *)ldst;
src = (const char *)lsrc;
/* The current source word contains the terminator. Its bytes are loaded
 * slightly ahead of the stores that consume them; each store doubles as
 * the end-of-string test. */
char c0 = src[0];
char c1 = src[1];
char c2 = src[2];
if (!(*dst++ = c0)) {
return dst0;
}
if (!(*dst++ = c1)) {
return dst0;
}
char c3 = src[3];
if (!(*dst++ = c2)) {
return dst0;
}
/* With a 4-byte long the first three bytes were non-zero, so the fourth
 * byte must be the terminator: store it at "out". */
if (sizeof(long) == 4) {
goto out;
}
char c4 = src[4];
if (!(*dst++ = c3)) {
return dst0;
}
char c5 = src[5];
if (!(*dst++ = c4)) {
return dst0;
}
char c6 = src[6];
if (!(*dst++ = c5)) {
return dst0;
}
if (!(*dst++ = c6)) {
return dst0;
}
/* All preceding bytes of the word were non-zero: the last one is the NUL. */
out:
*dst++ = 0;
return dst0;
}
/* Byte-at-a-time fallback for pointers that are not both long-aligned. */
char ch;
do {
ch = *src;
src++;
dst++;
*(dst - 1) = ch;
} while (ch);
return dst0;
}
// Hook to force the linker to include this file
/* Intentionally empty: the symbol exists only so that it can be requested
 * from the linker command line. */
void esp_libc_include_strcpy_impl(void)
{
}

View File

@@ -0,0 +1,62 @@
/*
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
#include <_ansi.h>
#include <limits.h>
/*
   Taken from glibc:
   Add the compiler optimization to inhibit loop transformation to library
   calls.  This is used to avoid recursive calls in memset and memmove
   default implementations.
*/
# define __inhibit_loop_to_libcall \
    __attribute__ ((__optimize__ ("-fno-tree-loop-distribute-patterns")))

/* Nonzero if X is not aligned on a "long" boundary.
 * This macro is used to skip a few bytes to find an aligned pointer.
 * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
 * to avoid small performance penalties (if they are not zero).
 * The argument is parenthesized so that expressions such as `p + 1` are
 * converted as a whole instead of having the cast bind to `p` only. */
#define UNALIGNED_X(X) ((long)(X) & (sizeof (long) - 1))

/* True when the compiler advertises hardware support for misaligned access. */
#define _HAVE_HW_MISALIGNED_ACCESS (__riscv_misaligned_fast || __riscv_misaligned_slow)

#if _HAVE_HW_MISALIGNED_ACCESS
/* Hardware performs unaligned operations with little
 * to no penalty compared to byte-to-byte copy. */
#define UNALIGNED_X_Y(X, Y) (0)
#else /* _HAVE_HW_MISALIGNED_ACCESS */
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED_X_Y(X, Y) \
    (((long)(X) & (sizeof (long) - 1)) | ((long)(Y) & (sizeof (long) - 1)))
#endif /* _HAVE_HW_MISALIGNED_ACCESS */

/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLE_BLOCK_SIZE (sizeof (long))

/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIG_BLOCK_SIZE (sizeof (long) << 2)

/* Threshold for punting to the little block byte copier. */
#define TOO_SMALL_LITTLE_BLOCK(LEN) ((LEN) < LITTLE_BLOCK_SIZE)

/* Threshold for punting to the big block byte copier. */
#define TOO_SMALL_BIG_BLOCK(LEN) ((LEN) < BIG_BLOCK_SIZE)

/* Macros for detecting endchar. */
#if LONG_MAX == 2147483647L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECT_NULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif

/* Returns nonzero if (long)X contains the byte used to fill (long)MASK.
 * Both arguments are parenthesized so that the XOR is applied to the complete
 * argument expressions. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL((X) ^ (MASK)))

View File

@@ -0,0 +1,59 @@
/*
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
#include <string.h>
#include "local.h"
/*
 * Compare the first n bytes of m1 and m2.
 * Returns 0 when they are equal, otherwise the difference between the first
 * pair of differing bytes (taken as unsigned char).
 */
__attribute__((optimize("-Os")))
int
memcmp(const void *m1,
       const void *m2,
       size_t n)
{
    unsigned char *lhs = (unsigned char *) m1;
    unsigned char *rhs = (unsigned char *) m2;

    /* Word-wise fast path: taken only when at least one whole long is left
       and UNALIGNED_X_Y() reports that word access is acceptable for this
       pair of pointers. */
    if (!TOO_SMALL_LITTLE_BLOCK(n) && !UNALIGNED_X_Y(lhs, rhs)) {
        unsigned long *lhs_word = (unsigned long*) lhs;
        unsigned long *rhs_word = (unsigned long*) rhs;

        /* Skip over equal words; stop at the first differing word or when
           less than a full word remains. */
        for (; !TOO_SMALL_LITTLE_BLOCK(n) && *lhs_word == *rhs_word; n -= LITTLE_BLOCK_SIZE) {
            lhs_word++;
            rhs_word++;
        }

        /* Whatever is left (a differing word or a short tail) is resolved
           by the byte loop below. */
        lhs = (unsigned char*)lhs_word;
        rhs = (unsigned char*)rhs_word;
    }

    for (; n != 0; n--, lhs++, rhs++) {
        if (*lhs != *rhs) {
            return *lhs - *rhs;
        }
    }

    return 0;
}

// Hook to force the linker to include this file
void esp_libc_include_memcmp_impl(void)
{
}

View File

@@ -0,0 +1,88 @@
/*
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
#include <string.h>
#include <_ansi.h>
#include <stddef.h>
#include <limits.h>
#include "local.h"
/*
 * Copy length bytes from src_void to dst_void; the regions may overlap.
 * Returns dst_void.
 */
__attribute__((optimize("-Os")))
void *
__inhibit_loop_to_libcall
memmove(void *dst_void,
        const void *src_void,
        size_t length)
{
    char *out = dst_void;
    const char *in = src_void;
    long *out_word;
    const long *in_word;

    if (in < out && out < in + length) {
        /* The destination starts inside the source range, so a forward copy
           would overwrite bytes that are still to be read: copy from the end
           towards the start instead. */
        in += length;
        out += length;

        if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(in, out)) {
            out_word = (long*)out;
            in_word = (long*)in;

            /* Whole longs, highest address first. */
            for (; !TOO_SMALL_LITTLE_BLOCK(length); length -= LITTLE_BLOCK_SIZE) {
                *--out_word = *--in_word;
            }

            out = (char*)out_word;
            in = (char*)in_word;
        }

        /* Remaining bytes, still walking backwards. */
        for (; length != 0; length--) {
            *--out = *--in;
        }

        return dst_void;
    }

    /* Forward copy, structured like memcpy. Short requests, or pointer pairs
       rejected by UNALIGNED_X_Y(), go straight to the byte loop. */
    if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(in, out)) {
        out_word = (long*)out;
        in_word = (long*)in;

        /* Four longs per iteration while a full big block remains. */
        for (; !TOO_SMALL_BIG_BLOCK(length); length -= BIG_BLOCK_SIZE) {
            *out_word++ = *in_word++;
            *out_word++ = *in_word++;
            *out_word++ = *in_word++;
            *out_word++ = *in_word++;
        }

        /* Then single longs. */
        for (; !TOO_SMALL_LITTLE_BLOCK(length); length -= LITTLE_BLOCK_SIZE) {
            *out_word++ = *in_word++;
        }

        out = (char*)out_word;
        in = (char*)in_word;
    }

    /* Residual bytes. */
    for (; length != 0; length--) {
        *out++ = *in++;
    }

    return dst_void;
}

// Hook to force the linker to include this file
void esp_libc_include_memmove_impl(void)
{
}

View File

@@ -0,0 +1,63 @@
/*
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
#include <string.h>
#include <limits.h>
#include "local.h"
/*
 * Compare at most n characters of s1 and s2.
 * Returns 0 when the compared prefixes are equal, otherwise the difference
 * between the first pair of differing characters (taken as unsigned char).
 */
__attribute__((optimize("-Os")))
int
strncmp(const char *s1,
        const char *s2,
        size_t n)
{
    if (n == 0) {
        return 0;
    }

    /* Word-wise comparison, used when UNALIGNED_X_Y() allows word access for
       this pair of pointers. */
    if (!UNALIGNED_X_Y(s1, s2)) {
        unsigned long *w1 = (unsigned long*)s1;
        unsigned long *w2 = (unsigned long*)s2;

        for (;;) {
            /* Leave the loop when less than a word is left or the current
               words differ; the byte loop below settles the result. */
            if (n < sizeof(long) || *w1 != *w2) {
                break;
            }
            n -= sizeof(long);

            /* The words are equal here: if the budget is exhausted or the
               word holds the terminator, the strings compare equal. */
            if (n == 0 || DETECT_NULL(*w1)) {
                return 0;
            }
            w1++;
            w2++;
        }

        s1 = (char*)w1;
        s2 = (char*)w2;
    }

    for (; n > 0; s1++, s2++) {
        n--;
        if (*s1 != *s2) {
            break;
        }
        /* Characters are equal: done when nothing is left to compare or the
           terminator has been reached. */
        if (n == 0 || *s1 == '\0') {
            return 0;
        }
    }

    return (*(unsigned char *) s1) - (*(unsigned char *) s2);
}

// Hook to force the linker to include this file
void esp_libc_include_strncmp_impl(void)
{
}

View File

@@ -0,0 +1,56 @@
/*
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
#include <string.h>
#include <limits.h>
#include "local.h"
/*
 * Copy at most count characters of src0 to dst0. If src0 is shorter than
 * count, the rest of dst0 is filled with '\0' up to count characters.
 * Returns dst0.
 */
__attribute__((optimize("-Os")))
char *
strncpy(char *__restrict dst0,
        const char *__restrict src0,
        size_t count)
{
    char *out = dst0;
    const char *in = src0;

    /* Word-wise copy while whole longs remain and the source word does not
       contain the terminator. */
    if (!UNALIGNED_X_Y(in, out) && !TOO_SMALL_LITTLE_BLOCK(count)) {
        long *out_word = (long*)out;
        const long *in_word = (long*)in;

        while (!TOO_SMALL_LITTLE_BLOCK(count) && !DETECT_NULL(*in_word)) {
            *out_word++ = *in_word++;
            count -= sizeof(long int);
        }

        out = (char*)out_word;
        in = (char*)in_word;
    }

    /* Copy single characters up to and including the terminator, or until
       the budget is used up. */
    while (count > 0) {
        const char ch = *in++;

        *out++ = ch;
        --count;
        if (ch == '\0') {
            break;
        }
    }

    /* Zero-fill whatever is left of the destination budget. */
    for (; count > 0; count--) {
        *out++ = '\0';
    }

    return dst0;
}

// Hook to force the linker to include this file
void esp_libc_include_strncpy_impl(void)
{
}

View File

@@ -16,6 +16,15 @@ if(CONFIG_LIBC_NEWLIB)
"test_file.c")
endif()
# The misaligned-access performance test is compiled only when the optimized
# mem/str implementations are enabled.
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
list(APPEND srcs "test_misaligned_mem_performance.c")
endif()
idf_component_register(SRCS "${srcs}"
PRIV_REQUIRES unity vfs cmock driver esp_timer spi_flash test_utils pthread esp_psram
WHOLE_ARCHIVE)
# That test passes memcpy/memcmp/strcpy/... through an unprototyped
# `void (*)()` function pointer, so the two related warnings are disabled for
# this single source file. This must come after idf_component_register().
# NOTE(review): presumably because these warnings would otherwise fail the
# build; confirm against the project's warning flags.
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
set_source_files_properties(test_misaligned_mem_performance.c
PROPERTIES COMPILE_FLAGS "-Wno-incompatible-pointer-types -Wno-strict-prototypes")
endif()

View File

@@ -0,0 +1,108 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Unlicense OR CC0-1.0
*/
#include <stdint.h>
#include <string.h>
#include "esp_heap_caps.h"
#include "hal/cpu_ll.h"
#include "unity.h"
#define MAX_MEMTEST_SIZE 4096
/**
 * Measure the CPU cycles spent in one call of a libc mem/str routine that is
 * given misaligned buffers.
 *
 * Two MAX_MEMTEST_SIZE-byte buffers are allocated with 32-byte alignment and
 * the routine is called with the destination offset by 1 byte and the source
 * offset by 2 bytes.
 *
 * @param foo        Routine under test; called as foo(dst, src) or
 *                   foo(dst, src, size).
 * @param pass_size  When true, MAX_MEMTEST_SIZE - 2 is passed as the third
 *                   argument; when false, only the two pointers are passed.
 *
 * @return Difference between the cycle counter read after and before the call.
 */
uint32_t test_function_dest_src_size(void (*foo)(), bool pass_size)
{
    uint32_t ccount1, ccount2;
    char* test_des = heap_caps_aligned_alloc(32, MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
    char* test_src = heap_caps_aligned_alloc(32, MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);

    TEST_ASSERT_NOT_NULL(test_des);
    TEST_ASSERT_NOT_NULL(test_src);

    /* Prepare arrays for X-cmp functions to make the algorithm go through whole buffers. */
    memset(test_src, 'a', MAX_MEMTEST_SIZE);
    test_src[MAX_MEMTEST_SIZE - 1] = 0;
    memset(test_des, 'a', MAX_MEMTEST_SIZE);
    /* Place the differing marker just before the terminator. Writing it to the
       last index, as before, was a dead store: the terminator assignment on
       the next line overwrote it immediately. */
    test_des[MAX_MEMTEST_SIZE - 2] = 'b';
    test_des[MAX_MEMTEST_SIZE - 1] = 0;

    /* Only the call itself is timed; setup and cleanup stay outside. */
    ccount1 = esp_cpu_get_cycle_count();
    if (pass_size) {
        foo(test_des + 1, test_src + 2, MAX_MEMTEST_SIZE - 2);
    } else {
        foo(test_des + 1, test_src + 2);
    }
    ccount2 = esp_cpu_get_cycle_count();

    heap_caps_free(test_des);
    heap_caps_free(test_src);

    return ccount2 - ccount1;
}
TEST_CASE("memcpy", "[misaligned_mem]")
{
    /* Reference on esp32c2: 4128 cycles instead of 28676. */
    const uint32_t elapsed_cycles = test_function_dest_src_size(memcpy, true);

    TEST_ASSERT_LESS_THAN(5000, elapsed_cycles);
}
TEST_CASE("memcmp", "[misaligned_mem]")
{
    /* Reference on esp32c2: 14259 cycles instead of 49147. */
    const uint32_t elapsed_cycles = test_function_dest_src_size(memcmp, true);

    TEST_ASSERT_LESS_THAN(16000, elapsed_cycles);
}
TEST_CASE("memmove", "[misaligned_mem]")
{
    /* Reference on esp32c2: 8086 cycles instead of 33896. */
    const uint32_t elapsed_cycles = test_function_dest_src_size(memmove, true);

    TEST_ASSERT_LESS_THAN(10000, elapsed_cycles);
}
TEST_CASE("memmove - overlapping", "[misaligned_mem]")
{
    char *scratch = heap_caps_aligned_alloc(32, MAX_MEMTEST_SIZE, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
    TEST_ASSERT_NOT_NULL(scratch);

    /* Source and destination overlap inside one buffer, 3 bytes apart. */
    const uint32_t start_cycles = esp_cpu_get_cycle_count();
    memmove(scratch + 5, scratch + 2, MAX_MEMTEST_SIZE - 5);
    const uint32_t end_cycles = esp_cpu_get_cycle_count();

    /* Release the buffer before asserting. */
    heap_caps_free(scratch);

    /* Reference on esp32c2: 11503 cycles instead of 45024. */
    TEST_ASSERT_LESS_THAN(15000, end_cycles - start_cycles);
}
TEST_CASE("strcpy", "[misaligned_mem]")
{
    /* Reference on esp32c2: 17313 cycles instead of 32771. */
    const uint32_t elapsed_cycles = test_function_dest_src_size(strcpy, false);

    TEST_ASSERT_LESS_THAN(20000, elapsed_cycles);
}
TEST_CASE("strcmp", "[misaligned_mem]")
{
    /* Reference on esp32c2: 13191 cycles instead of 32775. */
    const uint32_t elapsed_cycles = test_function_dest_src_size(strcmp, false);

    TEST_ASSERT_LESS_THAN(15000, elapsed_cycles);
}
TEST_CASE("strncpy", "[misaligned_mem]")
{
    /* Reference on esp32c2: 21475 cycles instead of 36859. */
    const uint32_t elapsed_cycles = test_function_dest_src_size(strncpy, true);

    TEST_ASSERT_LESS_THAN(25000, elapsed_cycles);
}
TEST_CASE("strncmp", "[misaligned_mem]")
{
    /* Reference on esp32c2: 24369 cycles instead of 49141. */
    const uint32_t elapsed_cycles = test_function_dest_src_size(strncmp, true);

    TEST_ASSERT_LESS_THAN(27000, elapsed_cycles);
}

View File

@@ -16,6 +16,7 @@ from pytest_embedded_idf.utils import idf_parametrize
('psram_esp32', 'esp32'),
('release_esp32', 'esp32'),
('release_esp32c2', 'esp32c2'),
('misaligned_mem', 'esp32c3'),
],
indirect=['config', 'target'],
)

View File

@@ -0,0 +1 @@
CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS=y

View File

@@ -87,6 +87,7 @@ The following optimizations improve the execution of nearly all code, including
:SOC_CPU_HAS_FPU: - Avoid using floating point arithmetic ``float``. Even though {IDF_TARGET_NAME} has a single precision hardware floating point unit, floating point calculations are always slower than integer calculations. If possible then use fixed point representations, a different method of integer representation, or convert part of the calculation to be integer only before switching to floating point.
:not SOC_CPU_HAS_FPU: - Avoid using floating point arithmetic ``float``. On {IDF_TARGET_NAME} these calculations are emulated in software and are very slow. If possible, use fixed point representations, a different method of integer representation, or convert part of the calculation to be integer only before switching to floating point.
- Avoid using double precision floating point arithmetic ``double``. These calculations are emulated in software and are very slow. If possible then use an integer-based representation, or single-precision floating point.
:CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY: - Avoid misaligned 4-byte memory accesses in performance-critical code sections. For potential performance improvements, consider enabling :ref:`CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS`. Note that properly aligned memory operations always execute at full speed without performance penalties. Enabling this option requires approximately 190 additional bytes of IRAM and approximately 870 additional bytes of flash memory.
.. only:: esp32s2 or esp32s3 or esp32p4

View File

@@ -79,8 +79,7 @@ newlib_component:
- 'components/newlib/platform_include/**'
- 'components/newlib/port/**'
- 'components/newlib/priv_include/**'
- 'components/newlib/*.c'
- 'components/newlib/*.h'
- 'components/newlib/src/**'
allowed_licenses:
- Apache-2.0
- BSD-2-Clause-FreeBSD AND Apache-2.0