From 913d38ba14add74aca1e7420deaeb917172a56b6 Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Sun, 17 Aug 2025 11:52:05 +0700 Subject: [PATCH] fix(newlib): fix CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS for c2/c3/c6/h2/h21 PMP configurations for load and store addresses may have different permissions (e.g., "R" vs. "RW"). Due to the timing alignment of internal signals, the address permission check may be incorrectly applied during the second part of a misaligned access transaction. As a workaround, insert two instructions (e.g. ADDI/NOP) between accessing to different memory regions. This spacing avoids the false permission check caused by signal timing overlap. --- components/esp_rom/CMakeLists.txt | 8 +- components/newlib/CMakeLists.txt | 43 +++-- components/newlib/Kconfig | 2 +- .../{src => priv_include}/string/local.h | 2 + components/newlib/src/libc.lf | 2 - components/newlib/src/newlib.lf | 21 +-- components/newlib/src/picolibc/libc.lf | 4 - components/newlib/src/port/riscv/memcpy.c | 96 ++++++++--- components/newlib/src/port/riscv/memmove.c | 150 ++++++++++++++++++ components/newlib/src/port/riscv/strcpy.c | 2 +- components/newlib/src/string/memcmp.c | 3 +- components/newlib/src/string/memmove.c | 88 ---------- components/newlib/src/string/strncmp.c | 3 +- components/newlib/src/string/strncpy.c | 4 +- .../test_apps/newlib/main/CMakeLists.txt | 7 +- ...performance.c => test_misaligned_access.c} | 54 +++++++ .../newlib/sdkconfig.ci.misaligned_mem | 1 + .../esp32c2/include/soc/Kconfig.soc_caps.in | 4 + components/soc/esp32c2/include/soc/soc_caps.h | 2 + .../esp32c3/include/soc/Kconfig.soc_caps.in | 4 + components/soc/esp32c3/include/soc/soc_caps.h | 2 + .../esp32c6/include/soc/Kconfig.soc_caps.in | 4 + components/soc/esp32c6/include/soc/soc_caps.h | 2 + .../esp32h2/include/soc/Kconfig.soc_caps.in | 4 + components/soc/esp32h2/include/soc/soc_caps.h | 2 + .../esp32h21/include/soc/Kconfig.soc_caps.in | 4 + .../soc/esp32h21/include/soc/soc_caps.h | 2 + 27 files changed, 356 insertions(+), 164 deletions(-) rename components/newlib/{src => priv_include}/string/local.h (98%) create mode 100644 components/newlib/src/port/riscv/memmove.c delete mode 100644 components/newlib/src/string/memmove.c rename components/newlib/test_apps/newlib/main/{test_misaligned_mem_performance.c => test_misaligned_access.c} (63%) diff --git a/components/esp_rom/CMakeLists.txt b/components/esp_rom/CMakeLists.txt index 69e01d0abd..cb22d5d2ca 100644 --- a/components/esp_rom/CMakeLists.txt +++ b/components/esp_rom/CMakeLists.txt @@ -149,6 +149,10 @@ if(ESP_TEE_BUILD) if(CONFIG_ESP_ROM_HAS_NEWLIB_NANO_FORMAT) rom_linker_script("newlib-nano") endif() + rom_linker_script("libc") + if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY) # TODO IDF-13852: use optimized memcpy for TEE ? + rom_linker_script("libc-suboptimal_for_misaligned_mem") + endif() endif() if(BOOTLOADER_BUILD) @@ -173,8 +177,8 @@ if(BOOTLOADER_BUILD) else() rom_linker_script("libc") endif() - if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY - AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY) + # For bootloader use only ROM functions due to 'relocation truncated to fit' error rom_linker_script("libc-suboptimal_for_misaligned_mem") endif() if(CONFIG_LIBC_NEWLIB) diff --git a/components/newlib/CMakeLists.txt b/components/newlib/CMakeLists.txt index 1bb182f584..7c8a1faa92 100644 --- a/components/newlib/CMakeLists.txt +++ b/components/newlib/CMakeLists.txt @@ -38,23 +38,21 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND) endif() if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) - if(CMAKE_C_COMPILER_ID MATCHES "Clang") - # TODO IDF-13089: remove this block and modify src/newlib.lf when clang was upgraded - list(APPEND srcs - "src/string/memcmp.c" - "src/string/memmove.c" - "src/string/strncmp.c" - "src/string/strncpy.c" - "src/port/riscv/memcpy.c" - "src/port/riscv/strcpy.c" - "src/port/riscv/strcmp.S") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl") - endif() + list(APPEND srcs + "src/port/riscv/memcpy.c" + "src/port/riscv/memmove.c" + "src/string/memcmp.c" + "src/port/riscv/strcpy.c" + "src/string/strncpy.c" + "src/string/strncmp.c" + "src/port/riscv/strcmp.S") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl") endif() if(CONFIG_LIBC_NEWLIB) @@ -96,6 +94,17 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND) PROPERTIES COMPILE_FLAGS "-mno-disable-hardware-atomics") endif() +if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + # TODO GCC-419 and IDF-13089: cleanup files + set_source_files_properties("src/string/memcmp.c" + "src/string/strncmp.c" + "src/string/strncpy.c" + "src/port/riscv/memcpy.c" + "src/port/riscv/memmove.c" + "src/port/riscv/strcpy.c" + PROPERTIES COMPILE_FLAGS -O2) +endif() + # Forces the linker to include heap, syscall, pthread, assert, and retargetable locks from this component, # instead of the implementations provided by newlib. list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_heap_impl") diff --git a/components/newlib/Kconfig b/components/newlib/Kconfig index 9a30784c21..92d9afc412 100644 --- a/components/newlib/Kconfig +++ b/components/newlib/Kconfig @@ -151,7 +151,7 @@ menu "LibC" Enables performance-optimized implementations of memory and string functions when handling misaligned memory. - This increases the image size by ~1000 bytes. + Require approximately 800–1000 bytes of IRAM. Optimized functions include: - memcpy diff --git a/components/newlib/src/string/local.h b/components/newlib/priv_include/string/local.h similarity index 98% rename from components/newlib/src/string/local.h rename to components/newlib/priv_include/string/local.h index bfc1260efc..d6df832838 100644 --- a/components/newlib/src/string/local.h +++ b/components/newlib/priv_include/string/local.h @@ -22,7 +22,9 @@ * to avoid small performance penalties (if they are not zero). */ #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1)) +#ifndef _HAVE_HW_MISALIGNED_ACCESS #define _HAVE_HW_MISALIGNED_ACCESS (__riscv_misaligned_fast || __riscv_misaligned_slow) +#endif #if _HAVE_HW_MISALIGNED_ACCESS /* Hardware performs unaligned operations with little diff --git a/components/newlib/src/libc.lf b/components/newlib/src/libc.lf index a8ebcf8930..05fdef2306 100644 --- a/components/newlib/src/libc.lf +++ b/components/newlib/src/libc.lf @@ -11,8 +11,6 @@ archive: else: libc.a entries: - if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y: - libc_a-memcpy (noflash) if SPIRAM_CACHE_WORKAROUND = y: # The following libs are either used in a lot of places or in critical # code. (such as panic or abort) diff --git a/components/newlib/src/newlib.lf b/components/newlib/src/newlib.lf index 359acd6345..e311ff5b17 100644 --- a/components/newlib/src/newlib.lf +++ b/components/newlib/src/newlib.lf @@ -1,6 +1,14 @@ [mapping:newlib] archive: libnewlib.a entries: + if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y: + memcpy (noflash) + memmove (noflash) + memcmp (noflash) + strcpy (noflash) + strncpy (noflash) + strcmp (noflash) + strncmp (noflash) if LIBC_MISC_IN_IRAM = y: if HEAP_PLACE_FUNCTION_INTO_FLASH = n: heap (noflash) @@ -11,16 +19,3 @@ entries: stdatomic_s32c1i (noflash) if STDATOMIC_S32C1I_SPIRAM_WORKAROUND = y: stdatomic_s32c1i (noflash) - if CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TOOLCHAIN_GCC = y: - # memcpy() should be in IRAM due to early system use - # Others can stay in flash (performance difference is minimal) - # - # Performance Comparison: - # | func | flash | iram | - # |---------|-------|-------| - # | memcmp | 15986 | 15170 | - # | strcpy | 17499 | 16660 | - # | strcmp | 13125 | 11147 | - # | strncpy | 17386 | 16668 | - # | strncmp | 22161 | 21782 | - libc_a-memcpy (noflash) diff --git a/components/newlib/src/picolibc/libc.lf b/components/newlib/src/picolibc/libc.lf index 0892557473..f2a97b4cc9 100644 --- a/components/newlib/src/picolibc/libc.lf +++ b/components/newlib/src/picolibc/libc.lf @@ -8,10 +8,6 @@ archive: libc.a entries: - if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TARGET_ARCH_RISCV = y: - memcpy-asm (noflash) - if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TARGET_ARCH_XTENSA = y: - libc_machine_xtensa_memcpy (noflash) if SPIRAM_CACHE_WORKAROUND = y: # The following libs are either used in a lot of places or in critical code. (such as panic or abort) # Thus, they shall always be placed in IRAM. diff --git a/components/newlib/src/port/riscv/memcpy.c b/components/newlib/src/port/riscv/memcpy.c index bbee1d3f02..6c84199d91 100644 --- a/components/newlib/src/port/riscv/memcpy.c +++ b/components/newlib/src/port/riscv/memcpy.c @@ -18,14 +18,11 @@ #include #include -#include "esp_attr.h" -#include "../../string/local.h" +#include "string/local.h" #define unlikely(X) __builtin_expect (!!(X), 0) -IRAM_ATTR void * -__attribute__((optimize("-Os"))) __inhibit_loop_to_libcall memcpy(void *__restrict aa, const void *__restrict bb, size_t n) { @@ -65,29 +62,80 @@ small: if (unlikely(lend - la > 8)) { while (lend - la > 8) { - long b0 = *lb++; - long b1 = *lb++; - long b2 = *lb++; - long b3 = *lb++; - long b4 = *lb++; - long b5 = *lb++; - long b6 = *lb++; - long b7 = *lb++; - long b8 = *lb++; - *la++ = b0; - *la++ = b1; - *la++ = b2; - *la++ = b3; - *la++ = b4; - *la++ = b5; - *la++ = b6; - *la++ = b7; - *la++ = b8; + /* + * long b0 = *lb++; + * long b1 = *lb++; + * long b2 = *lb++; + * long b3 = *lb++; + * long b4 = *lb++; + * long b5 = *lb++; + * long b6 = *lb++; + * long b7 = *lb++; + * long b8 = *lb++; + * *la++ = b0; + * *la++ = b1; + * *la++ = b2; + * *la++ = b3; + * *la++ = b4; + * *la++ = b5; + * *la++ = b6; + * *la++ = b7; + * *la++ = b8; + */ + long src0, src1, src2, src3; + long src4, src5, src6, src7; + long src8; + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, 0(%10)\n" // scr0 = lb[0]; + "lw %1, 4(%10)\n" // scr1 = lb[1]; + "lw %2, 8(%10)\n" // scr2 = lb[2]; + "lw %3, 12(%10)\n" // scr3 = lb[3]; + "lw %4, 16(%10)\n" // scr4 = lb[4]; + "lw %5, 20(%10)\n" // scr5 = lb[5]; + "lw %6, 24(%10)\n" // scr6 = lb[6]; + "lw %7, 28(%10)\n" // scr7 = lb[7]; + "lw %8, 32(%10)\n" // scr8 = lb[8]; + "addi %9, %9, 36\n" // la += 8 * 9; + "addi %10, %10, 36\n" // lb += 8 * 9; + "sw %0, -36(%9)\n" // *(la - 9) = src; + "sw %1, -32(%9)\n" // *(la - 8) = src; + "sw %2, -28(%9)\n" // *(la - 7) = src; + "sw %3, -24(%9)\n" // *(la - 6) = src; + "sw %4, -20(%9)\n" // *(la - 5) = src; + "sw %5, -16(%9)\n" // *(la - 4) = src; + "sw %6, -12(%9)\n" // *(la - 3) = src; + "sw %7, -8(%9)\n" // *(la - 2) = src; + "sw %8, -4(%9)\n" // *(la - 1) = src; + : "=r"(src0), "=r"(src1), "=r"(src2), "=r"(src3), + "=r"(src4), "=r"(src5), "=r"(src6), "=r"(src7), + "=r"(src8), + "+r"(la), "+r"(lb) + :: "memory"); } } - while (la < lend) { - BODY(la, lb, long); + /* + * BODY(la, lb, long); + */ + long src0; +#ifdef __OPTIMIZE_SIZE__ +#error "Enabled Os optimization may not work properly for DIG-694" + /* + * Replacing the string: + * *la++ = tt; + * To: + * "addi %2, %4, 4\n" // la++; + * "sw %0, -4(%4)\n" // *(la-1) = src0; + * May break some optimizations and slightly reduce performance. + */ +#endif + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, 0(%1)\n" // long src0 = *lb; + "addi %1, %1, 4\n" // lb++; + "addi %2, %2, 4\n" // la++; + "sw %0, -4(%2)\n" // *(la-1) = src0; + : "=r"(src0), "+r"(lb), "+r"(la) + :: "memory"); } a = (char *)la; diff --git a/components/newlib/src/port/riscv/memmove.c b/components/newlib/src/port/riscv/memmove.c new file mode 100644 index 0000000000..208e5953cc --- /dev/null +++ b/components/newlib/src/port/riscv/memmove.c @@ -0,0 +1,150 @@ +/* + * SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +#include +#include <_ansi.h> +#include +#include +#include "string/local.h" + +void * +__inhibit_loop_to_libcall +memmove(void *dst_void, + const void *src_void, + size_t length) +{ + char *dst = dst_void; + const char *src = src_void; + long *aligned_dst; + const long *aligned_src; + + if (src < dst && dst < src + length) { + /* Destructive overlap...have to copy backwards */ + src += length; + dst += length; + + if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { + aligned_dst = (long*)dst; + aligned_src = (long*)src; + + /* Copy one long word at a time if possible. */ + while (!TOO_SMALL_LITTLE_BLOCK(length)) { + /* + * const long src0 = *--aligned_src; + * *--aligned_dst = src0; + * length -= LITTLE_BLOCK_SIZE; + */ + long src0; + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, -4(%1)\n" // src0 = *(aligned_src - 1); + "addi %1, %1, -4\n" // aligned_src--; + "addi %2, %2, -4\n" // aligned_dst--; + "addi %3, %3, -4\n" // length -= LITTLE_BLOCK_SIZE; + "sw %0, 0(%2)\n" // aligned_dst = src0; + : "=r"(src0), "+r"(aligned_src), "+r"(aligned_dst), "+r"(length) + :: "memory"); + } + + /* Pick up any residual with a byte copier. */ + dst = (char*)aligned_dst; + src = (char*)aligned_src; + } + + while (length--) { + *--dst = *--src; + } + } else { + /* Use optimizing algorithm for a non-destructive copy to closely + match memcpy. If the size is small or either SRC or DST is unaligned, + then punt into the byte copy loop. This should be rare. */ + if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { + aligned_dst = (long*)dst; + aligned_src = (long*)src; + + /* Copy 8X long words at a time if possible. */ + while (length >= BIG_BLOCK_SIZE * 2) { + /* + * const long src0 = *aligned_src++; + * const long src1 = *aligned_src++; + * const long src2 = *aligned_src++; + * const long src3 = *aligned_src++; + * const long src4 = *aligned_src++; + * const long src5 = *aligned_src++; + * const long src6 = *aligned_src++; + * const long src7 = *aligned_src++; + * *aligned_dst++ = src0; + * *aligned_dst++ = src1; + * *aligned_dst++ = src2; + * *aligned_dst++ = src3; + * *aligned_dst++ = src4; + * *aligned_dst++ = src5; + * *aligned_dst++ = src6; + * *aligned_dst++ = src7; + */ + long src0, src1, src2, src3; + long src4, src5, src6, src7; + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, 0(%8)\n" // src0 = aligned_src[0]; + "lw %1, 4(%8)\n" // src1 = aligned_src[1]; + "lw %2, 8(%8)\n" // src2 = aligned_src[2]; + "lw %3, 12(%8)\n" // src3 = aligned_src[3]; + "lw %4, 16(%8)\n" // src4 = aligned_src[4]; + "lw %5, 20(%8)\n" // src5 = aligned_src[5]; + "lw %6, 24(%8)\n" // src6 = aligned_src[6]; + "lw %7, 28(%8)\n" // src7 = aligned_src[7]; + "addi %8, %8, 32\n" // aligned_src += BIG_BLOCK_SIZE * 2; + "addi %9, %9, 32\n" // aligned_dst += BIG_BLOCK_SIZE * 2; + "addi %10, %10, -32\n" // length -= BIG_BLOCK_SIZE * 2; + "sw %0, -32(%9)\n" // *(aligned_dst - 8) = src0; + "sw %1, -28(%9)\n" // *(aligned_dst - 7) = src1; + "sw %2, -24(%9)\n" // *(aligned_dst - 6) = src2; + "sw %3, -20(%9)\n" // *(aligned_dst - 5) = src3; + "sw %4, -16(%9)\n" // *(aligned_dst - 4) = src4; + "sw %5, -12(%9)\n" // *(aligned_dst - 3) = src5; + "sw %6, -8(%9)\n" // *(aligned_dst - 2) = src6; + "sw %7, -4(%9)\n" // *(aligned_dst - 1) = src7; + : "=r"(src0), "=r"(src1), "=r"(src2), "=r"(src3), + "=r"(src4), "=r"(src5), "=r"(src6), "=r"(src7), + "+r"(aligned_src), "+r"(aligned_dst), "+r"(length) + :: "memory"); + } + + /* Copy one long word at a time if possible. */ + while (!TOO_SMALL_LITTLE_BLOCK(length)) { + /* + * const long src0 = *aligned_src++; + * *aligned_dst++ = src0; + * length -= LITTLE_BLOCK_SIZE; + */ + long src0; + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, 0(%4)\n" // long src0 = *aligned_src; + "addi %1, %4, 4\n" // aligned_src++; + "addi %2, %5, 4\n" // aligned_dst++; + "addi %3, %6, -4\n" // length -= LITTLE_BLOCK_SIZE; + "sw %0, -4(%5)\n" // *(aligned_dst-1) = src0; + : "=r"(src0), "=r"(aligned_src), "=r"(aligned_dst), "=r"(length) + : "r"(aligned_src), "r"(aligned_dst), "r"(length)); + } + + /* Pick up any residual with a byte copier. */ + dst = (char*)aligned_dst; + src = (char*)aligned_src; + } + + while (length--) { + *dst++ = *src++; + } + } + + return dst_void; +} + +// Hook to force the linker to include this file +void esp_libc_include_memmove_impl(void) +{ +} diff --git a/components/newlib/src/port/riscv/strcpy.c b/components/newlib/src/port/riscv/strcpy.c index 361a04baa1..1884218729 100644 --- a/components/newlib/src/port/riscv/strcpy.c +++ b/components/newlib/src/port/riscv/strcpy.c @@ -30,7 +30,6 @@ unsigned long __newlib__libc_detect_null(unsigned long w) return ~(((w & mask) + mask) | w | mask); } -__attribute__((optimize("-Os"))) char *strcpy(char *dst, const char *src) { char *dst0 = dst; @@ -44,6 +43,7 @@ char *strcpy(char *dst, const char *src) const long *lsrc = (const long *)src; while (!__newlib__libc_detect_null(*lsrc)) { + /* DIG-694: there are enough instructions between lw and sw after compiler unrolls the loop */ *ldst++ = *lsrc++; } diff --git a/components/newlib/src/string/memcmp.c b/components/newlib/src/string/memcmp.c index 0a26ca8cd4..4445d09da2 100644 --- a/components/newlib/src/string/memcmp.c +++ b/components/newlib/src/string/memcmp.c @@ -6,9 +6,8 @@ * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD */ #include -#include "local.h" +#include "string/local.h" -__attribute__((optimize("-Os"))) int memcmp(const void *m1, const void *m2, diff --git a/components/newlib/src/string/memmove.c b/components/newlib/src/string/memmove.c deleted file mode 100644 index 57071ddc09..0000000000 --- a/components/newlib/src/string/memmove.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc. - * - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 - * - * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD - */ -#include -#include <_ansi.h> -#include -#include -#include "local.h" - -__attribute__((optimize("-Os"))) -void * -__inhibit_loop_to_libcall -memmove(void *dst_void, - const void *src_void, - size_t length) -{ - char *dst = dst_void; - const char *src = src_void; - long *aligned_dst; - const long *aligned_src; - - if (src < dst && dst < src + length) { - /* Destructive overlap...have to copy backwards */ - src += length; - dst += length; - - if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { - aligned_dst = (long*)dst; - aligned_src = (long*)src; - - /* Copy one long word at a time if possible. */ - while (!TOO_SMALL_LITTLE_BLOCK(length)) { - *--aligned_dst = *--aligned_src; - length -= LITTLE_BLOCK_SIZE; - } - - /* Pick up any residual with a byte copier. */ - dst = (char*)aligned_dst; - src = (char*)aligned_src; - } - - while (length--) { - *--dst = *--src; - } - } else { - /* Use optimizing algorithm for a non-destructive copy to closely - match memcpy. If the size is small or either SRC or DST is unaligned, - then punt into the byte copy loop. This should be rare. */ - if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { - aligned_dst = (long*)dst; - aligned_src = (long*)src; - - /* Copy 4X long words at a time if possible. */ - while (!TOO_SMALL_BIG_BLOCK(length)) { - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - length -= BIG_BLOCK_SIZE; - } - - /* Copy one long word at a time if possible. */ - while (!TOO_SMALL_LITTLE_BLOCK(length)) { - *aligned_dst++ = *aligned_src++; - length -= LITTLE_BLOCK_SIZE; - } - - /* Pick up any residual with a byte copier. */ - dst = (char*)aligned_dst; - src = (char*)aligned_src; - } - - while (length--) { - *dst++ = *src++; - } - } - - return dst_void; -} - -// Hook to force the linker to include this file -void esp_libc_include_memmove_impl(void) -{ -} diff --git a/components/newlib/src/string/strncmp.c b/components/newlib/src/string/strncmp.c index f0efd62189..ed71b484e7 100644 --- a/components/newlib/src/string/strncmp.c +++ b/components/newlib/src/string/strncmp.c @@ -7,9 +7,8 @@ */ #include #include -#include "local.h" +#include "string/local.h" -__attribute__((optimize("-Os"))) int strncmp(const char *s1, const char *s2, diff --git a/components/newlib/src/string/strncpy.c b/components/newlib/src/string/strncpy.c index 5821e1997b..0916244d35 100644 --- a/components/newlib/src/string/strncpy.c +++ b/components/newlib/src/string/strncpy.c @@ -7,9 +7,8 @@ */ #include #include -#include "local.h" +#include "string/local.h" -__attribute__((optimize("-Os"))) char * strncpy(char *__restrict dst0, const char *__restrict src0, @@ -29,6 +28,7 @@ strncpy(char *__restrict dst0, sized copies. */ while (!TOO_SMALL_LITTLE_BLOCK(count) && !DETECT_NULL(*aligned_src)) { count -= sizeof(long int); + /* DIG-694: there are enough instructions between lw and sw after compiler unrolls the loop */ *aligned_dst++ = *aligned_src++; } diff --git a/components/newlib/test_apps/newlib/main/CMakeLists.txt b/components/newlib/test_apps/newlib/main/CMakeLists.txt index d91fc695dc..474b1050f9 100644 --- a/components/newlib/test_apps/newlib/main/CMakeLists.txt +++ b/components/newlib/test_apps/newlib/main/CMakeLists.txt @@ -18,14 +18,9 @@ if(CONFIG_LIBC_NEWLIB) endif() if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) - list(APPEND srcs "test_misaligned_mem_performance.c") + list(APPEND srcs "test_misaligned_access.c") endif() idf_component_register(SRCS "${srcs}" PRIV_REQUIRES unity vfs cmock driver esp_timer spi_flash test_utils pthread esp_psram WHOLE_ARCHIVE) - -if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) - set_source_files_properties(test_misaligned_mem_performance.c - PROPERTIES COMPILE_FLAGS "-Wno-incompatible-pointer-types -Wno-strict-prototypes") -endif() diff --git a/components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c b/components/newlib/test_apps/newlib/main/test_misaligned_access.c similarity index 63% rename from components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c rename to components/newlib/test_apps/newlib/main/test_misaligned_access.c index cc9ab315d6..0ee1f0f7b3 100644 --- a/components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c +++ b/components/newlib/test_apps/newlib/main/test_misaligned_access.c @@ -6,11 +6,16 @@ #include #include #include "esp_heap_caps.h" +#include "soc/soc.h" #include "hal/cpu_ll.h" #include "unity.h" #define MAX_MEMTEST_SIZE 4096 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wincompatible-pointer-types" +#pragma GCC diagnostic ignored "-Wstrict-prototypes" + uint32_t test_function_dest_src_size(void (*foo)(...), bool pass_size) { uint32_t ccount1, ccount2; @@ -106,3 +111,52 @@ TEST_CASE("strncmp", "[misaligned_mem]") /* esp32c2: 24369 cycles instead 49141. */ TEST_ASSERT_LESS_THAN(30000, ccount); } +#pragma GCC diagnostic pop // "-Wincompatible-pointer-types" "-Wstrict-prototypes" + +static bool fn_in_ram(void *fn) +{ + const int fnaddr = (int)fn; + return (fnaddr >= SOC_IRAM_LOW && fnaddr < SOC_IRAM_HIGH); +} + +TEST_CASE("mem functions in IRAM", "[misaligned_mem]") +{ + TEST_ASSERT_TRUE(fn_in_ram(memcpy)); + TEST_ASSERT_TRUE(fn_in_ram(memcmp)); + TEST_ASSERT_TRUE(fn_in_ram(memmove)); + TEST_ASSERT_TRUE(fn_in_ram(strcpy)); + TEST_ASSERT_TRUE(fn_in_ram(strncpy)); + TEST_ASSERT_TRUE(fn_in_ram(strcmp)); + TEST_ASSERT_TRUE(fn_in_ram(strncmp)); +} + +#if CONFIG_ESP_SYSTEM_MEMPROT_PMP +TEST_CASE("access across different PMP regions", "[misaligned_mem]") +{ + /* + * PMP configurations for load and store addresses may + * have different permissions (e.g., "R" vs. "RW"). + * + * Due to the timing alignment of internal signals, the address + * permission check may be incorrectly applied during the second + * part of a misaligned access transaction. + * + * As a workaround, insert two instructions (e.g. ADDI/NOP) between + * accessing to different memory regions. This spacing avoids the + * false permission check caused by signal timing overlap. + * + * This test may help identify the root issue in affected chips. + */ + + const void *src = (void *) SOC_DROM_MASK_LOW; + asm volatile("addi sp, sp, -16\n" + "lw t0, 2(%0)\n" +#if CONFIG_SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + "nop\n" + "nop\n" +#endif + "sw t0, 3(sp)\n" + "addi sp, sp, 16" + :: "r"(src) : "memory"); +} +#endif diff --git a/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem b/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem index 17a61e401a..f5a15c7bc0 100644 --- a/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem +++ b/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem @@ -1 +1,2 @@ CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS=y +CONFIG_ESP_SYSTEM_MEMPROT_PMP=y diff --git a/components/soc/esp32c2/include/soc/Kconfig.soc_caps.in b/components/soc/esp32c2/include/soc/Kconfig.soc_caps.in index d0d74a5292..60b9c54c59 100644 --- a/components/soc/esp32c2/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32c2/include/soc/Kconfig.soc_caps.in @@ -263,6 +263,10 @@ config SOC_CPU_IDRAM_SPLIT_USING_PMP bool default y +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_ECC_SUPPORT_POINT_VERIFY_QUIRK bool default y diff --git a/components/soc/esp32c2/include/soc/soc_caps.h b/components/soc/esp32c2/include/soc/soc_caps.h index facf692980..af19a55d4d 100644 --- a/components/soc/esp32c2/include/soc/soc_caps.h +++ b/components/soc/esp32c2/include/soc/soc_caps.h @@ -112,6 +112,8 @@ #define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 0x80000000 // bytes #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 /*-------------------------- ECC CAPS --------------------------*/ #define SOC_ECC_SUPPORT_POINT_VERIFY_QUIRK 1 // C2 ECC peripheral has a bug in ECC point verification, if value of K is zero the verification fails diff --git a/components/soc/esp32c3/include/soc/Kconfig.soc_caps.in b/components/soc/esp32c3/include/soc/Kconfig.soc_caps.in index 07d3891d96..b0ce79412c 100644 --- a/components/soc/esp32c3/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32c3/include/soc/Kconfig.soc_caps.in @@ -351,6 +351,10 @@ config SOC_CPU_WATCHPOINT_MAX_REGION_SIZE hex default 0x80000000 +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_DS_SIGNATURE_MAX_BIT_LEN int default 3072 diff --git a/components/soc/esp32c3/include/soc/soc_caps.h b/components/soc/esp32c3/include/soc/soc_caps.h index f56e816ff8..eaae1d8db9 100644 --- a/components/soc/esp32c3/include/soc/soc_caps.h +++ b/components/soc/esp32c3/include/soc/soc_caps.h @@ -143,6 +143,8 @@ #define SOC_CPU_BREAKPOINTS_NUM 8 #define SOC_CPU_WATCHPOINTS_NUM 8 #define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 0x80000000 // bytes +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ /** The maximum length of a Digital Signature in bits. */ diff --git a/components/soc/esp32c6/include/soc/Kconfig.soc_caps.in b/components/soc/esp32c6/include/soc/Kconfig.soc_caps.in index ac19a476db..fe2e6264f5 100644 --- a/components/soc/esp32c6/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32c6/include/soc/Kconfig.soc_caps.in @@ -427,6 +427,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY int default 4 +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_DS_SIGNATURE_MAX_BIT_LEN int default 3072 diff --git a/components/soc/esp32c6/include/soc/soc_caps.h b/components/soc/esp32c6/include/soc/soc_caps.h index c59db3e62d..2906c6a1c7 100644 --- a/components/soc/esp32c6/include/soc/soc_caps.h +++ b/components/soc/esp32c6/include/soc/soc_caps.h @@ -163,6 +163,8 @@ #define SOC_CPU_HAS_PMA 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_PMP_REGION_GRANULARITY 4 +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 // TODO: IDF-5360 (Copy from esp32c3, need check) /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ diff --git a/components/soc/esp32h2/include/soc/Kconfig.soc_caps.in b/components/soc/esp32h2/include/soc/Kconfig.soc_caps.in index b7a521cf58..0db2579197 100644 --- a/components/soc/esp32h2/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32h2/include/soc/Kconfig.soc_caps.in @@ -427,6 +427,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY int default 4 +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_MMU_PAGE_SIZE_CONFIGURABLE bool default y diff --git a/components/soc/esp32h2/include/soc/soc_caps.h b/components/soc/esp32h2/include/soc/soc_caps.h index 30a8fea465..feeb153f2c 100644 --- a/components/soc/esp32h2/include/soc/soc_caps.h +++ b/components/soc/esp32h2/include/soc/soc_caps.h @@ -180,6 +180,8 @@ #define SOC_CPU_HAS_PMA 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_PMP_REGION_GRANULARITY 4 +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 /*-------------------------- MMU CAPS ----------------------------------------*/ #define SOC_MMU_PAGE_SIZE_CONFIGURABLE (1) diff --git a/components/soc/esp32h21/include/soc/Kconfig.soc_caps.in b/components/soc/esp32h21/include/soc/Kconfig.soc_caps.in index 6d7f798d0d..1508085055 100644 --- a/components/soc/esp32h21/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32h21/include/soc/Kconfig.soc_caps.in @@ -283,6 +283,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY int default 4 +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_MMU_PAGE_SIZE_CONFIGURABLE bool default y diff --git a/components/soc/esp32h21/include/soc/soc_caps.h b/components/soc/esp32h21/include/soc/soc_caps.h index ce0ac6b56f..a9a5402f90 100644 --- a/components/soc/esp32h21/include/soc/soc_caps.h +++ b/components/soc/esp32h21/include/soc/soc_caps.h @@ -165,6 +165,8 @@ #define SOC_CPU_HAS_PMA 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_PMP_REGION_GRANULARITY 4 +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 /*-------------------------- MMU CAPS ----------------------------------------*/ #define SOC_MMU_PAGE_SIZE_CONFIGURABLE (1)