diff --git a/components/esp_rom/CMakeLists.txt b/components/esp_rom/CMakeLists.txt index 69e01d0abd..cb22d5d2ca 100644 --- a/components/esp_rom/CMakeLists.txt +++ b/components/esp_rom/CMakeLists.txt @@ -149,6 +149,10 @@ if(ESP_TEE_BUILD) if(CONFIG_ESP_ROM_HAS_NEWLIB_NANO_FORMAT) rom_linker_script("newlib-nano") endif() + rom_linker_script("libc") + if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY) # TODO IDF-13852: use optimized memcpy for TEE ? + rom_linker_script("libc-suboptimal_for_misaligned_mem") + endif() endif() if(BOOTLOADER_BUILD) @@ -173,8 +177,8 @@ if(BOOTLOADER_BUILD) else() rom_linker_script("libc") endif() - if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY - AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY) + # For bootloader use only ROM functions due to 'relocation truncated to fit' error rom_linker_script("libc-suboptimal_for_misaligned_mem") endif() if(CONFIG_LIBC_NEWLIB) diff --git a/components/newlib/CMakeLists.txt b/components/newlib/CMakeLists.txt index 1bb182f584..7c8a1faa92 100644 --- a/components/newlib/CMakeLists.txt +++ b/components/newlib/CMakeLists.txt @@ -38,23 +38,21 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND) endif() if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) - if(CMAKE_C_COMPILER_ID MATCHES "Clang") - # TODO IDF-13089: remove this block and modify src/newlib.lf when clang was upgraded - list(APPEND srcs - "src/string/memcmp.c" - "src/string/memmove.c" - "src/string/strncmp.c" - "src/string/strncpy.c" - "src/port/riscv/memcpy.c" - "src/port/riscv/strcpy.c" - "src/port/riscv/strcmp.S") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl") - list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl") - endif() + list(APPEND srcs + "src/port/riscv/memcpy.c" + "src/port/riscv/memmove.c" + "src/string/memcmp.c" + "src/port/riscv/strcpy.c" + "src/string/strncpy.c" + "src/string/strncmp.c" + "src/port/riscv/strcmp.S") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl") endif() if(CONFIG_LIBC_NEWLIB) @@ -96,6 +94,17 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND) PROPERTIES COMPILE_FLAGS "-mno-disable-hardware-atomics") endif() +if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) + # TODO GCC-419 and IDF-13089: cleanup files + set_source_files_properties("src/string/memcmp.c" + "src/string/strncmp.c" + "src/string/strncpy.c" + "src/port/riscv/memcpy.c" + "src/port/riscv/memmove.c" + "src/port/riscv/strcpy.c" + PROPERTIES COMPILE_FLAGS -O2) +endif() + # Forces the linker to include heap, syscall, pthread, assert, and retargetable locks from this component, # instead of the implementations provided by newlib. list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_heap_impl") diff --git a/components/newlib/Kconfig b/components/newlib/Kconfig index 9a30784c21..92d9afc412 100644 --- a/components/newlib/Kconfig +++ b/components/newlib/Kconfig @@ -151,7 +151,7 @@ menu "LibC" Enables performance-optimized implementations of memory and string functions when handling misaligned memory. - This increases the image size by ~1000 bytes. + Require approximately 800–1000 bytes of IRAM. Optimized functions include: - memcpy diff --git a/components/newlib/src/string/local.h b/components/newlib/priv_include/string/local.h similarity index 98% rename from components/newlib/src/string/local.h rename to components/newlib/priv_include/string/local.h index bfc1260efc..d6df832838 100644 --- a/components/newlib/src/string/local.h +++ b/components/newlib/priv_include/string/local.h @@ -22,7 +22,9 @@ * to avoid small performance penalties (if they are not zero). */ #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1)) +#ifndef _HAVE_HW_MISALIGNED_ACCESS #define _HAVE_HW_MISALIGNED_ACCESS (__riscv_misaligned_fast || __riscv_misaligned_slow) +#endif #if _HAVE_HW_MISALIGNED_ACCESS /* Hardware performs unaligned operations with little diff --git a/components/newlib/src/libc.lf b/components/newlib/src/libc.lf index a8ebcf8930..05fdef2306 100644 --- a/components/newlib/src/libc.lf +++ b/components/newlib/src/libc.lf @@ -11,8 +11,6 @@ archive: else: libc.a entries: - if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y: - libc_a-memcpy (noflash) if SPIRAM_CACHE_WORKAROUND = y: # The following libs are either used in a lot of places or in critical # code. (such as panic or abort) diff --git a/components/newlib/src/newlib.lf b/components/newlib/src/newlib.lf index 359acd6345..e311ff5b17 100644 --- a/components/newlib/src/newlib.lf +++ b/components/newlib/src/newlib.lf @@ -1,6 +1,14 @@ [mapping:newlib] archive: libnewlib.a entries: + if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y: + memcpy (noflash) + memmove (noflash) + memcmp (noflash) + strcpy (noflash) + strncpy (noflash) + strcmp (noflash) + strncmp (noflash) if LIBC_MISC_IN_IRAM = y: if HEAP_PLACE_FUNCTION_INTO_FLASH = n: heap (noflash) @@ -11,16 +19,3 @@ entries: stdatomic_s32c1i (noflash) if STDATOMIC_S32C1I_SPIRAM_WORKAROUND = y: stdatomic_s32c1i (noflash) - if CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TOOLCHAIN_GCC = y: - # memcpy() should be in IRAM due to early system use - # Others can stay in flash (performance difference is minimal) - # - # Performance Comparison: - # | func | flash | iram | - # |---------|-------|-------| - # | memcmp | 15986 | 15170 | - # | strcpy | 17499 | 16660 | - # | strcmp | 13125 | 11147 | - # | strncpy | 17386 | 16668 | - # | strncmp | 22161 | 21782 | - libc_a-memcpy (noflash) diff --git a/components/newlib/src/picolibc/libc.lf b/components/newlib/src/picolibc/libc.lf index 0892557473..f2a97b4cc9 100644 --- a/components/newlib/src/picolibc/libc.lf +++ b/components/newlib/src/picolibc/libc.lf @@ -8,10 +8,6 @@ archive: libc.a entries: - if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TARGET_ARCH_RISCV = y: - memcpy-asm (noflash) - if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TARGET_ARCH_XTENSA = y: - libc_machine_xtensa_memcpy (noflash) if SPIRAM_CACHE_WORKAROUND = y: # The following libs are either used in a lot of places or in critical code. (such as panic or abort) # Thus, they shall always be placed in IRAM. diff --git a/components/newlib/src/port/riscv/memcpy.c b/components/newlib/src/port/riscv/memcpy.c index bbee1d3f02..6c84199d91 100644 --- a/components/newlib/src/port/riscv/memcpy.c +++ b/components/newlib/src/port/riscv/memcpy.c @@ -18,14 +18,11 @@ #include #include -#include "esp_attr.h" -#include "../../string/local.h" +#include "string/local.h" #define unlikely(X) __builtin_expect (!!(X), 0) -IRAM_ATTR void * -__attribute__((optimize("-Os"))) __inhibit_loop_to_libcall memcpy(void *__restrict aa, const void *__restrict bb, size_t n) { @@ -65,29 +62,80 @@ small: if (unlikely(lend - la > 8)) { while (lend - la > 8) { - long b0 = *lb++; - long b1 = *lb++; - long b2 = *lb++; - long b3 = *lb++; - long b4 = *lb++; - long b5 = *lb++; - long b6 = *lb++; - long b7 = *lb++; - long b8 = *lb++; - *la++ = b0; - *la++ = b1; - *la++ = b2; - *la++ = b3; - *la++ = b4; - *la++ = b5; - *la++ = b6; - *la++ = b7; - *la++ = b8; + /* + * long b0 = *lb++; + * long b1 = *lb++; + * long b2 = *lb++; + * long b3 = *lb++; + * long b4 = *lb++; + * long b5 = *lb++; + * long b6 = *lb++; + * long b7 = *lb++; + * long b8 = *lb++; + * *la++ = b0; + * *la++ = b1; + * *la++ = b2; + * *la++ = b3; + * *la++ = b4; + * *la++ = b5; + * *la++ = b6; + * *la++ = b7; + * *la++ = b8; + */ + long src0, src1, src2, src3; + long src4, src5, src6, src7; + long src8; + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, 0(%10)\n" // scr0 = lb[0]; + "lw %1, 4(%10)\n" // scr1 = lb[1]; + "lw %2, 8(%10)\n" // scr2 = lb[2]; + "lw %3, 12(%10)\n" // scr3 = lb[3]; + "lw %4, 16(%10)\n" // scr4 = lb[4]; + "lw %5, 20(%10)\n" // scr5 = lb[5]; + "lw %6, 24(%10)\n" // scr6 = lb[6]; + "lw %7, 28(%10)\n" // scr7 = lb[7]; + "lw %8, 32(%10)\n" // scr8 = lb[8]; + "addi %9, %9, 36\n" // la += 8 * 9; + "addi %10, %10, 36\n" // lb += 8 * 9; + "sw %0, -36(%9)\n" // *(la - 9) = src; + "sw %1, -32(%9)\n" // *(la - 8) = src; + "sw %2, -28(%9)\n" // *(la - 7) = src; + "sw %3, -24(%9)\n" // *(la - 6) = src; + "sw %4, -20(%9)\n" // *(la - 5) = src; + "sw %5, -16(%9)\n" // *(la - 4) = src; + "sw %6, -12(%9)\n" // *(la - 3) = src; + "sw %7, -8(%9)\n" // *(la - 2) = src; + "sw %8, -4(%9)\n" // *(la - 1) = src; + : "=r"(src0), "=r"(src1), "=r"(src2), "=r"(src3), + "=r"(src4), "=r"(src5), "=r"(src6), "=r"(src7), + "=r"(src8), + "+r"(la), "+r"(lb) + :: "memory"); } } - while (la < lend) { - BODY(la, lb, long); + /* + * BODY(la, lb, long); + */ + long src0; +#ifdef __OPTIMIZE_SIZE__ +#error "Enabled Os optimization may not work properly for DIG-694" + /* + * Replacing the string: + * *la++ = tt; + * To: + * "addi %2, %4, 4\n" // la++; + * "sw %0, -4(%4)\n" // *(la-1) = src0; + * May break some optimizations and slightly reduce performance. + */ +#endif + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, 0(%1)\n" // long src0 = *lb; + "addi %1, %1, 4\n" // lb++; + "addi %2, %2, 4\n" // la++; + "sw %0, -4(%2)\n" // *(la-1) = src0; + : "=r"(src0), "+r"(lb), "+r"(la) + :: "memory"); } a = (char *)la; diff --git a/components/newlib/src/port/riscv/memmove.c b/components/newlib/src/port/riscv/memmove.c new file mode 100644 index 0000000000..208e5953cc --- /dev/null +++ b/components/newlib/src/port/riscv/memmove.c @@ -0,0 +1,150 @@ +/* + * SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +#include +#include <_ansi.h> +#include +#include +#include "string/local.h" + +void * +__inhibit_loop_to_libcall +memmove(void *dst_void, + const void *src_void, + size_t length) +{ + char *dst = dst_void; + const char *src = src_void; + long *aligned_dst; + const long *aligned_src; + + if (src < dst && dst < src + length) { + /* Destructive overlap...have to copy backwards */ + src += length; + dst += length; + + if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { + aligned_dst = (long*)dst; + aligned_src = (long*)src; + + /* Copy one long word at a time if possible. */ + while (!TOO_SMALL_LITTLE_BLOCK(length)) { + /* + * const long src0 = *--aligned_src; + * *--aligned_dst = src0; + * length -= LITTLE_BLOCK_SIZE; + */ + long src0; + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, -4(%1)\n" // src0 = *(aligned_src - 1); + "addi %1, %1, -4\n" // aligned_src--; + "addi %2, %2, -4\n" // aligned_dst--; + "addi %3, %3, -4\n" // length -= LITTLE_BLOCK_SIZE; + "sw %0, 0(%2)\n" // aligned_dst = src0; + : "=r"(src0), "+r"(aligned_src), "+r"(aligned_dst), "+r"(length) + :: "memory"); + } + + /* Pick up any residual with a byte copier. */ + dst = (char*)aligned_dst; + src = (char*)aligned_src; + } + + while (length--) { + *--dst = *--src; + } + } else { + /* Use optimizing algorithm for a non-destructive copy to closely + match memcpy. If the size is small or either SRC or DST is unaligned, + then punt into the byte copy loop. This should be rare. */ + if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { + aligned_dst = (long*)dst; + aligned_src = (long*)src; + + /* Copy 8X long words at a time if possible. */ + while (length >= BIG_BLOCK_SIZE * 2) { + /* + * const long src0 = *aligned_src++; + * const long src1 = *aligned_src++; + * const long src2 = *aligned_src++; + * const long src3 = *aligned_src++; + * const long src4 = *aligned_src++; + * const long src5 = *aligned_src++; + * const long src6 = *aligned_src++; + * const long src7 = *aligned_src++; + * *aligned_dst++ = src0; + * *aligned_dst++ = src1; + * *aligned_dst++ = src2; + * *aligned_dst++ = src3; + * *aligned_dst++ = src4; + * *aligned_dst++ = src5; + * *aligned_dst++ = src6; + * *aligned_dst++ = src7; + */ + long src0, src1, src2, src3; + long src4, src5, src6, src7; + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, 0(%8)\n" // src0 = aligned_src[0]; + "lw %1, 4(%8)\n" // src1 = aligned_src[1]; + "lw %2, 8(%8)\n" // src2 = aligned_src[2]; + "lw %3, 12(%8)\n" // src3 = aligned_src[3]; + "lw %4, 16(%8)\n" // src4 = aligned_src[4]; + "lw %5, 20(%8)\n" // src5 = aligned_src[5]; + "lw %6, 24(%8)\n" // src6 = aligned_src[6]; + "lw %7, 28(%8)\n" // src7 = aligned_src[7]; + "addi %8, %8, 32\n" // aligned_src += BIG_BLOCK_SIZE * 2; + "addi %9, %9, 32\n" // aligned_dst += BIG_BLOCK_SIZE * 2; + "addi %10, %10, -32\n" // length -= BIG_BLOCK_SIZE * 2; + "sw %0, -32(%9)\n" // *(aligned_dst - 8) = src0; + "sw %1, -28(%9)\n" // *(aligned_dst - 7) = src1; + "sw %2, -24(%9)\n" // *(aligned_dst - 6) = src2; + "sw %3, -20(%9)\n" // *(aligned_dst - 5) = src3; + "sw %4, -16(%9)\n" // *(aligned_dst - 4) = src4; + "sw %5, -12(%9)\n" // *(aligned_dst - 3) = src5; + "sw %6, -8(%9)\n" // *(aligned_dst - 2) = src6; + "sw %7, -4(%9)\n" // *(aligned_dst - 1) = src7; + : "=r"(src0), "=r"(src1), "=r"(src2), "=r"(src3), + "=r"(src4), "=r"(src5), "=r"(src6), "=r"(src7), + "+r"(aligned_src), "+r"(aligned_dst), "+r"(length) + :: "memory"); + } + + /* Copy one long word at a time if possible. */ + while (!TOO_SMALL_LITTLE_BLOCK(length)) { + /* + * const long src0 = *aligned_src++; + * *aligned_dst++ = src0; + * length -= LITTLE_BLOCK_SIZE; + */ + long src0; + /* DIG-694: need at least 2 instructions between lw and sw */ + asm volatile("lw %0, 0(%4)\n" // long src0 = *aligned_src; + "addi %1, %4, 4\n" // aligned_src++; + "addi %2, %5, 4\n" // aligned_dst++; + "addi %3, %6, -4\n" // length -= LITTLE_BLOCK_SIZE; + "sw %0, -4(%5)\n" // *(aligned_dst-1) = src0; + : "=r"(src0), "=r"(aligned_src), "=r"(aligned_dst), "=r"(length) + : "r"(aligned_src), "r"(aligned_dst), "r"(length)); + } + + /* Pick up any residual with a byte copier. */ + dst = (char*)aligned_dst; + src = (char*)aligned_src; + } + + while (length--) { + *dst++ = *src++; + } + } + + return dst_void; +} + +// Hook to force the linker to include this file +void esp_libc_include_memmove_impl(void) +{ +} diff --git a/components/newlib/src/port/riscv/strcpy.c b/components/newlib/src/port/riscv/strcpy.c index 361a04baa1..1884218729 100644 --- a/components/newlib/src/port/riscv/strcpy.c +++ b/components/newlib/src/port/riscv/strcpy.c @@ -30,7 +30,6 @@ unsigned long __newlib__libc_detect_null(unsigned long w) return ~(((w & mask) + mask) | w | mask); } -__attribute__((optimize("-Os"))) char *strcpy(char *dst, const char *src) { char *dst0 = dst; @@ -44,6 +43,7 @@ char *strcpy(char *dst, const char *src) const long *lsrc = (const long *)src; while (!__newlib__libc_detect_null(*lsrc)) { + /* DIG-694: there are enough instructions between lw and sw after compiler unrolls the loop */ *ldst++ = *lsrc++; } diff --git a/components/newlib/src/string/memcmp.c b/components/newlib/src/string/memcmp.c index 0a26ca8cd4..4445d09da2 100644 --- a/components/newlib/src/string/memcmp.c +++ b/components/newlib/src/string/memcmp.c @@ -6,9 +6,8 @@ * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD */ #include -#include "local.h" +#include "string/local.h" -__attribute__((optimize("-Os"))) int memcmp(const void *m1, const void *m2, diff --git a/components/newlib/src/string/memmove.c b/components/newlib/src/string/memmove.c deleted file mode 100644 index 57071ddc09..0000000000 --- a/components/newlib/src/string/memmove.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc. - * - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0 - * - * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD - */ -#include -#include <_ansi.h> -#include -#include -#include "local.h" - -__attribute__((optimize("-Os"))) -void * -__inhibit_loop_to_libcall -memmove(void *dst_void, - const void *src_void, - size_t length) -{ - char *dst = dst_void; - const char *src = src_void; - long *aligned_dst; - const long *aligned_src; - - if (src < dst && dst < src + length) { - /* Destructive overlap...have to copy backwards */ - src += length; - dst += length; - - if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { - aligned_dst = (long*)dst; - aligned_src = (long*)src; - - /* Copy one long word at a time if possible. */ - while (!TOO_SMALL_LITTLE_BLOCK(length)) { - *--aligned_dst = *--aligned_src; - length -= LITTLE_BLOCK_SIZE; - } - - /* Pick up any residual with a byte copier. */ - dst = (char*)aligned_dst; - src = (char*)aligned_src; - } - - while (length--) { - *--dst = *--src; - } - } else { - /* Use optimizing algorithm for a non-destructive copy to closely - match memcpy. If the size is small or either SRC or DST is unaligned, - then punt into the byte copy loop. This should be rare. */ - if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) { - aligned_dst = (long*)dst; - aligned_src = (long*)src; - - /* Copy 4X long words at a time if possible. */ - while (!TOO_SMALL_BIG_BLOCK(length)) { - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - *aligned_dst++ = *aligned_src++; - length -= BIG_BLOCK_SIZE; - } - - /* Copy one long word at a time if possible. */ - while (!TOO_SMALL_LITTLE_BLOCK(length)) { - *aligned_dst++ = *aligned_src++; - length -= LITTLE_BLOCK_SIZE; - } - - /* Pick up any residual with a byte copier. */ - dst = (char*)aligned_dst; - src = (char*)aligned_src; - } - - while (length--) { - *dst++ = *src++; - } - } - - return dst_void; -} - -// Hook to force the linker to include this file -void esp_libc_include_memmove_impl(void) -{ -} diff --git a/components/newlib/src/string/strncmp.c b/components/newlib/src/string/strncmp.c index f0efd62189..ed71b484e7 100644 --- a/components/newlib/src/string/strncmp.c +++ b/components/newlib/src/string/strncmp.c @@ -7,9 +7,8 @@ */ #include #include -#include "local.h" +#include "string/local.h" -__attribute__((optimize("-Os"))) int strncmp(const char *s1, const char *s2, diff --git a/components/newlib/src/string/strncpy.c b/components/newlib/src/string/strncpy.c index 5821e1997b..0916244d35 100644 --- a/components/newlib/src/string/strncpy.c +++ b/components/newlib/src/string/strncpy.c @@ -7,9 +7,8 @@ */ #include #include -#include "local.h" +#include "string/local.h" -__attribute__((optimize("-Os"))) char * strncpy(char *__restrict dst0, const char *__restrict src0, @@ -29,6 +28,7 @@ strncpy(char *__restrict dst0, sized copies. */ while (!TOO_SMALL_LITTLE_BLOCK(count) && !DETECT_NULL(*aligned_src)) { count -= sizeof(long int); + /* DIG-694: there are enough instructions between lw and sw after compiler unrolls the loop */ *aligned_dst++ = *aligned_src++; } diff --git a/components/newlib/test_apps/newlib/main/CMakeLists.txt b/components/newlib/test_apps/newlib/main/CMakeLists.txt index d91fc695dc..474b1050f9 100644 --- a/components/newlib/test_apps/newlib/main/CMakeLists.txt +++ b/components/newlib/test_apps/newlib/main/CMakeLists.txt @@ -18,14 +18,9 @@ if(CONFIG_LIBC_NEWLIB) endif() if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) - list(APPEND srcs "test_misaligned_mem_performance.c") + list(APPEND srcs "test_misaligned_access.c") endif() idf_component_register(SRCS "${srcs}" PRIV_REQUIRES unity vfs cmock driver esp_timer spi_flash test_utils pthread esp_psram WHOLE_ARCHIVE) - -if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) - set_source_files_properties(test_misaligned_mem_performance.c - PROPERTIES COMPILE_FLAGS "-Wno-incompatible-pointer-types -Wno-strict-prototypes") -endif() diff --git a/components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c b/components/newlib/test_apps/newlib/main/test_misaligned_access.c similarity index 63% rename from components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c rename to components/newlib/test_apps/newlib/main/test_misaligned_access.c index cc9ab315d6..0ee1f0f7b3 100644 --- a/components/newlib/test_apps/newlib/main/test_misaligned_mem_performance.c +++ b/components/newlib/test_apps/newlib/main/test_misaligned_access.c @@ -6,11 +6,16 @@ #include #include #include "esp_heap_caps.h" +#include "soc/soc.h" #include "hal/cpu_ll.h" #include "unity.h" #define MAX_MEMTEST_SIZE 4096 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wincompatible-pointer-types" +#pragma GCC diagnostic ignored "-Wstrict-prototypes" + uint32_t test_function_dest_src_size(void (*foo)(...), bool pass_size) { uint32_t ccount1, ccount2; @@ -106,3 +111,52 @@ TEST_CASE("strncmp", "[misaligned_mem]") /* esp32c2: 24369 cycles instead 49141. */ TEST_ASSERT_LESS_THAN(30000, ccount); } +#pragma GCC diagnostic pop // "-Wincompatible-pointer-types" "-Wstrict-prototypes" + +static bool fn_in_ram(void *fn) +{ + const int fnaddr = (int)fn; + return (fnaddr >= SOC_IRAM_LOW && fnaddr < SOC_IRAM_HIGH); +} + +TEST_CASE("mem functions in IRAM", "[misaligned_mem]") +{ + TEST_ASSERT_TRUE(fn_in_ram(memcpy)); + TEST_ASSERT_TRUE(fn_in_ram(memcmp)); + TEST_ASSERT_TRUE(fn_in_ram(memmove)); + TEST_ASSERT_TRUE(fn_in_ram(strcpy)); + TEST_ASSERT_TRUE(fn_in_ram(strncpy)); + TEST_ASSERT_TRUE(fn_in_ram(strcmp)); + TEST_ASSERT_TRUE(fn_in_ram(strncmp)); +} + +#if CONFIG_ESP_SYSTEM_MEMPROT_PMP +TEST_CASE("access across different PMP regions", "[misaligned_mem]") +{ + /* + * PMP configurations for load and store addresses may + * have different permissions (e.g., "R" vs. "RW"). + * + * Due to the timing alignment of internal signals, the address + * permission check may be incorrectly applied during the second + * part of a misaligned access transaction. + * + * As a workaround, insert two instructions (e.g. ADDI/NOP) between + * accessing to different memory regions. This spacing avoids the + * false permission check caused by signal timing overlap. + * + * This test may help identify the root issue in affected chips. + */ + + const void *src = (void *) SOC_DROM_MASK_LOW; + asm volatile("addi sp, sp, -16\n" + "lw t0, 2(%0)\n" +#if CONFIG_SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + "nop\n" + "nop\n" +#endif + "sw t0, 3(sp)\n" + "addi sp, sp, 16" + :: "r"(src) : "memory"); +} +#endif diff --git a/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem b/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem index 17a61e401a..f5a15c7bc0 100644 --- a/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem +++ b/components/newlib/test_apps/newlib/sdkconfig.ci.misaligned_mem @@ -1 +1,2 @@ CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS=y +CONFIG_ESP_SYSTEM_MEMPROT_PMP=y diff --git a/components/soc/esp32c2/include/soc/Kconfig.soc_caps.in b/components/soc/esp32c2/include/soc/Kconfig.soc_caps.in index d0d74a5292..60b9c54c59 100644 --- a/components/soc/esp32c2/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32c2/include/soc/Kconfig.soc_caps.in @@ -263,6 +263,10 @@ config SOC_CPU_IDRAM_SPLIT_USING_PMP bool default y +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_ECC_SUPPORT_POINT_VERIFY_QUIRK bool default y diff --git a/components/soc/esp32c2/include/soc/soc_caps.h b/components/soc/esp32c2/include/soc/soc_caps.h index facf692980..af19a55d4d 100644 --- a/components/soc/esp32c2/include/soc/soc_caps.h +++ b/components/soc/esp32c2/include/soc/soc_caps.h @@ -112,6 +112,8 @@ #define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 0x80000000 // bytes #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 /*-------------------------- ECC CAPS --------------------------*/ #define SOC_ECC_SUPPORT_POINT_VERIFY_QUIRK 1 // C2 ECC peripheral has a bug in ECC point verification, if value of K is zero the verification fails diff --git a/components/soc/esp32c3/include/soc/Kconfig.soc_caps.in b/components/soc/esp32c3/include/soc/Kconfig.soc_caps.in index 07d3891d96..b0ce79412c 100644 --- a/components/soc/esp32c3/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32c3/include/soc/Kconfig.soc_caps.in @@ -351,6 +351,10 @@ config SOC_CPU_WATCHPOINT_MAX_REGION_SIZE hex default 0x80000000 +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_DS_SIGNATURE_MAX_BIT_LEN int default 3072 diff --git a/components/soc/esp32c3/include/soc/soc_caps.h b/components/soc/esp32c3/include/soc/soc_caps.h index f56e816ff8..eaae1d8db9 100644 --- a/components/soc/esp32c3/include/soc/soc_caps.h +++ b/components/soc/esp32c3/include/soc/soc_caps.h @@ -143,6 +143,8 @@ #define SOC_CPU_BREAKPOINTS_NUM 8 #define SOC_CPU_WATCHPOINTS_NUM 8 #define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 0x80000000 // bytes +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ /** The maximum length of a Digital Signature in bits. */ diff --git a/components/soc/esp32c6/include/soc/Kconfig.soc_caps.in b/components/soc/esp32c6/include/soc/Kconfig.soc_caps.in index ac19a476db..fe2e6264f5 100644 --- a/components/soc/esp32c6/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32c6/include/soc/Kconfig.soc_caps.in @@ -427,6 +427,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY int default 4 +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_DS_SIGNATURE_MAX_BIT_LEN int default 3072 diff --git a/components/soc/esp32c6/include/soc/soc_caps.h b/components/soc/esp32c6/include/soc/soc_caps.h index c59db3e62d..2906c6a1c7 100644 --- a/components/soc/esp32c6/include/soc/soc_caps.h +++ b/components/soc/esp32c6/include/soc/soc_caps.h @@ -163,6 +163,8 @@ #define SOC_CPU_HAS_PMA 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_PMP_REGION_GRANULARITY 4 +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 // TODO: IDF-5360 (Copy from esp32c3, need check) /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ diff --git a/components/soc/esp32h2/include/soc/Kconfig.soc_caps.in b/components/soc/esp32h2/include/soc/Kconfig.soc_caps.in index b7a521cf58..0db2579197 100644 --- a/components/soc/esp32h2/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32h2/include/soc/Kconfig.soc_caps.in @@ -427,6 +427,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY int default 4 +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_MMU_PAGE_SIZE_CONFIGURABLE bool default y diff --git a/components/soc/esp32h2/include/soc/soc_caps.h b/components/soc/esp32h2/include/soc/soc_caps.h index 30a8fea465..feeb153f2c 100644 --- a/components/soc/esp32h2/include/soc/soc_caps.h +++ b/components/soc/esp32h2/include/soc/soc_caps.h @@ -180,6 +180,8 @@ #define SOC_CPU_HAS_PMA 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_PMP_REGION_GRANULARITY 4 +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 /*-------------------------- MMU CAPS ----------------------------------------*/ #define SOC_MMU_PAGE_SIZE_CONFIGURABLE (1) diff --git a/components/soc/esp32h21/include/soc/Kconfig.soc_caps.in b/components/soc/esp32h21/include/soc/Kconfig.soc_caps.in index 6d7f798d0d..1508085055 100644 --- a/components/soc/esp32h21/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32h21/include/soc/Kconfig.soc_caps.in @@ -283,6 +283,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY int default 4 +config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE + bool + default y + config SOC_MMU_PAGE_SIZE_CONFIGURABLE bool default y diff --git a/components/soc/esp32h21/include/soc/soc_caps.h b/components/soc/esp32h21/include/soc/soc_caps.h index ce0ac6b56f..a9a5402f90 100644 --- a/components/soc/esp32h21/include/soc/soc_caps.h +++ b/components/soc/esp32h21/include/soc/soc_caps.h @@ -165,6 +165,8 @@ #define SOC_CPU_HAS_PMA 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_PMP_REGION_GRANULARITY 4 +// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions +#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1 /*-------------------------- MMU CAPS ----------------------------------------*/ #define SOC_MMU_PAGE_SIZE_CONFIGURABLE (1)