fix(newlib): fix CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS for c2/c3/c6/h2/h21

PMP configurations for load and store addresses may
have different permissions (e.g., "R" vs. "RW").

Due to the timing alignment of internal signals, the address
permission check may be incorrectly applied during the second
part of a misaligned access transaction.

As a workaround, insert two instructions (e.g. ADDI/NOP) between
accessing to different memory regions. This spacing avoids the
false permission check caused by signal timing overlap.
This commit is contained in:
Alexey Lapshin
2025-08-17 11:52:05 +07:00
committed by BOT
parent 453bd60576
commit 913d38ba14
27 changed files with 356 additions and 164 deletions

View File

@@ -149,6 +149,10 @@ if(ESP_TEE_BUILD)
if(CONFIG_ESP_ROM_HAS_NEWLIB_NANO_FORMAT) if(CONFIG_ESP_ROM_HAS_NEWLIB_NANO_FORMAT)
rom_linker_script("newlib-nano") rom_linker_script("newlib-nano")
endif() endif()
rom_linker_script("libc")
if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY) # TODO IDF-13852: use optimized memcpy for TEE ?
rom_linker_script("libc-suboptimal_for_misaligned_mem")
endif()
endif() endif()
if(BOOTLOADER_BUILD) if(BOOTLOADER_BUILD)
@@ -173,8 +177,8 @@ if(BOOTLOADER_BUILD)
else() else()
rom_linker_script("libc") rom_linker_script("libc")
endif() endif()
if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY if(CONFIG_ESP_ROM_HAS_SUBOPTIMAL_NEWLIB_ON_MISALIGNED_MEMORY)
AND NOT CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) # For bootloader use only ROM functions due to 'relocation truncated to fit' error
rom_linker_script("libc-suboptimal_for_misaligned_mem") rom_linker_script("libc-suboptimal_for_misaligned_mem")
endif() endif()
if(CONFIG_LIBC_NEWLIB) if(CONFIG_LIBC_NEWLIB)

View File

@@ -38,23 +38,21 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND)
endif() endif()
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
if(CMAKE_C_COMPILER_ID MATCHES "Clang") list(APPEND srcs
# TODO IDF-13089: remove this block and modify src/newlib.lf when clang was upgraded "src/port/riscv/memcpy.c"
list(APPEND srcs "src/port/riscv/memmove.c"
"src/string/memcmp.c" "src/string/memcmp.c"
"src/string/memmove.c" "src/port/riscv/strcpy.c"
"src/string/strncmp.c" "src/string/strncpy.c"
"src/string/strncpy.c" "src/string/strncmp.c"
"src/port/riscv/memcpy.c" "src/port/riscv/strcmp.S")
"src/port/riscv/strcpy.c" list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcpy_impl")
"src/port/riscv/strcmp.S") list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl")
list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl") list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcmp_impl")
list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memmove_impl") list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl")
list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl") list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl")
list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl") list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncmp_impl")
list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl") list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl")
list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcmp_impl")
endif()
endif() endif()
if(CONFIG_LIBC_NEWLIB) if(CONFIG_LIBC_NEWLIB)
@@ -96,6 +94,17 @@ if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND)
PROPERTIES COMPILE_FLAGS "-mno-disable-hardware-atomics") PROPERTIES COMPILE_FLAGS "-mno-disable-hardware-atomics")
endif() endif()
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
# TODO GCC-419 and IDF-13089: cleanup files
set_source_files_properties("src/string/memcmp.c"
"src/string/strncmp.c"
"src/string/strncpy.c"
"src/port/riscv/memcpy.c"
"src/port/riscv/memmove.c"
"src/port/riscv/strcpy.c"
PROPERTIES COMPILE_FLAGS -O2)
endif()
# Forces the linker to include heap, syscall, pthread, assert, and retargetable locks from this component, # Forces the linker to include heap, syscall, pthread, assert, and retargetable locks from this component,
# instead of the implementations provided by newlib. # instead of the implementations provided by newlib.
list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_heap_impl") list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_heap_impl")

View File

@@ -151,7 +151,7 @@ menu "LibC"
Enables performance-optimized implementations of memory and string functions Enables performance-optimized implementations of memory and string functions
when handling misaligned memory. when handling misaligned memory.
This increases the image size by ~1000 bytes. Require approximately 8001000 bytes of IRAM.
Optimized functions include: Optimized functions include:
- memcpy - memcpy

View File

@@ -22,7 +22,9 @@
* to avoid small performance penalties (if they are not zero). */ * to avoid small performance penalties (if they are not zero). */
#define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1)) #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
#ifndef _HAVE_HW_MISALIGNED_ACCESS
#define _HAVE_HW_MISALIGNED_ACCESS (__riscv_misaligned_fast || __riscv_misaligned_slow) #define _HAVE_HW_MISALIGNED_ACCESS (__riscv_misaligned_fast || __riscv_misaligned_slow)
#endif
#if _HAVE_HW_MISALIGNED_ACCESS #if _HAVE_HW_MISALIGNED_ACCESS
/* Hardware performs unaligned operations with little /* Hardware performs unaligned operations with little

View File

@@ -11,8 +11,6 @@ archive:
else: else:
libc.a libc.a
entries: entries:
if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y:
libc_a-memcpy (noflash)
if SPIRAM_CACHE_WORKAROUND = y: if SPIRAM_CACHE_WORKAROUND = y:
# The following libs are either used in a lot of places or in critical # The following libs are either used in a lot of places or in critical
# code. (such as panic or abort) # code. (such as panic or abort)

View File

@@ -1,6 +1,14 @@
[mapping:newlib] [mapping:newlib]
archive: libnewlib.a archive: libnewlib.a
entries: entries:
if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y:
memcpy (noflash)
memmove (noflash)
memcmp (noflash)
strcpy (noflash)
strncpy (noflash)
strcmp (noflash)
strncmp (noflash)
if LIBC_MISC_IN_IRAM = y: if LIBC_MISC_IN_IRAM = y:
if HEAP_PLACE_FUNCTION_INTO_FLASH = n: if HEAP_PLACE_FUNCTION_INTO_FLASH = n:
heap (noflash) heap (noflash)
@@ -11,16 +19,3 @@ entries:
stdatomic_s32c1i (noflash) stdatomic_s32c1i (noflash)
if STDATOMIC_S32C1I_SPIRAM_WORKAROUND = y: if STDATOMIC_S32C1I_SPIRAM_WORKAROUND = y:
stdatomic_s32c1i (noflash) stdatomic_s32c1i (noflash)
if CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TOOLCHAIN_GCC = y:
# memcpy() should be in IRAM due to early system use
# Others can stay in flash (performance difference is minimal)
#
# Performance Comparison:
# | func | flash | iram |
# |---------|-------|-------|
# | memcmp | 15986 | 15170 |
# | strcpy | 17499 | 16660 |
# | strcmp | 13125 | 11147 |
# | strncpy | 17386 | 16668 |
# | strncmp | 22161 | 21782 |
libc_a-memcpy (noflash)

View File

@@ -8,10 +8,6 @@
archive: archive:
libc.a libc.a
entries: entries:
if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TARGET_ARCH_RISCV = y:
memcpy-asm (noflash)
if LIBC_OPTIMIZED_MISALIGNED_ACCESS = y && IDF_TARGET_ARCH_XTENSA = y:
libc_machine_xtensa_memcpy (noflash)
if SPIRAM_CACHE_WORKAROUND = y: if SPIRAM_CACHE_WORKAROUND = y:
# The following libs are either used in a lot of places or in critical code. (such as panic or abort) # The following libs are either used in a lot of places or in critical code. (such as panic or abort)
# Thus, they shall always be placed in IRAM. # Thus, they shall always be placed in IRAM.

View File

@@ -18,14 +18,11 @@
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include "esp_attr.h" #include "string/local.h"
#include "../../string/local.h"
#define unlikely(X) __builtin_expect (!!(X), 0) #define unlikely(X) __builtin_expect (!!(X), 0)
IRAM_ATTR
void * void *
__attribute__((optimize("-Os")))
__inhibit_loop_to_libcall __inhibit_loop_to_libcall
memcpy(void *__restrict aa, const void *__restrict bb, size_t n) memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
{ {
@@ -65,29 +62,80 @@ small:
if (unlikely(lend - la > 8)) { if (unlikely(lend - la > 8)) {
while (lend - la > 8) { while (lend - la > 8) {
long b0 = *lb++; /*
long b1 = *lb++; * long b0 = *lb++;
long b2 = *lb++; * long b1 = *lb++;
long b3 = *lb++; * long b2 = *lb++;
long b4 = *lb++; * long b3 = *lb++;
long b5 = *lb++; * long b4 = *lb++;
long b6 = *lb++; * long b5 = *lb++;
long b7 = *lb++; * long b6 = *lb++;
long b8 = *lb++; * long b7 = *lb++;
*la++ = b0; * long b8 = *lb++;
*la++ = b1; * *la++ = b0;
*la++ = b2; * *la++ = b1;
*la++ = b3; * *la++ = b2;
*la++ = b4; * *la++ = b3;
*la++ = b5; * *la++ = b4;
*la++ = b6; * *la++ = b5;
*la++ = b7; * *la++ = b6;
*la++ = b8; * *la++ = b7;
* *la++ = b8;
*/
long src0, src1, src2, src3;
long src4, src5, src6, src7;
long src8;
/* DIG-694: need at least 2 instructions between lw and sw */
asm volatile("lw %0, 0(%10)\n" // scr0 = lb[0];
"lw %1, 4(%10)\n" // scr1 = lb[1];
"lw %2, 8(%10)\n" // scr2 = lb[2];
"lw %3, 12(%10)\n" // scr3 = lb[3];
"lw %4, 16(%10)\n" // scr4 = lb[4];
"lw %5, 20(%10)\n" // scr5 = lb[5];
"lw %6, 24(%10)\n" // scr6 = lb[6];
"lw %7, 28(%10)\n" // scr7 = lb[7];
"lw %8, 32(%10)\n" // scr8 = lb[8];
"addi %9, %9, 36\n" // la += 8 * 9;
"addi %10, %10, 36\n" // lb += 8 * 9;
"sw %0, -36(%9)\n" // *(la - 9) = src;
"sw %1, -32(%9)\n" // *(la - 8) = src;
"sw %2, -28(%9)\n" // *(la - 7) = src;
"sw %3, -24(%9)\n" // *(la - 6) = src;
"sw %4, -20(%9)\n" // *(la - 5) = src;
"sw %5, -16(%9)\n" // *(la - 4) = src;
"sw %6, -12(%9)\n" // *(la - 3) = src;
"sw %7, -8(%9)\n" // *(la - 2) = src;
"sw %8, -4(%9)\n" // *(la - 1) = src;
: "=r"(src0), "=r"(src1), "=r"(src2), "=r"(src3),
"=r"(src4), "=r"(src5), "=r"(src6), "=r"(src7),
"=r"(src8),
"+r"(la), "+r"(lb)
:: "memory");
} }
} }
while (la < lend) { while (la < lend) {
BODY(la, lb, long); /*
* BODY(la, lb, long);
*/
long src0;
#ifdef __OPTIMIZE_SIZE__
#error "Enabled Os optimization may not work properly for DIG-694"
/*
* Replacing the string:
* *la++ = tt;
* To:
* "addi %2, %4, 4\n" // la++;
* "sw %0, -4(%4)\n" // *(la-1) = src0;
* May break some optimizations and slightly reduce performance.
*/
#endif
/* DIG-694: need at least 2 instructions between lw and sw */
asm volatile("lw %0, 0(%1)\n" // long src0 = *lb;
"addi %1, %1, 4\n" // lb++;
"addi %2, %2, 4\n" // la++;
"sw %0, -4(%2)\n" // *(la-1) = src0;
: "=r"(src0), "+r"(lb), "+r"(la)
:: "memory");
} }
a = (char *)la; a = (char *)la;

View File

@@ -0,0 +1,150 @@
/*
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
#include <string.h>
#include <_ansi.h>
#include <stddef.h>
#include <limits.h>
#include "string/local.h"
void *
__inhibit_loop_to_libcall
memmove(void *dst_void,
const void *src_void,
size_t length)
{
char *dst = dst_void;
const char *src = src_void;
long *aligned_dst;
const long *aligned_src;
if (src < dst && dst < src + length) {
/* Destructive overlap...have to copy backwards */
src += length;
dst += length;
if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) {
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy one long word at a time if possible. */
while (!TOO_SMALL_LITTLE_BLOCK(length)) {
/*
* const long src0 = *--aligned_src;
* *--aligned_dst = src0;
* length -= LITTLE_BLOCK_SIZE;
*/
long src0;
/* DIG-694: need at least 2 instructions between lw and sw */
asm volatile("lw %0, -4(%1)\n" // src0 = *(aligned_src - 1);
"addi %1, %1, -4\n" // aligned_src--;
"addi %2, %2, -4\n" // aligned_dst--;
"addi %3, %3, -4\n" // length -= LITTLE_BLOCK_SIZE;
"sw %0, 0(%2)\n" // aligned_dst = src0;
: "=r"(src0), "+r"(aligned_src), "+r"(aligned_dst), "+r"(length)
:: "memory");
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (length--) {
*--dst = *--src;
}
} else {
/* Use optimizing algorithm for a non-destructive copy to closely
match memcpy. If the size is small or either SRC or DST is unaligned,
then punt into the byte copy loop. This should be rare. */
if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) {
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy 8X long words at a time if possible. */
while (length >= BIG_BLOCK_SIZE * 2) {
/*
* const long src0 = *aligned_src++;
* const long src1 = *aligned_src++;
* const long src2 = *aligned_src++;
* const long src3 = *aligned_src++;
* const long src4 = *aligned_src++;
* const long src5 = *aligned_src++;
* const long src6 = *aligned_src++;
* const long src7 = *aligned_src++;
* *aligned_dst++ = src0;
* *aligned_dst++ = src1;
* *aligned_dst++ = src2;
* *aligned_dst++ = src3;
* *aligned_dst++ = src4;
* *aligned_dst++ = src5;
* *aligned_dst++ = src6;
* *aligned_dst++ = src7;
*/
long src0, src1, src2, src3;
long src4, src5, src6, src7;
/* DIG-694: need at least 2 instructions between lw and sw */
asm volatile("lw %0, 0(%8)\n" // src0 = aligned_src[0];
"lw %1, 4(%8)\n" // src1 = aligned_src[1];
"lw %2, 8(%8)\n" // src2 = aligned_src[2];
"lw %3, 12(%8)\n" // src3 = aligned_src[3];
"lw %4, 16(%8)\n" // src4 = aligned_src[4];
"lw %5, 20(%8)\n" // src5 = aligned_src[5];
"lw %6, 24(%8)\n" // src6 = aligned_src[6];
"lw %7, 28(%8)\n" // src7 = aligned_src[7];
"addi %8, %8, 32\n" // aligned_src += BIG_BLOCK_SIZE * 2;
"addi %9, %9, 32\n" // aligned_dst += BIG_BLOCK_SIZE * 2;
"addi %10, %10, -32\n" // length -= BIG_BLOCK_SIZE * 2;
"sw %0, -32(%9)\n" // *(aligned_dst - 8) = src0;
"sw %1, -28(%9)\n" // *(aligned_dst - 7) = src1;
"sw %2, -24(%9)\n" // *(aligned_dst - 6) = src2;
"sw %3, -20(%9)\n" // *(aligned_dst - 5) = src3;
"sw %4, -16(%9)\n" // *(aligned_dst - 4) = src4;
"sw %5, -12(%9)\n" // *(aligned_dst - 3) = src5;
"sw %6, -8(%9)\n" // *(aligned_dst - 2) = src6;
"sw %7, -4(%9)\n" // *(aligned_dst - 1) = src7;
: "=r"(src0), "=r"(src1), "=r"(src2), "=r"(src3),
"=r"(src4), "=r"(src5), "=r"(src6), "=r"(src7),
"+r"(aligned_src), "+r"(aligned_dst), "+r"(length)
:: "memory");
}
/* Copy one long word at a time if possible. */
while (!TOO_SMALL_LITTLE_BLOCK(length)) {
/*
* const long src0 = *aligned_src++;
* *aligned_dst++ = src0;
* length -= LITTLE_BLOCK_SIZE;
*/
long src0;
/* DIG-694: need at least 2 instructions between lw and sw */
asm volatile("lw %0, 0(%4)\n" // long src0 = *aligned_src;
"addi %1, %4, 4\n" // aligned_src++;
"addi %2, %5, 4\n" // aligned_dst++;
"addi %3, %6, -4\n" // length -= LITTLE_BLOCK_SIZE;
"sw %0, -4(%5)\n" // *(aligned_dst-1) = src0;
: "=r"(src0), "=r"(aligned_src), "=r"(aligned_dst), "=r"(length)
: "r"(aligned_src), "r"(aligned_dst), "r"(length));
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (length--) {
*dst++ = *src++;
}
}
return dst_void;
}
// Hook to force the linker to include this file
void esp_libc_include_memmove_impl(void)
{
}

View File

@@ -30,7 +30,6 @@ unsigned long __newlib__libc_detect_null(unsigned long w)
return ~(((w & mask) + mask) | w | mask); return ~(((w & mask) + mask) | w | mask);
} }
__attribute__((optimize("-Os")))
char *strcpy(char *dst, const char *src) char *strcpy(char *dst, const char *src)
{ {
char *dst0 = dst; char *dst0 = dst;
@@ -44,6 +43,7 @@ char *strcpy(char *dst, const char *src)
const long *lsrc = (const long *)src; const long *lsrc = (const long *)src;
while (!__newlib__libc_detect_null(*lsrc)) { while (!__newlib__libc_detect_null(*lsrc)) {
/* DIG-694: there are enough instructions between lw and sw after compiler unrolls the loop */
*ldst++ = *lsrc++; *ldst++ = *lsrc++;
} }

View File

@@ -6,9 +6,8 @@
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/ */
#include <string.h> #include <string.h>
#include "local.h" #include "string/local.h"
__attribute__((optimize("-Os")))
int int
memcmp(const void *m1, memcmp(const void *m1,
const void *m2, const void *m2,

View File

@@ -1,88 +0,0 @@
/*
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
*
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
*/
#include <string.h>
#include <_ansi.h>
#include <stddef.h>
#include <limits.h>
#include "local.h"
__attribute__((optimize("-Os")))
void *
__inhibit_loop_to_libcall
memmove(void *dst_void,
const void *src_void,
size_t length)
{
char *dst = dst_void;
const char *src = src_void;
long *aligned_dst;
const long *aligned_src;
if (src < dst && dst < src + length) {
/* Destructive overlap...have to copy backwards */
src += length;
dst += length;
if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) {
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy one long word at a time if possible. */
while (!TOO_SMALL_LITTLE_BLOCK(length)) {
*--aligned_dst = *--aligned_src;
length -= LITTLE_BLOCK_SIZE;
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (length--) {
*--dst = *--src;
}
} else {
/* Use optimizing algorithm for a non-destructive copy to closely
match memcpy. If the size is small or either SRC or DST is unaligned,
then punt into the byte copy loop. This should be rare. */
if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) {
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy 4X long words at a time if possible. */
while (!TOO_SMALL_BIG_BLOCK(length)) {
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
length -= BIG_BLOCK_SIZE;
}
/* Copy one long word at a time if possible. */
while (!TOO_SMALL_LITTLE_BLOCK(length)) {
*aligned_dst++ = *aligned_src++;
length -= LITTLE_BLOCK_SIZE;
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (length--) {
*dst++ = *src++;
}
}
return dst_void;
}
// Hook to force the linker to include this file
void esp_libc_include_memmove_impl(void)
{
}

View File

@@ -7,9 +7,8 @@
*/ */
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
#include "local.h" #include "string/local.h"
__attribute__((optimize("-Os")))
int int
strncmp(const char *s1, strncmp(const char *s1,
const char *s2, const char *s2,

View File

@@ -7,9 +7,8 @@
*/ */
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
#include "local.h" #include "string/local.h"
__attribute__((optimize("-Os")))
char * char *
strncpy(char *__restrict dst0, strncpy(char *__restrict dst0,
const char *__restrict src0, const char *__restrict src0,
@@ -29,6 +28,7 @@ strncpy(char *__restrict dst0,
sized copies. */ sized copies. */
while (!TOO_SMALL_LITTLE_BLOCK(count) && !DETECT_NULL(*aligned_src)) { while (!TOO_SMALL_LITTLE_BLOCK(count) && !DETECT_NULL(*aligned_src)) {
count -= sizeof(long int); count -= sizeof(long int);
/* DIG-694: there are enough instructions between lw and sw after compiler unrolls the loop */
*aligned_dst++ = *aligned_src++; *aligned_dst++ = *aligned_src++;
} }

View File

@@ -18,14 +18,9 @@ if(CONFIG_LIBC_NEWLIB)
endif() endif()
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS) if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
list(APPEND srcs "test_misaligned_mem_performance.c") list(APPEND srcs "test_misaligned_access.c")
endif() endif()
idf_component_register(SRCS "${srcs}" idf_component_register(SRCS "${srcs}"
PRIV_REQUIRES unity vfs cmock driver esp_timer spi_flash test_utils pthread esp_psram PRIV_REQUIRES unity vfs cmock driver esp_timer spi_flash test_utils pthread esp_psram
WHOLE_ARCHIVE) WHOLE_ARCHIVE)
if(CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS)
set_source_files_properties(test_misaligned_mem_performance.c
PROPERTIES COMPILE_FLAGS "-Wno-incompatible-pointer-types -Wno-strict-prototypes")
endif()

View File

@@ -6,11 +6,16 @@
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include "esp_heap_caps.h" #include "esp_heap_caps.h"
#include "soc/soc.h"
#include "hal/cpu_ll.h" #include "hal/cpu_ll.h"
#include "unity.h" #include "unity.h"
#define MAX_MEMTEST_SIZE 4096 #define MAX_MEMTEST_SIZE 4096
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wincompatible-pointer-types"
#pragma GCC diagnostic ignored "-Wstrict-prototypes"
uint32_t test_function_dest_src_size(void (*foo)(...), bool pass_size) uint32_t test_function_dest_src_size(void (*foo)(...), bool pass_size)
{ {
uint32_t ccount1, ccount2; uint32_t ccount1, ccount2;
@@ -106,3 +111,52 @@ TEST_CASE("strncmp", "[misaligned_mem]")
/* esp32c2: 24369 cycles instead 49141. */ /* esp32c2: 24369 cycles instead 49141. */
TEST_ASSERT_LESS_THAN(30000, ccount); TEST_ASSERT_LESS_THAN(30000, ccount);
} }
#pragma GCC diagnostic pop // "-Wincompatible-pointer-types" "-Wstrict-prototypes"
static bool fn_in_ram(void *fn)
{
const int fnaddr = (int)fn;
return (fnaddr >= SOC_IRAM_LOW && fnaddr < SOC_IRAM_HIGH);
}
TEST_CASE("mem functions in IRAM", "[misaligned_mem]")
{
TEST_ASSERT_TRUE(fn_in_ram(memcpy));
TEST_ASSERT_TRUE(fn_in_ram(memcmp));
TEST_ASSERT_TRUE(fn_in_ram(memmove));
TEST_ASSERT_TRUE(fn_in_ram(strcpy));
TEST_ASSERT_TRUE(fn_in_ram(strncpy));
TEST_ASSERT_TRUE(fn_in_ram(strcmp));
TEST_ASSERT_TRUE(fn_in_ram(strncmp));
}
#if CONFIG_ESP_SYSTEM_MEMPROT_PMP
TEST_CASE("access across different PMP regions", "[misaligned_mem]")
{
/*
* PMP configurations for load and store addresses may
* have different permissions (e.g., "R" vs. "RW").
*
* Due to the timing alignment of internal signals, the address
* permission check may be incorrectly applied during the second
* part of a misaligned access transaction.
*
* As a workaround, insert two instructions (e.g. ADDI/NOP) between
* accessing to different memory regions. This spacing avoids the
* false permission check caused by signal timing overlap.
*
* This test may help identify the root issue in affected chips.
*/
const void *src = (void *) SOC_DROM_MASK_LOW;
asm volatile("addi sp, sp, -16\n"
"lw t0, 2(%0)\n"
#if CONFIG_SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE
"nop\n"
"nop\n"
#endif
"sw t0, 3(sp)\n"
"addi sp, sp, 16"
:: "r"(src) : "memory");
}
#endif

View File

@@ -1 +1,2 @@
CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS=y CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS=y
CONFIG_ESP_SYSTEM_MEMPROT_PMP=y

View File

@@ -263,6 +263,10 @@ config SOC_CPU_IDRAM_SPLIT_USING_PMP
bool bool
default y default y
config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE
bool
default y
config SOC_ECC_SUPPORT_POINT_VERIFY_QUIRK config SOC_ECC_SUPPORT_POINT_VERIFY_QUIRK
bool bool
default y default y

View File

@@ -112,6 +112,8 @@
#define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 0x80000000 // bytes #define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 0x80000000 // bytes
#define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1
// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions
#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1
/*-------------------------- ECC CAPS --------------------------*/ /*-------------------------- ECC CAPS --------------------------*/
#define SOC_ECC_SUPPORT_POINT_VERIFY_QUIRK 1 // C2 ECC peripheral has a bug in ECC point verification, if value of K is zero the verification fails #define SOC_ECC_SUPPORT_POINT_VERIFY_QUIRK 1 // C2 ECC peripheral has a bug in ECC point verification, if value of K is zero the verification fails

View File

@@ -351,6 +351,10 @@ config SOC_CPU_WATCHPOINT_MAX_REGION_SIZE
hex hex
default 0x80000000 default 0x80000000
config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE
bool
default y
config SOC_DS_SIGNATURE_MAX_BIT_LEN config SOC_DS_SIGNATURE_MAX_BIT_LEN
int int
default 3072 default 3072

View File

@@ -143,6 +143,8 @@
#define SOC_CPU_BREAKPOINTS_NUM 8 #define SOC_CPU_BREAKPOINTS_NUM 8
#define SOC_CPU_WATCHPOINTS_NUM 8 #define SOC_CPU_WATCHPOINTS_NUM 8
#define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 0x80000000 // bytes #define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 0x80000000 // bytes
// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions
#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1
/*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/
/** The maximum length of a Digital Signature in bits. */ /** The maximum length of a Digital Signature in bits. */

View File

@@ -427,6 +427,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY
int int
default 4 default 4
config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE
bool
default y
config SOC_DS_SIGNATURE_MAX_BIT_LEN config SOC_DS_SIGNATURE_MAX_BIT_LEN
int int
default 3072 default 3072

View File

@@ -163,6 +163,8 @@
#define SOC_CPU_HAS_PMA 1 #define SOC_CPU_HAS_PMA 1
#define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1
#define SOC_CPU_PMP_REGION_GRANULARITY 4 #define SOC_CPU_PMP_REGION_GRANULARITY 4
// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions
#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1
// TODO: IDF-5360 (Copy from esp32c3, need check) // TODO: IDF-5360 (Copy from esp32c3, need check)
/*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/

View File

@@ -427,6 +427,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY
int int
default 4 default 4
config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE
bool
default y
config SOC_MMU_PAGE_SIZE_CONFIGURABLE config SOC_MMU_PAGE_SIZE_CONFIGURABLE
bool bool
default y default y

View File

@@ -180,6 +180,8 @@
#define SOC_CPU_HAS_PMA 1 #define SOC_CPU_HAS_PMA 1
#define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1
#define SOC_CPU_PMP_REGION_GRANULARITY 4 #define SOC_CPU_PMP_REGION_GRANULARITY 4
// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions
#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1
/*-------------------------- MMU CAPS ----------------------------------------*/ /*-------------------------- MMU CAPS ----------------------------------------*/
#define SOC_MMU_PAGE_SIZE_CONFIGURABLE (1) #define SOC_MMU_PAGE_SIZE_CONFIGURABLE (1)

View File

@@ -283,6 +283,10 @@ config SOC_CPU_PMP_REGION_GRANULARITY
int int
default 4 default 4
config SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE
bool
default y
config SOC_MMU_PAGE_SIZE_CONFIGURABLE config SOC_MMU_PAGE_SIZE_CONFIGURABLE
bool bool
default y default y

View File

@@ -165,6 +165,8 @@
#define SOC_CPU_HAS_PMA 1 #define SOC_CPU_HAS_PMA 1
#define SOC_CPU_IDRAM_SPLIT_USING_PMP 1 #define SOC_CPU_IDRAM_SPLIT_USING_PMP 1
#define SOC_CPU_PMP_REGION_GRANULARITY 4 #define SOC_CPU_PMP_REGION_GRANULARITY 4
// DIG-694: misaligned access across PMP regions must be spaced at least by two instructions
#define SOC_CPU_MISALIGNED_ACCESS_ON_PMP_MISMATCH_ISSUE 1
/*-------------------------- MMU CAPS ----------------------------------------*/ /*-------------------------- MMU CAPS ----------------------------------------*/
#define SOC_MMU_PAGE_SIZE_CONFIGURABLE (1) #define SOC_MMU_PAGE_SIZE_CONFIGURABLE (1)