From cbb84e8f5eda40795927507816a1e522a0ee2e0c Mon Sep 17 00:00:00 2001 From: Ivan Grokhotkov Date: Mon, 20 Jan 2020 14:20:02 +0100 Subject: [PATCH] esp32s2: fix THREADPTR calculation, re-enable FreeRTOS TLS tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Clarify THREADPTR calculation in FreeRTOS code, explaining where the constant 0x10 offset comes from. 2. On the ESP32-S2, .flash.rodata section had different default alignment (8 bytes instead of 16), which resulted in different offset of the TLS sections. Unfortunately I haven’t found a way to query section alignment from C code, or to use a constant value to define section alignment in the linker script. The linker scripts are modified to force a fixed 16 byte alignment for .flash.rodata on the ESP32 and ESP32-S2beta. Note that the base address of .flash.rodata was already 16 byte aligned, so this has not changed the actual memory layout of the application. Full explanation of the calculation below. Assume we have the TLS template section base address (tls_section_vma), the address of a TLS variable in the template (address), and the final relocation value (offset). The linker calculates: offset = address - tls_section_vma + align_up(TCB_SIZE, alignment). At run time, the TLS section gets copied from _thread_local_start (in .rodata) to task_thread_local_start. Let’s assume that an address of a variable in the runtime TLS section is runtime_address. Access to this address will happen by calculating THREADPTR + offset. So, by a series of substitutions: THREADPTR + offset = runtime_address THREADPTR = runtime_address - offset THREADPTR = runtime_address - (address - tls_section_vma + align_up(TCB_SIZE, alignment)) THREADPTR = (runtime_address - address) + tls_section_vma - align_up(TCB_SIZE, alignment) The difference between runtime_address and address is the same as the difference between task_thread_local_start and _thread_local_start. 
And tls_section_vma is the address of .rodata section, i.e. _rodata_start. So we arrive at THREADPTR = task_thread_local_start - _thread_local_start + _rodata_start - align_up(TCB_SIZE, alignment). The idea with TCB_SIZE being added to the THREADPTR when computing the relocation was to let the OS save the TCB pointer in the THREADPTR register. The location of the run-time TLS section was assumed to be immediately after the TCB, aligned to whatever the section alignment was. However in our case the problem is that the run-time TLS section is stored not next to the TCB, but at the top of the stack. Plus, even if it was stored next to the TCB, the size of a FreeRTOS TCB is not equal to 8 bytes (TCB_SIZE hardcoded in the linker). So we have to calculate THREADPTR in a slightly obscure way, to compensate for these differences. Closes IDF-1239 --- components/esp32/ld/esp32.project.ld.in | 3 ++- components/esp32s2/ld/esp32s2.project.ld.in | 3 ++- components/freertos/port.c | 17 ++++++++++++++--- components/freertos/test/test_thread_local.c | 2 -- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/components/esp32/ld/esp32.project.ld.in b/components/esp32/ld/esp32.project.ld.in index 5e88cb7d23..f43bf7db70 100644 --- a/components/esp32/ld/esp32.project.ld.in +++ b/components/esp32/ld/esp32.project.ld.in @@ -249,7 +249,8 @@ SECTIONS ASSERT(((_bss_end - ORIGIN(dram0_0_seg)) <= LENGTH(dram0_0_seg)), "DRAM segment data does not fit.") - .flash.rodata : + /* When modifying the alignment, update tls_section_alignment in pxPortInitialiseStack */ + .flash.rodata : ALIGN(0x10) { _rodata_start = ABSOLUTE(.); diff --git a/components/esp32s2/ld/esp32s2.project.ld.in b/components/esp32s2/ld/esp32s2.project.ld.in index 95bbae561e..39c55462fd 100644 --- a/components/esp32s2/ld/esp32s2.project.ld.in +++ b/components/esp32s2/ld/esp32s2.project.ld.in @@ -246,7 +246,8 @@ SECTIONS ASSERT(((_bss_end - ORIGIN(dram0_0_seg)) <= LENGTH(dram0_0_seg)), "DRAM segment data does not fit.") - 
.flash.rodata : + /* When modifying the alignment, update tls_section_alignment in pxPortInitialiseStack */ + .flash.rodata : ALIGN(0x10) { _rodata_start = ABSOLUTE(.); diff --git a/components/freertos/port.c b/components/freertos/port.c index 0ae24f9817..487d260e22 100644 --- a/components/freertos/port.c +++ b/components/freertos/port.c @@ -227,9 +227,20 @@ StackType_t *pxPortInitialiseStack( StackType_t *pxTopOfStack, TaskFunction_t px task_thread_local_start = (void *)(((uint32_t)pxTopOfStack - XT_CP_SIZE - thread_local_sz) & ~0xf); memcpy(task_thread_local_start, &_thread_local_start, thread_local_sz); threadptr = (uint32_t *)(sp + XT_STK_EXTRA); - /* shift threadptr by the offset of _thread_local_start from DROM start; - need to take into account extra 16 bytes offset */ - *threadptr = (uint32_t)task_thread_local_start - ((uint32_t)&_thread_local_start - (uint32_t)&_rodata_start) - 0x10; + /* Calculate THREADPTR value: + * The generated code will add THREADPTR value to a constant value determined at link time, + * to get the address of the TLS variable. + * The constant value is calculated by the linker as follows + * (search for 'tpoff' in elf32-xtensa.c in BFD): + * offset = address - tls_section_vma + align_up(TCB_SIZE, tls_section_alignment) + * where TCB_SIZE is hardcoded to 8. There doesn't seem to be a way to propagate + * the section alignment value from the ld script into the code, so it is hardcoded + * in both places. 
+ */ + const uint32_t tls_section_alignment = 0x10; /* has to be in sync with ALIGN value of .flash.rodata section */ + const uint32_t tcb_size = 8; /* Unrelated to FreeRTOS, this is the constant from BFD */ + const uint32_t base = (tcb_size + tls_section_alignment - 1) & (~(tls_section_alignment - 1)); + *threadptr = (uint32_t)task_thread_local_start - ((uint32_t)&_thread_local_start - (uint32_t)&_rodata_start) - base; #if XCHAL_CP_NUM > 0 /* Init the coprocessor save area (see xtensa_context.h) */ diff --git a/components/freertos/test/test_thread_local.c b/components/freertos/test/test_thread_local.c index 33de6c4209..b92d5b3462 100644 --- a/components/freertos/test/test_thread_local.c +++ b/components/freertos/test/test_thread_local.c @@ -11,7 +11,6 @@ #include "test_utils.h" #include "sdkconfig.h" -#if !TEMPORARY_DISABLED_FOR_TARGETS(ESP32S2) static __thread int tl_test_var1; static __thread uint8_t tl_test_var2 = 55; @@ -109,5 +108,4 @@ TEST_CASE("TLS test", "[freertos]") } vTaskDelay(10); /* Make sure idle task can clean up s_task, before it goes out of scope */ } -#endif