forked from espressif/esp-idf
spi_flash: fix race condition when doing operations in unpinned tasks
spi_flash_enable_interrupts_caches_and_other_cpu function used to enable non-IRAM interrupts after giving up flash operation lock, which would cause problems if another task was waiting on the lock to start a flash operation. In fact, non-IRAM interrupts should be re-enabled before the task scheduler is resumed. Otherwise non-pinned task can be moved to the other CPU due to preemption, causing esp_intr_noniram_enable to be called on the other CPU, causing an abort to be triggered. Fixes the issue reported in https://github.com/espressif/esp-idf/pull/258
This commit is contained in:
@@ -41,6 +41,9 @@ static uint32_t s_flash_op_cache_state[2];
|
|||||||
static SemaphoreHandle_t s_flash_op_mutex;
|
static SemaphoreHandle_t s_flash_op_mutex;
|
||||||
static volatile bool s_flash_op_can_start = false;
|
static volatile bool s_flash_op_can_start = false;
|
||||||
static volatile bool s_flash_op_complete = false;
|
static volatile bool s_flash_op_complete = false;
|
||||||
|
#ifndef NDEBUG
|
||||||
|
static volatile int s_flash_op_cpu = -1;
|
||||||
|
#endif
|
||||||
|
|
||||||
void spi_flash_init_lock()
|
void spi_flash_init_lock()
|
||||||
{
|
{
|
||||||
@@ -85,6 +88,11 @@ void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu()
|
|||||||
|
|
||||||
const uint32_t cpuid = xPortGetCoreID();
|
const uint32_t cpuid = xPortGetCoreID();
|
||||||
const uint32_t other_cpuid = (cpuid == 0) ? 1 : 0;
|
const uint32_t other_cpuid = (cpuid == 0) ? 1 : 0;
|
||||||
|
#ifndef NDEBUG
|
||||||
|
// For sanity check later: record the CPU which has started doing flash operation
|
||||||
|
assert(s_flash_op_cpu == -1);
|
||||||
|
s_flash_op_cpu = cpuid;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (xTaskGetSchedulerState() == taskSCHEDULER_NOT_STARTED) {
|
if (xTaskGetSchedulerState() == taskSCHEDULER_NOT_STARTED) {
|
||||||
// Scheduler hasn't been started yet, it means that spi_flash API is being
|
// Scheduler hasn't been started yet, it means that spi_flash API is being
|
||||||
@@ -98,12 +106,13 @@ void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu()
|
|||||||
// disable cache there and block other tasks from executing.
|
// disable cache there and block other tasks from executing.
|
||||||
s_flash_op_can_start = false;
|
s_flash_op_can_start = false;
|
||||||
s_flash_op_complete = false;
|
s_flash_op_complete = false;
|
||||||
esp_ipc_call(other_cpuid, &spi_flash_op_block_func, (void*) other_cpuid);
|
esp_err_t ret = esp_ipc_call(other_cpuid, &spi_flash_op_block_func, (void*) other_cpuid);
|
||||||
|
assert(ret == ESP_OK);
|
||||||
while (!s_flash_op_can_start) {
|
while (!s_flash_op_can_start) {
|
||||||
// Busy loop and wait for spi_flash_op_block_func to disable cache
|
// Busy loop and wait for spi_flash_op_block_func to disable cache
|
||||||
// on the other CPU
|
// on the other CPU
|
||||||
}
|
}
|
||||||
// Disable scheduler on CPU cpuid
|
// Disable scheduler on the current CPU
|
||||||
vTaskSuspendAll();
|
vTaskSuspendAll();
|
||||||
// This is guaranteed to run on CPU <cpuid> because the other CPU is now
|
// This is guaranteed to run on CPU <cpuid> because the other CPU is now
|
||||||
// occupied by highest priority task
|
// occupied by highest priority task
|
||||||
@@ -119,6 +128,11 @@ void IRAM_ATTR spi_flash_enable_interrupts_caches_and_other_cpu()
|
|||||||
{
|
{
|
||||||
const uint32_t cpuid = xPortGetCoreID();
|
const uint32_t cpuid = xPortGetCoreID();
|
||||||
const uint32_t other_cpuid = (cpuid == 0) ? 1 : 0;
|
const uint32_t other_cpuid = (cpuid == 0) ? 1 : 0;
|
||||||
|
#ifndef NDEBUG
|
||||||
|
// Sanity check: flash operation ends on the same CPU as it has started
|
||||||
|
assert(cpuid == s_flash_op_cpu);
|
||||||
|
s_flash_op_cpu = -1;
|
||||||
|
#endif
|
||||||
|
|
||||||
// Re-enable cache on this CPU
|
// Re-enable cache on this CPU
|
||||||
spi_flash_restore_cache(cpuid, s_flash_op_cache_state[cpuid]);
|
spi_flash_restore_cache(cpuid, s_flash_op_cache_state[cpuid]);
|
||||||
@@ -132,13 +146,21 @@ void IRAM_ATTR spi_flash_enable_interrupts_caches_and_other_cpu()
|
|||||||
} else {
|
} else {
|
||||||
// Signal to spi_flash_op_block_task that flash operation is complete
|
// Signal to spi_flash_op_block_task that flash operation is complete
|
||||||
s_flash_op_complete = true;
|
s_flash_op_complete = true;
|
||||||
// Resume tasks on the current CPU
|
}
|
||||||
|
// Re-enable non-iram interrupts
|
||||||
|
esp_intr_noniram_enable();
|
||||||
|
|
||||||
|
// Resume tasks on the current CPU, if the scheduler has started.
|
||||||
|
// NOTE: enabling non-IRAM interrupts has to happen before this,
|
||||||
|
// because once the scheduler has started, due to preemption the
|
||||||
|
// current task can end up being moved to the other CPU.
|
||||||
|
// But esp_intr_noniram_enable has to be called on the same CPU which
|
||||||
|
// called esp_intr_noniram_disable
|
||||||
|
if (xTaskGetSchedulerState() != taskSCHEDULER_NOT_STARTED) {
|
||||||
xTaskResumeAll();
|
xTaskResumeAll();
|
||||||
}
|
}
|
||||||
// Release API lock
|
// Release API lock
|
||||||
spi_flash_op_unlock();
|
spi_flash_op_unlock();
|
||||||
// Re-enable non-iram interrupts
|
|
||||||
esp_intr_noniram_enable();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu_no_os()
|
void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu_no_os()
|
||||||
|
@@ -8,26 +8,25 @@
|
|||||||
#include <esp_attr.h>
|
#include <esp_attr.h>
|
||||||
|
|
||||||
struct flash_test_ctx {
|
struct flash_test_ctx {
|
||||||
uint32_t offset[2];
|
uint32_t offset;
|
||||||
bool fail[2];
|
bool fail;
|
||||||
SemaphoreHandle_t done;
|
SemaphoreHandle_t done;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void flash_test_task(void *arg)
|
static void flash_test_task(void *arg)
|
||||||
{
|
{
|
||||||
const uint32_t coreid = xPortGetCoreID();
|
|
||||||
ets_printf("t%d\n", coreid);
|
|
||||||
struct flash_test_ctx *ctx = (struct flash_test_ctx *) arg;
|
struct flash_test_ctx *ctx = (struct flash_test_ctx *) arg;
|
||||||
vTaskDelay(100 / portTICK_PERIOD_MS);
|
vTaskDelay(100 / portTICK_PERIOD_MS);
|
||||||
const uint32_t sector = ctx->offset[coreid];
|
const uint32_t sector = ctx->offset;
|
||||||
ets_printf("es%d\n", coreid);
|
printf("t%d\n", sector);
|
||||||
|
printf("es%d\n", sector);
|
||||||
if (spi_flash_erase_sector(sector) != ESP_OK) {
|
if (spi_flash_erase_sector(sector) != ESP_OK) {
|
||||||
ctx->fail[coreid] = true;
|
ctx->fail = true;
|
||||||
ets_printf("Erase failed\r\n");
|
printf("Erase failed\r\n");
|
||||||
xSemaphoreGive(ctx->done);
|
xSemaphoreGive(ctx->done);
|
||||||
vTaskDelete(NULL);
|
vTaskDelete(NULL);
|
||||||
}
|
}
|
||||||
ets_printf("ed%d\n", coreid);
|
printf("ed%d\n", sector);
|
||||||
|
|
||||||
vTaskDelay(0 / portTICK_PERIOD_MS);
|
vTaskDelay(0 / portTICK_PERIOD_MS);
|
||||||
|
|
||||||
@@ -35,58 +34,52 @@ static void flash_test_task(void *arg)
|
|||||||
const uint32_t n = 4096;
|
const uint32_t n = 4096;
|
||||||
for (uint32_t offset = 0; offset < n; offset += 4) {
|
for (uint32_t offset = 0; offset < n; offset += 4) {
|
||||||
if (spi_flash_write(sector * SPI_FLASH_SEC_SIZE + offset, (const uint8_t *) &val, 4) != ESP_OK) {
|
if (spi_flash_write(sector * SPI_FLASH_SEC_SIZE + offset, (const uint8_t *) &val, 4) != ESP_OK) {
|
||||||
ets_printf("Write failed at offset=%d\r\n", offset);
|
printf("Write failed at offset=%d\r\n", offset);
|
||||||
ctx->fail[coreid] = true;
|
ctx->fail = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ets_printf("wd%d\n", coreid);
|
printf("wd%d\n", sector);
|
||||||
|
|
||||||
vTaskDelay(0 / portTICK_PERIOD_MS);
|
vTaskDelay(0 / portTICK_PERIOD_MS);
|
||||||
|
|
||||||
uint32_t val_read;
|
uint32_t val_read;
|
||||||
for (uint32_t offset = 0; offset < n; offset += 4) {
|
for (uint32_t offset = 0; offset < n; offset += 4) {
|
||||||
if (spi_flash_read(sector * SPI_FLASH_SEC_SIZE + offset, (uint8_t *) &val_read, 4) != ESP_OK) {
|
if (spi_flash_read(sector * SPI_FLASH_SEC_SIZE + offset, (uint8_t *) &val_read, 4) != ESP_OK) {
|
||||||
ets_printf("Read failed at offset=%d\r\n", offset);
|
printf("Read failed at offset=%d\r\n", offset);
|
||||||
ctx->fail[coreid] = true;
|
ctx->fail = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (val_read != val) {
|
if (val_read != val) {
|
||||||
ets_printf("Read invalid value=%08x at offset=%d\r\n", val_read, offset);
|
printf("Read invalid value=%08x at offset=%d\r\n", val_read, offset);
|
||||||
ctx->fail[coreid] = true;
|
ctx->fail = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ets_printf("td%d\n", coreid);
|
printf("td%d\n", sector);
|
||||||
xSemaphoreGive(ctx->done);
|
xSemaphoreGive(ctx->done);
|
||||||
vTaskDelete(NULL);
|
vTaskDelete(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("flash write and erase work both on PRO CPU and on APP CPU", "[spi_flash]")
|
TEST_CASE("flash write and erase work both on PRO CPU and on APP CPU", "[spi_flash]")
|
||||||
{
|
{
|
||||||
TaskHandle_t procpu_task;
|
SemaphoreHandle_t done = xSemaphoreCreateCounting(4, 0);
|
||||||
TaskHandle_t appcpu_task;
|
struct flash_test_ctx ctx[4] = {
|
||||||
struct flash_test_ctx ctx;
|
{ .offset = 0x100 + 6, .done = done },
|
||||||
|
{ .offset = 0x100 + 7, .done = done },
|
||||||
|
{ .offset = 0x100 + 8, .done = done },
|
||||||
|
{ .offset = 0x100 + 9, .done = done }
|
||||||
|
};
|
||||||
|
|
||||||
ctx.offset[0] = 6;
|
xTaskCreatePinnedToCore(flash_test_task, "1", 2048, &ctx[0], 3, NULL, 0);
|
||||||
ctx.offset[1] = 7;
|
xTaskCreatePinnedToCore(flash_test_task, "2", 2048, &ctx[1], 3, NULL, 1);
|
||||||
ctx.fail[0] = 0;
|
xTaskCreatePinnedToCore(flash_test_task, "3", 2048, &ctx[2], 3, NULL, tskNO_AFFINITY);
|
||||||
ctx.fail[1] = 0;
|
xTaskCreatePinnedToCore(flash_test_task, "4", 2048, &ctx[3], 3, NULL, tskNO_AFFINITY);
|
||||||
ctx.done = xSemaphoreCreateBinary();
|
|
||||||
|
|
||||||
xTaskCreatePinnedToCore(flash_test_task, "1", 2048, &ctx, 3, &procpu_task, 0);
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (portNUM_PROCESSORS == 2) {
|
xSemaphoreTake(done, portMAX_DELAY);
|
||||||
xTaskCreatePinnedToCore(flash_test_task, "2", 2048, &ctx, 3, &appcpu_task, 1);
|
TEST_ASSERT_FALSE(ctx[i].fail);
|
||||||
}
|
|
||||||
|
|
||||||
xSemaphoreTake(ctx.done, portMAX_DELAY);
|
|
||||||
if (portNUM_PROCESSORS == 2) {
|
|
||||||
xSemaphoreTake(ctx.done, portMAX_DELAY);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_ASSERT_EQUAL(false, ctx.fail[0]);
|
|
||||||
if (portNUM_PROCESSORS == 2) {
|
|
||||||
TEST_ASSERT_EQUAL(false, ctx.fail[1]);
|
|
||||||
}
|
}
|
||||||
|
vSemaphoreDelete(done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user