mirror of
				https://github.com/espressif/esp-idf.git
				synced 2025-10-26 04:31:43 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			367 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			367 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // Copyright 2018 Espressif Systems (Shanghai) PTE LTD
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //     http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| #include "freertos/FreeRTOS.h"
 | |
| #include "freertos/task.h"
 | |
| #include "esp_spiram.h"
 | |
| #include "rom/cache.h"
 | |
| #include "sdkconfig.h"
 | |
| #include "esp_himem.h"
 | |
| #include "soc/soc.h"
 | |
| #include "esp_log.h"
 | |
| 
 | |
| /*
 | |
| So, why does the API look this way and is so inflexible to not allow any maps beyond the full 32K chunks? Most of
 | |
| it has to do with the fact that the cache works on the *virtual* addresses What this comes down to is that while it's
 | |
| allowed to map a range of physical memory into the address space two times, there's no cache consistency between the
 | |
| two regions.
 | |
| 
 | |
| This means that a write to region A may or may not show up, perhaps delayed, in region B, as it depends on
 | |
| the time that the writeback to SPI RAM is done on A and the time before the corresponding cache line is invalidated
 | |
| on B. Note that this goes for every 32-byte cache line: this implies that if a program writes to address X and Y within
 | |
| A, the write to Y may show up before the write to X does.
 | |
| 
 | |
| It gets even worse when both A and B are written: theoretically, a write to a 32-byte cache line in A can be entirely
 | |
| undone because of a write to a different addres in B that happens to be in the same 32-byte cache line.
 | |
| 
 | |
| Because of these reasons, we do not allow double mappings at all. This, however, has other implications that make
 | |
| supporting ranges not really useful. Because the lack of double mappings, applications will need to do their own
 | |
| management of mapped regions, meaning they will normally map in and out blocks at a time anyway, as mapping more
 | |
| fluent regions would result in the chance of accidentally mapping two overlapping regions. As this is the case,
 | |
| to keep the code simple, at the moment we just force these blocks to be equal to the 32K MMU page size. The API
 | |
| itself does allow for more granular allocations, so if there's a pressing need for a more complex solution in the
 | |
| future, we can do this.
 | |
| 
 | |
| Note: In the future, we can expand on this api to do a memcpy() between SPI RAM and (internal) memory using the SPI1
 | |
| peripheral. This needs support for SPI1 to be in the SPI driver, however.
 | |
| */
 | |
| 
 | |
| #if CONFIG_SPIRAM_BANKSWITCH_ENABLE
 | |
| #define SPIRAM_BANKSWITCH_RESERVE CONFIG_SPIRAM_BANKSWITCH_RESERVE
 | |
| #else
 | |
| #define SPIRAM_BANKSWITCH_RESERVE 0
 | |
| #endif
 | |
| 
 | |
| #define CACHE_BLOCKSIZE (32*1024)
 | |
| 
 | |
| //Start of the virtual address range reserved for himem use
 | |
| #define VIRT_HIMEM_RANGE_START (SOC_EXTRAM_DATA_LOW+(128-SPIRAM_BANKSWITCH_RESERVE)*CACHE_BLOCKSIZE)
 | |
| //Start MMU block reserved for himem use
 | |
| #define VIRT_HIMEM_RANGE_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)
 | |
| //Start physical block
 | |
| #define PHYS_HIMEM_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE)
 | |
| 
 | |
| #define TAG "esp_himem"
 | |
| 
 | |
| #define HIMEM_CHECK(cond, str, err) if (cond) do {ESP_LOGE(TAG, "%s: %s", __FUNCTION__, str); return err; } while(0)
 | |
| 
 | |
| // Metadata for a block of physical RAM
 | |
| typedef struct {
 | |
|     unsigned int is_alloced: 1;
 | |
|     unsigned int is_mapped: 1;
 | |
| } ramblock_t;
 | |
| 
 | |
| //Metadata for a 32-K memory address range
 | |
| typedef struct {
 | |
|     unsigned int is_alloced: 1;
 | |
|     unsigned int is_mapped: 1;
 | |
|     unsigned int ram_block: 16;
 | |
| } rangeblock_t;
 | |
| 
 | |
| static ramblock_t *s_ram_descriptor = NULL;
 | |
| static rangeblock_t *s_range_descriptor = NULL;
 | |
| static int s_ramblockcnt = 0;
 | |
| static const int s_rangeblockcnt = SPIRAM_BANKSWITCH_RESERVE;
 | |
| 
 | |
| //Handle for a window of address space
 | |
| typedef struct esp_himem_rangedata_t {
 | |
|     int block_ct;
 | |
|     int block_start;
 | |
| } esp_himem_rangedata_t;
 | |
| 
 | |
| //Handle for a range of physical memory
 | |
| typedef struct esp_himem_ramdata_t {
 | |
|     int block_ct;
 | |
|     uint16_t *block;
 | |
| } esp_himem_ramdata_t;
 | |
| 
 | |
| static portMUX_TYPE spinlock = portMUX_INITIALIZER_UNLOCKED;
 | |
| 
 | |
| static inline int ramblock_idx_valid(int ramblock_idx)
 | |
| {
 | |
|     return (ramblock_idx >= 0 && ramblock_idx < s_ramblockcnt);
 | |
| }
 | |
| 
 | |
| static inline int rangeblock_idx_valid(int rangeblock_idx)
 | |
| {
 | |
|     return (rangeblock_idx >= 0 && rangeblock_idx < s_rangeblockcnt);
 | |
| }
 | |
| 
 | |
| static void set_bank(int virt_bank, int phys_bank, int ct)
 | |
| {
 | |
|     int r;
 | |
|     r = cache_sram_mmu_set( 0, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct );
 | |
|     assert(r == 0);
 | |
|     r = cache_sram_mmu_set( 1, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct );
 | |
|     assert(r == 0);
 | |
| }
 | |
| 
 | |
| size_t esp_himem_get_phys_size()
 | |
| {
 | |
|     int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
 | |
|     return esp_spiram_get_size()-paddr_start;
 | |
| }
 | |
| 
 | |
| size_t esp_himem_get_free_size()
 | |
| {
 | |
|     size_t ret=0;
 | |
|     for (int i = 0; i < s_ramblockcnt; i++) {
 | |
|         if (!s_ram_descriptor[i].is_alloced) ret+=CACHE_BLOCKSIZE;
 | |
|     }
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| size_t esp_himem_reserved_area_size() {
 | |
|     return CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE;
 | |
| }
 | |
| 
 | |
| 
 | |
| void __attribute__((constructor)) esp_himem_init()
 | |
| {
 | |
|     if (SPIRAM_BANKSWITCH_RESERVE == 0) return;
 | |
|     int maxram=esp_spiram_get_size();
 | |
|     //catch double init
 | |
|     HIMEM_CHECK(s_ram_descriptor != NULL, "already initialized", ); //Looks weird; last arg is empty so it expands to 'return ;'
 | |
|     HIMEM_CHECK(s_range_descriptor != NULL, "already initialized", );
 | |
|     //need to have some reserved banks
 | |
|     HIMEM_CHECK(SPIRAM_BANKSWITCH_RESERVE == 0, "No banks reserved for himem", );
 | |
|     //Start and end of physical reserved memory. Note it starts slightly under
 | |
|     //the 4MiB mark as the reserved banks can't have an unity mapping to be used by malloc
 | |
|     //anymore; we treat them as himem instead.
 | |
|     int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE);
 | |
|     int paddr_end = maxram;
 | |
|     s_ramblockcnt = ((paddr_end - paddr_start) / CACHE_BLOCKSIZE);
 | |
|     //Allocate data structures
 | |
|     s_ram_descriptor = calloc(sizeof(ramblock_t), s_ramblockcnt);
 | |
|     s_range_descriptor = calloc(sizeof(rangeblock_t), SPIRAM_BANKSWITCH_RESERVE);
 | |
|     if (s_ram_descriptor == NULL || s_range_descriptor == NULL) {
 | |
|         ESP_EARLY_LOGE(TAG, "Cannot allocate memory for meta info. Not initializing!");
 | |
|         free(s_ram_descriptor);
 | |
|         free(s_range_descriptor);
 | |
|         return;
 | |
|     }
 | |
|     ESP_EARLY_LOGI(TAG, "Initialized. Using last %d 32KB address blocks for bank switching on %d KB of physical memory.", 
 | |
|                 SPIRAM_BANKSWITCH_RESERVE, (paddr_end - paddr_start)/1024);
 | |
| }
 | |
| 
 | |
| 
 | |
| //Allocate count not-necessarily consecutive physical RAM blocks, return numbers in blocks[]. Return
 | |
| //true if blocks can be allocated, false if not.
 | |
| static bool allocate_blocks(int count, uint16_t *blocks_out)
 | |
| {
 | |
|     int n = 0;
 | |
|     for (int i = 0; i < s_ramblockcnt && n != count; i++) {
 | |
|         if (!s_ram_descriptor[i].is_alloced) {
 | |
|             blocks_out[n] = i;
 | |
|             n++;
 | |
|         }
 | |
|     }
 | |
|     if (n == count) {
 | |
|         //All blocks could be allocated. Mark as in use.
 | |
|         for (int i = 0; i < count; i++) {
 | |
|             s_ram_descriptor[blocks_out[i]].is_alloced = true;
 | |
|             assert(s_ram_descriptor[blocks_out[i]].is_mapped == false);
 | |
|         }
 | |
|         return true;
 | |
|     } else {
 | |
|         //Error allocating blocks
 | |
|         return false;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| esp_err_t esp_himem_alloc(size_t size, esp_himem_handle_t *handle_out)
 | |
| {
 | |
|     if (size % CACHE_BLOCKSIZE != 0) {
 | |
|         return ESP_ERR_INVALID_SIZE;
 | |
|     }
 | |
|     int blocks = size / CACHE_BLOCKSIZE;
 | |
|     esp_himem_ramdata_t *r = calloc(sizeof(esp_himem_ramdata_t), 1);
 | |
|     if (!r) {
 | |
|         goto nomem;
 | |
|     }
 | |
|     r->block = calloc(sizeof(uint16_t), blocks);
 | |
|     if (!r->block) {
 | |
|         goto nomem;
 | |
|     }
 | |
|     portENTER_CRITICAL(&spinlock);
 | |
|     int ok = allocate_blocks(blocks, r->block);
 | |
|     portEXIT_CRITICAL(&spinlock);
 | |
|     if (!ok) {
 | |
|         goto nomem;
 | |
|     }
 | |
|     r->block_ct = blocks;
 | |
|     *handle_out = r;
 | |
|     return ESP_OK;
 | |
| nomem:
 | |
|     if (r) {
 | |
|         free(r->block);
 | |
|     }
 | |
|     free(r);
 | |
|     return ESP_ERR_NO_MEM;
 | |
| }
 | |
| 
 | |
| esp_err_t esp_himem_free(esp_himem_handle_t handle)
 | |
| {
 | |
|     //Check if any of the blocks is still mapped; fail if this is the case.
 | |
|     for (int i = 0; i < handle->block_ct; i++) {
 | |
|         assert(ramblock_idx_valid(handle->block[i]));
 | |
|         HIMEM_CHECK(s_ram_descriptor[handle->block[i]].is_mapped, "block in range still mapped", ESP_ERR_INVALID_ARG);
 | |
|     }
 | |
|     //Mark blocks as free
 | |
|     portENTER_CRITICAL(&spinlock);
 | |
|     for (int i = 0; i < handle->block_ct; i++) {
 | |
|         s_ram_descriptor[handle->block[i]].is_alloced = false;
 | |
|     }
 | |
|     portEXIT_CRITICAL(&spinlock);
 | |
| 
 | |
|     //Free handle
 | |
|     free(handle->block);
 | |
|     free(handle);
 | |
|     return ESP_OK;
 | |
| }
 | |
| 
 | |
| 
 | |
| esp_err_t esp_himem_alloc_map_range(size_t size, esp_himem_rangehandle_t *handle_out)
 | |
| {
 | |
|     HIMEM_CHECK(s_ram_descriptor == NULL, "Himem not available!", ESP_ERR_INVALID_STATE);
 | |
|     HIMEM_CHECK(size % CACHE_BLOCKSIZE != 0, "requested size not aligned to blocksize", ESP_ERR_INVALID_SIZE);
 | |
|     int blocks = size / CACHE_BLOCKSIZE;
 | |
|     esp_himem_rangedata_t *r = calloc(sizeof(esp_himem_rangedata_t), 1);
 | |
|     if (!r) {
 | |
|         return ESP_ERR_NO_MEM;
 | |
|     }
 | |
|     r->block_ct = blocks;
 | |
|     r->block_start = -1;
 | |
|     int start_free = 0;
 | |
|     portENTER_CRITICAL(&spinlock);
 | |
|     for (int i = 0; i < s_rangeblockcnt; i++) {
 | |
|         if (s_range_descriptor[i].is_alloced) {
 | |
|             start_free = i + 1; //optimistically assume next block is free...
 | |
|         } else if (i - start_free == blocks - 1) {
 | |
|             //We found a span of blocks that's big enough to allocate the requested range in.
 | |
|             r->block_start = start_free;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (r->block_start == -1) {
 | |
|         //Couldn't find enough free blocks
 | |
|         free(r);
 | |
|         portEXIT_CRITICAL(&spinlock);
 | |
|         return ESP_ERR_NO_MEM;
 | |
|     }
 | |
|     //Range is found. Mark the blocks as in use.
 | |
|     for (int i = 0; i < blocks; i++) {
 | |
|         s_range_descriptor[r->block_start + i].is_alloced = 1;
 | |
|     }
 | |
|     portEXIT_CRITICAL(&spinlock);
 | |
|     //All done.
 | |
|     *handle_out = r;
 | |
|     return ESP_OK;
 | |
| }
 | |
| 
 | |
| esp_err_t esp_himem_free_map_range(esp_himem_rangehandle_t handle)
 | |
| {
 | |
|     //Check if any of the blocks in the range have a mapping
 | |
|     for (int i = 0; i < handle->block_ct; i++) {
 | |
|         assert(rangeblock_idx_valid(handle->block_start + i));
 | |
|         assert(s_range_descriptor[i + handle->block_start].is_alloced == 1); //should be, if handle is valid
 | |
|         HIMEM_CHECK(s_range_descriptor[i + handle->block_start].is_mapped, "memory still mapped to range", ESP_ERR_INVALID_ARG);
 | |
|     }
 | |
|     //We should be good to free this. Mark blocks as free.
 | |
|     portENTER_CRITICAL(&spinlock);
 | |
|     for (int i = 0; i < handle->block_ct; i++) {
 | |
|         s_range_descriptor[i + handle->block_start].is_alloced = 0;
 | |
|     }
 | |
|     portEXIT_CRITICAL(&spinlock);
 | |
|     free(handle);
 | |
|     return ESP_OK;
 | |
| }
 | |
| 
 | |
| 
 | |
| esp_err_t esp_himem_map(esp_himem_handle_t handle, esp_himem_rangehandle_t range, size_t ram_offset, size_t range_offset, size_t len, int flags, void **out_ptr)
 | |
| {
 | |
|     int ram_block = ram_offset / CACHE_BLOCKSIZE;
 | |
|     int range_block = range_offset / CACHE_BLOCKSIZE;
 | |
|     int blockcount = len / CACHE_BLOCKSIZE;
 | |
|     HIMEM_CHECK(s_ram_descriptor == NULL, "Himem not available!", ESP_ERR_INVALID_STATE);
 | |
|     //Offsets and length must be block-aligned
 | |
|     HIMEM_CHECK(ram_offset % CACHE_BLOCKSIZE != 0, "ram offset not aligned to blocksize", ESP_ERR_INVALID_ARG);
 | |
|     HIMEM_CHECK(range_offset % CACHE_BLOCKSIZE != 0, "range not aligned to blocksize", ESP_ERR_INVALID_ARG);
 | |
|     HIMEM_CHECK(len % CACHE_BLOCKSIZE != 0, "length not aligned to blocksize", ESP_ERR_INVALID_ARG);
 | |
|     //ram and range should be within allocated range
 | |
|     HIMEM_CHECK(ram_block + blockcount > handle->block_ct, "args not in range of phys ram handle", ESP_ERR_INVALID_SIZE);
 | |
|     HIMEM_CHECK(range_block + blockcount > range->block_ct, "args not in range of range handle", ESP_ERR_INVALID_SIZE);
 | |
| 
 | |
|     //Check if ram blocks aren't already mapped, and if memory range is unmapped
 | |
|     for (int i = 0; i < blockcount; i++) {
 | |
|         HIMEM_CHECK(s_ram_descriptor[handle->block[i + ram_block]].is_mapped, "ram already mapped", ESP_ERR_INVALID_STATE);
 | |
|         HIMEM_CHECK(s_range_descriptor[range->block_start + i + range_block].is_mapped, "range already mapped", ESP_ERR_INVALID_STATE);
 | |
|     }
 | |
| 
 | |
|     //Map and mark as mapped
 | |
|     portENTER_CRITICAL(&spinlock);
 | |
|     for (int i = 0; i < blockcount; i++) {
 | |
|         assert(ramblock_idx_valid(handle->block[i + ram_block]));
 | |
|         s_ram_descriptor[handle->block[i + ram_block]].is_mapped = 1;
 | |
|         s_range_descriptor[range->block_start + i + range_block].is_mapped = 1;
 | |
|         s_range_descriptor[range->block_start + i + range_block].ram_block = handle->block[i + ram_block];
 | |
|     }
 | |
|     portEXIT_CRITICAL(&spinlock);
 | |
|     for (int i = 0; i < blockcount; i++) {
 | |
|         set_bank(VIRT_HIMEM_RANGE_BLOCKSTART + range->block_start + i + range_block, handle->block[i + ram_block] + PHYS_HIMEM_BLOCKSTART, 1);
 | |
|     }
 | |
| 
 | |
|     //Set out pointer
 | |
|     *out_ptr = (void *)(VIRT_HIMEM_RANGE_START + (range->block_start + range_offset) * CACHE_BLOCKSIZE);
 | |
|     return ESP_OK;
 | |
| }
 | |
| 
 | |
| esp_err_t esp_himem_unmap(esp_himem_rangehandle_t range, void *ptr, size_t len)
 | |
| {
 | |
|     //Note: doesn't actually unmap, just clears cache and marks blocks as unmapped.
 | |
|     //Future optimization: could actually lazy-unmap here: essentially, do nothing and only clear the cache when we re-use
 | |
|     //the block for a different physical address.
 | |
|     int range_offset = (uint32_t)ptr - VIRT_HIMEM_RANGE_START;
 | |
|     int range_block = (range_offset / CACHE_BLOCKSIZE) - range->block_start;
 | |
|     int blockcount = len / CACHE_BLOCKSIZE;
 | |
|     HIMEM_CHECK(range_offset % CACHE_BLOCKSIZE != 0, "range offset not block-aligned", ESP_ERR_INVALID_ARG);
 | |
|     HIMEM_CHECK(len % CACHE_BLOCKSIZE != 0, "map length not block-aligned", ESP_ERR_INVALID_ARG);
 | |
|     HIMEM_CHECK(range_block + blockcount > range->block_ct, "range out of bounds for handle", ESP_ERR_INVALID_ARG);
 | |
| 
 | |
|     portENTER_CRITICAL(&spinlock);
 | |
|     for (int i = 0; i < blockcount; i++) {
 | |
|         int ramblock = s_range_descriptor[range->block_start + i + range_block].ram_block;
 | |
|         assert(ramblock_idx_valid(ramblock));
 | |
|         s_ram_descriptor[ramblock].is_mapped = 0;
 | |
|         s_range_descriptor[range->block_start + i + range_block].is_mapped = 0;
 | |
|     }
 | |
|     esp_spiram_writeback_cache();
 | |
|     portEXIT_CRITICAL(&spinlock);
 | |
|     return ESP_OK;
 | |
| }
 | |
| 
 | |
| 
 |