v2.0.0 Add support for ESP32S2 and update ESP-IDF to 4.4 (#4996)

This is very much still work in progress and much more will change before the final 2.0.0 Some APIs have changed. New libraries have been added. LittleFS included. Co-authored-by: Seon Rozenblum <seonr@3sprockets.com> Co-authored-by: Me No Dev <me-no-dev@users.noreply.github.com> Co-authored-by: geeksville <kevinh@geeksville.com> Co-authored-by: Mike Dunston <m_dunston@comcast.net> Co-authored-by: Unexpected Maker <seon@unexpectedmaker.com> Co-authored-by: Seon Rozenblum <seonr@3sprockets.com> Co-authored-by: microDev <70126934+microDev1@users.noreply.github.com> Co-authored-by: tobozo <tobozo@users.noreply.github.com> Co-authored-by: bobobo1618 <bobobo1618@users.noreply.github.com> Co-authored-by: lorol <lorolouis@gmail.com> Co-authored-by: geeksville <kevinh@geeksville.com> Co-authored-by: Limor "Ladyada" Fried <limor@ladyada.net> Co-authored-by: Sweety <switi.mhaiske@espressif.com> Co-authored-by: Loick MAHIEUX <loick111@gmail.com> Co-authored-by: Larry Bernstone <lbernstone@gmail.com> Co-authored-by: Valerii Koval <valeros@users.noreply.github.com> Co-authored-by: 快乐的我531 <2302004040@qq.com> Co-authored-by: chegewara <imperiaonline4@gmail.com> Co-authored-by: Clemens Kirchgatterer <clemens@1541.org> Co-authored-by: Aron Rubin <aronrubin@gmail.com> Co-authored-by: Pete Lewis <601236+lewispg228@users.noreply.github.com>
2021-04-05 14:23:58 +03:00
parent 46d5afb17f
commit 5502879a5b
5209 changed files with 826360 additions and 322816 deletions
--- a/tools/sdk/esp32/include/esp-face/lib/include/cat_face_3.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/cat_face_3.h
@ -0,0 +1,40 @@
+/*
+ * ESPRESSIF MIT License
+ *
+ * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+ *
+ * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+ * it is free of charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+ * to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or
+ * substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+#include "freertos/FreeRTOS.h"
+#include "detection.h"
+
+    extern detection_model_t cat_face_3_model;
+
+#ifdef __cplusplus
+}
+#endif
--- a/tools/sdk/esp32/include/esp-face/lib/include/detection.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/detection.h
@ -0,0 +1,87 @@
+/*
+  * ESPRESSIF MIT License
+  *
+  * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+  *
+  * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+  * it is free of charge, to any person obtaining a copy of this software and associated
+  * documentation files (the "Software"), to deal in the Software without restriction, including
+  * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+  * to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in all copies or
+  * substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+  * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+  * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *
+  */
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+#include "freertos/FreeRTOS.h"
+
+    typedef enum
+    {
+        Anchor_Point, /*!< Anchor point detection model */
+        Anchor_Box    /*!< Anchor box detection model */
+    } detection_model_type_t;
+
+    typedef struct
+    {
+        int **anchors_shape; /*!< Anchor shape of this stage */
+        int stride;          /*!< Zoom in stride of this stage */
+        int boundary;        /*!< Detection image low-limit of this stage */
+        int project_offset;  /*!< Project offset of this stage */
+    } detection_stage_config_t;
+
+    typedef struct
+    {
+        dl_matrix3dq_t *score;           /*!< Score feature map of this stage */
+        dl_matrix3dq_t *box_offset;      /*!< Box offset feature map of this stage */
+        dl_matrix3dq_t *landmark_offset; /*!< Landmark offset feature map of this stage */
+    } detection_stage_result_t;
+
+    typedef struct
+    {
+        int resized_height;    /*!< The height after resizing */
+        int resized_width;     /*!< The width after resizing */
+        fptp_t y_resize_scale; /*!< resized_height / input_height */
+        fptp_t x_resize_scale; /*!< resized_width / input_width */
+        qtp_t score_threshold; /*!< Score threshold of detection model */
+        fptp_t nms_threshold;  /*!< NMS threshold of detection model */
+        bool with_landmark;    /*!< Whether to detect with landmarks, true: with, false: without */
+        bool free_image;       /*!< Whether to free the resized image */
+        int enabled_top_k;     /*!< The number of enabled stages */
+    } detection_model_config_t;
+
+    typedef struct
+    {
+        detection_stage_config_t *stage_config;                                                                      /*!< Configuration of each stage */
+        int stage_number;                                                                                            /*!< The number of stages */
+        detection_model_type_t model_type;                                                                           /*!< The type of detection model */
+        detection_model_config_t model_config;                                                                       /*!< Configuration of detection model */
+        detection_stage_result_t *(*op)(dl_matrix3dq_t *, detection_model_config_t *);                               /*!< Detection inference function; returns a detection_stage_result_t pointer */
+        void *(*get_boxes)(detection_stage_result_t *, detection_model_config_t *, detection_stage_config_t *, int); /*!< Function that derives the real boxes from the stage results */
+    } detection_model_t;
+
+    /**
+     * @brief free 'detection_stage_result_t' type value
+     * 
+     * @param value A 'detection_stage_result_t' type value
+     */
+    void free_detection_stage_result(detection_stage_result_t value);
+
+#ifdef __cplusplus
+}
+#endif
--- a/tools/sdk/esp32/include/esp-face/lib/include/dl_lib_matrix3d.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/dl_lib_matrix3d.h
@ -0,0 +1,819 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+#if CONFIG_SPIRAM_SUPPORT || CONFIG_ESP32_SPIRAM_SUPPORT
+#include "freertos/FreeRTOS.h"
+#define DL_SPIRAM_SUPPORT 1
+#else
+#define DL_SPIRAM_SUPPORT 0
+#endif
+
+
+#ifndef max /* keep any max() that was defined earlier */
+#define max(x, y) (((x) < (y)) ? (y) : (x)) /* NOTE: the selected argument is evaluated twice; avoid side effects */
+#endif
+
+#ifndef min /* keep any min() that was defined earlier */
+#define min(x, y) (((x) < (y)) ? (x) : (y)) /* NOTE: the selected argument is evaluated twice; avoid side effects */
+#endif
+
+typedef float fptp_t; /* Item type of dl_matrix3d_t */
+typedef uint8_t uc_t; /* Item type of dl_matrix3du_t */
+
+typedef enum
+{
+    DL_SUCCESS = 0, /*!< Success status */
+    DL_FAIL = 1,    /*!< Failure status */
+} dl_error_type;
+
+typedef enum
+{
+    PADDING_VALID = 0,                   /*!< Valid padding */
+    PADDING_SAME = 1,                    /*!< Same padding, from right to left; the input is freed */
+    PADDING_SAME_DONT_FREE_INPUT = 2,    /*!< Same padding, from right to left; the input is not freed */
+    PADDING_SAME_MXNET = 3,              /*!< Same padding, from left to right */
+} dl_padding_type;
+
+typedef enum
+{
+    DL_POOLING_MAX = 0,        /*!< Max pooling (see dl_matrix3d_pooling) */
+    DL_POOLING_AVG = 1,        /*!< Average pooling (see dl_matrix3d_pooling) */
+} dl_pooling_type; 
+/*
+ * 3D matrix with float (fptp_t) items
+ * @Warning: the order of the members is fixed and must not be modified, otherwise there will be errors in esp_dsp_dot_float
+ */
+typedef struct
+{
+    int w;        /*!< Width */
+    int h;        /*!< Height */
+    int c;        /*!< Channel */
+    int n;        /*!< Number of filters (out channels); must be 1 for input and output matrices */
+    int stride;   /*!< Step between lines; dl_matrix3d_alloc sets it to w * c */
+    fptp_t *item; /*!< Data */
+} dl_matrix3d_t;
+
+typedef struct
+{
+    int w;      /*!< Width */
+    int h;      /*!< Height */
+    int c;      /*!< Channel */
+    int n;      /*!< Number of filters (out channels); must be 1 for input and output matrices */
+    int stride; /*!< Step between lines; dl_matrix3du_alloc sets it to w * c */
+    uc_t *item; /*!< Data, 8-bit unsigned items */
+} dl_matrix3du_t;
+
+typedef enum
+{
+    UPSAMPLE_NEAREST_NEIGHBOR = 0, /*!< Use nearest neighbor interpolation as the upsample method */
+    UPSAMPLE_BILINEAR = 1,        /*!< Use bilinear interpolation as the upsample method */
+} dl_upsample_type;
+
+typedef struct
+{
+    int stride_x;                    /*!< Stride along the width */
+    int stride_y;                    /*!< Stride along the height */
+    dl_padding_type padding;         /*!< Padding type */
+} dl_matrix3d_mobilenet_config_t;
+
+/**
+ * @brief Allocate a zero-initialized space. Must use 'dl_lib_free' to free the memory.
+ *        malloc() is tried first; when DL_SPIRAM_SUPPORT is set, heap_caps_malloc() with MALLOC_CAP_SPIRAM is the fallback.
+ * @param cnt  Count of units.
+ * @param size Size of unit, in bytes.
+ * @param align Alignment of the returned address, in bytes. If not required, set 0. The '& -align' rounding assumes a power of two.
+ * @return Pointer to the allocated memory, or NULL on failure (a message is printed).
+ */
+static void *dl_lib_calloc(int cnt, int size, int align)
+{
+    int total_size = cnt * size + align + sizeof(void *); /* payload + alignment slack + one slot for the raw pointer; NOTE(review): int arithmetic, no overflow check */
+    void *res = malloc(total_size);
+    if (NULL == res)
+    {
+#if DL_SPIRAM_SUPPORT /* SPIRAM builds: retry in SPIRAM, then re-test the result below */
+        res = heap_caps_malloc(total_size, MALLOC_CAP_8BIT | MALLOC_CAP_SPIRAM);
+    } /* closes the first NULL check in SPIRAM builds only */
+    if (NULL == res)
+    {
+        printf("Item psram alloc failed. Size: %d x %d\n", cnt, size);
+#else /* other builds: the first NULL check reports the failure directly */
+        printf("Item alloc failed. Size: %d x %d, SPIRAM_FLAG: %d\n", cnt, size, DL_SPIRAM_SUPPORT);
+#endif
+        return NULL;
+    }
+    bzero(res, total_size); /* zero the whole raw block, including the pointer slot and slack */
+    void **data = (void **)res + 1; /* skip one pointer-sized slot at the start of the raw block */
+    void **aligned;
+    if (align)
+        aligned = (void **)(((size_t)data + (align - 1)) & -align); /* round data up to a multiple of align */
+    else
+        aligned = data;
+
+    aligned[-1] = res; /* store the raw pointer just before the returned address; dl_lib_free reads it back */
+    return (void *)aligned;
+}
+
+/**
+ * @brief Free the memory space allocated by 'dl_lib_calloc'
+ * @param d Pointer returned by 'dl_lib_calloc'; NULL is ignored.
+ */
+static inline void dl_lib_free(void *d)
+{
+    if (NULL == d)
+        return;
+
+    free(((void **)d)[-1]); /* the raw allocation pointer was stored by dl_lib_calloc in the slot just before d */
+}
+
+/**
+ * @brief Allocate a 3D matrix with float items, the access sequence is NHWC
+ *        Both the matrix struct and its zero-initialized item buffer are obtained from 'dl_lib_calloc'.
+ * @param n     Number of matrix3d, for filters it is out channels, for others it is 1
+ * @param w     Width of matrix3d
+ * @param h     Height of matrix3d
+ * @param c     Channel of matrix3d
+ * @return      3d matrix, or NULL if either allocation fails (a message is printed)
+ */
+static inline dl_matrix3d_t *dl_matrix3d_alloc(int n, int w, int h, int c)
+{
+    dl_matrix3d_t *r = (dl_matrix3d_t *)dl_lib_calloc(1, sizeof(dl_matrix3d_t), 0);
+    if (NULL == r)
+    {
+        printf("internal r failed.\n");
+        return NULL;
+    }
+    fptp_t *items = (fptp_t *)dl_lib_calloc(n * w * h * c, sizeof(fptp_t), 0); /* NOTE(review): n * w * h * c is an unchecked int product */
+    if (NULL == items)
+    {
+        printf("matrix3d item alloc failed.\n");
+        dl_lib_free(r); /* release the struct so it is not leaked */
+        return NULL;
+    }
+
+    r->w = w;
+    r->h = h;
+    r->c = c;
+    r->n = n;
+    r->stride = w * c; /* step between lines, counted in items */
+    r->item = items;
+
+    return r;
+}
+
+/**
+ * @brief Allocate a 3D matrix with 8-bits items, the access sequence is NHWC
+ *        Both the matrix struct and its zero-initialized item buffer are obtained from 'dl_lib_calloc'.
+ * @param n     Number of matrix3d, for filters it is out channels, for others it is 1
+ * @param w     Width of matrix3d
+ * @param h     Height of matrix3d
+ * @param c     Channel of matrix3d
+ * @return      3d matrix, or NULL if either allocation fails (a message is printed)
+ */
+static inline dl_matrix3du_t *dl_matrix3du_alloc(int n, int w, int h, int c)
+{
+    dl_matrix3du_t *r = (dl_matrix3du_t *)dl_lib_calloc(1, sizeof(dl_matrix3du_t), 0);
+    if (NULL == r)
+    {
+        printf("internal r failed.\n");
+        return NULL;
+    }
+    uc_t *items = (uc_t *)dl_lib_calloc(n * w * h * c, sizeof(uc_t), 0); /* NOTE(review): n * w * h * c is an unchecked int product */
+    if (NULL == items)
+    {
+        printf("matrix3du item alloc failed.\n");
+        dl_lib_free(r); /* release the struct so it is not leaked */
+        return NULL;
+    }
+
+    r->w = w;
+    r->h = h;
+    r->c = c;
+    r->n = n;
+    r->stride = w * c; /* step between lines, counted in items */
+    r->item = items;
+
+    return r;
+}
+
+/**
+ * @brief Free a matrix3d: its item buffer (if any) and the struct itself, both via 'dl_lib_free'
+ *
+ * @param m matrix3d with float items; NULL is ignored
+ */
+static inline void dl_matrix3d_free(dl_matrix3d_t *m)
+{
+    if (NULL == m)
+        return;
+    if (NULL == m->item) /* no item buffer attached: only the struct is released */
+    {
+        dl_lib_free(m);
+        return;
+    }
+    dl_lib_free(m->item);
+    dl_lib_free(m);
+}
+
+/**
+ * @brief Free a matrix3du: its item buffer (if any) and the struct itself, both via 'dl_lib_free'
+ *
+ * @param m matrix3du with 8-bits items; NULL is ignored
+ */
+static inline void dl_matrix3du_free(dl_matrix3du_t *m)
+{
+    if (NULL == m)
+        return;
+    if (NULL == m->item) /* no item buffer attached: only the struct is released */
+    {
+        dl_lib_free(m);
+        return;
+    }
+    dl_lib_free(m->item);
+    dl_lib_free(m);
+}
+
+
+/*
+ * @brief Dot product with a vector and matrix
+ *
+ * @param out   Space to put the result
+ * @param in    input vector
+ * @param f     filter matrix
+ */
+void dl_matrix3dff_dot_product(dl_matrix3d_t *out, dl_matrix3d_t *in, dl_matrix3d_t *f);
+
+/**
+ * @brief Do a softmax operation on a matrix3d
+ *
+ * @param m         Input matrix3d
+ */
+void dl_matrix3d_softmax(dl_matrix3d_t *m);
+
+/**
+ * @brief Copy a range of float items from an existing matrix to a preallocated matrix
+ *
+ * @param dst   The destination slice matrix
+ * @param src   The source matrix to slice
+ * @param x     X-offset of the origin of the returned matrix within the sliced matrix
+ * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
+ * @param w     Width of the resulting matrix
+ * @param h     Height of the resulting matrix
+ */
+void dl_matrix3d_slice_copy(dl_matrix3d_t *dst,
+                            dl_matrix3d_t *src,
+                            int x,
+                            int y,
+                            int w,
+                            int h);
+
+/**
+ * @brief Copy a range of 8-bits items from an existing matrix to a preallocated matrix
+ *
+ * @param dst   The destination slice matrix
+ * @param src   The source matrix to slice
+ * @param x     X-offset of the origin of the returned matrix within the sliced matrix
+ * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
+ * @param w     Width of the resulting matrix
+ * @param h     Height of the resulting matrix
+ */
+void dl_matrix3du_slice_copy(dl_matrix3du_t *dst,
+                             dl_matrix3du_t *src,
+                             int x,
+                             int y,
+                             int w,
+                             int h);
+
+/**
+ * @brief Transform a sliced matrix block from nhwc to nchw, the block needs to be contiguous in memory.
+ *
+ * @param out  The destination sliced matrix in nchw
+ * @param in   The source sliced matrix in nhwc
+ */
+void dl_matrix3d_sliced_transform_nchw(dl_matrix3d_t *out,
+                                       dl_matrix3d_t *in);
+
+/**
+ * @brief Do a general CNN layer pass, dimension is (number, width, height, channel)
+ *
+ * @param in               Input matrix3d
+ * @param filter           Weights of the neurons
+ * @param bias             Bias for the CNN layer
+ * @param stride_x         The step length of the convolution window in x(width) direction
+ * @param stride_y         The step length of the convolution window in y(height) direction
+ * @param padding          One of VALID or SAME
+ * @param mode             Do convolution using the C implementation or the xtensa implementation, 0 or 1 respectively.
+ *                         If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @return dl_matrix3d_t*  The result of CNN layer
+ */
+dl_matrix3d_t *dl_matrix3d_conv(dl_matrix3d_t *in,
+                                dl_matrix3d_t *filter,
+                                dl_matrix3d_t *bias,
+                                int stride_x,
+                                int stride_y,
+                                int padding,
+                                int mode);
+
+/**
+ * @brief Do a global average pooling layer pass, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix3d
+ *
+ * @return               The result of global average pooling layer
+ */
+dl_matrix3d_t *dl_matrix3d_global_pool(dl_matrix3d_t *in);
+
+/**
+ * @brief Calculate pooling layer of a feature map
+ *
+ * @param in               Input matrix, size (1, w, h, c)
+ * @param f_w              Window width
+ * @param f_h              Window height 
+ * @param stride_x         Stride in horizontal direction
+ * @param stride_y         Stride in vertical direction
+ * @param padding          Padding type: PADDING_VALID and PADDING_SAME
+ * @param pooling_type     Pooling type: DL_POOLING_MAX or DL_POOLING_AVG
+ * @return dl_matrix3d_t*  Resulting matrix, size (1, w', h', c)
+ */
+dl_matrix3d_t *dl_matrix3d_pooling(dl_matrix3d_t *in,
+                                   int f_w,
+                                   int f_h,
+                                   int stride_x,
+                                   int stride_y,
+                                   dl_padding_type padding,
+                                   dl_pooling_type pooling_type);
+/**
+ * @brief Do a batch normalization operation, update the input matrix3d: input = input * scale + offset
+ *
+ * @param m              Input matrix3d
+ * @param scale          scale matrix3d,  scale = gamma/((moving_variance+sigma)^(1/2))
+ * @param offset         Offset matrix3d, offset = beta-(moving_mean*gamma/((moving_variance+sigma)^(1/2)))
+ */
+void dl_matrix3d_batch_normalize(dl_matrix3d_t *m,
+                                 dl_matrix3d_t *scale,
+                                 dl_matrix3d_t *offset);
+
+/**
+ * @brief Add a pair of matrix3d item-by-item: res=in_1+in_2
+ *
+ * @param in_1             First Floating point input matrix3d
+ * @param in_2             Second Floating point input matrix3d
+ *
+ * @return dl_matrix3d_t*  Added data
+ */
+dl_matrix3d_t *dl_matrix3d_add(dl_matrix3d_t *in_1, dl_matrix3d_t *in_2);
+
+/**
+ * @brief Concatenate the channels of two matrix3ds into a new matrix3d
+ *
+ * @param in_1             First Floating point input matrix3d
+ * @param in_2             Second Floating point input matrix3d
+ *
+ * @return dl_matrix3d_t*  A newly allocated matrix3d with values in_1|in_2
+ */
+dl_matrix3d_t *dl_matrix3d_concat(dl_matrix3d_t *in_1, dl_matrix3d_t *in_2);
+
+/**
+ * @brief Concatenate the channels of four matrix3ds into a new matrix3d
+ *
+ * @param in_1           First Floating point input matrix3d
+ * @param in_2           Second Floating point input matrix3d
+ * @param in_3           Third Floating point input matrix3d
+ * @param in_4           Fourth Floating point input matrix3d
+ *
+ * @return               A newly allocated matrix3d with values in_1|in_2|in_3|in_4
+ */
+dl_matrix3d_t *dl_matrix3d_concat_4(dl_matrix3d_t *in_1,
+                                    dl_matrix3d_t *in_2,
+                                    dl_matrix3d_t *in_3,
+                                    dl_matrix3d_t *in_4);
+
+/**
+ * @brief Concatenate the channels of eight matrix3ds into a new matrix3d
+ *
+ * @param in_1           First Floating point input matrix3d
+ * @param in_2           Second Floating point input matrix3d
+ * @param in_3           Third Floating point input matrix3d
+ * @param in_4           Fourth Floating point input matrix3d
+ * @param in_5           Fifth Floating point input matrix3d
+ * @param in_6           Sixth Floating point input matrix3d
+ * @param in_7           Seventh Floating point input matrix3d
+ * @param in_8           Eighth Floating point input matrix3d
+ *
+ * @return               A newly allocated matrix3d with values in_1|in_2|in_3|in_4|in_5|in_6|in_7|in_8
+ */
+dl_matrix3d_t *dl_matrix3d_concat_8(dl_matrix3d_t *in_1,
+                                    dl_matrix3d_t *in_2,
+                                    dl_matrix3d_t *in_3,
+                                    dl_matrix3d_t *in_4,
+                                    dl_matrix3d_t *in_5,
+                                    dl_matrix3d_t *in_6,
+                                    dl_matrix3d_t *in_7,
+                                    dl_matrix3d_t *in_8);
+
+/**
+ * @brief Do a mobilefacenet block forward, dimension is (number, width, height, channel)
+ *
+ * @param in                    Input matrix3d
+ * @param pw                    Weights of the pointwise conv layer
+ * @param pw_bn_scale           The scale params of the batch_normalize layer after the pointwise conv layer
+ * @param pw_bn_offset          The offset params of the batch_normalize layer after the pointwise conv layer
+ * @param dw                    Weights of the depthwise conv layer
+ * @param dw_bn_scale           The scale params of the batch_normalize layer after the depthwise conv layer
+ * @param dw_bn_offset          The offset params of the batch_normalize layer after the depthwise conv layer
+ * @param pw_linear             Weights of the pointwise linear conv layer
+ * @param pw_linear_bn_scale    The scale params of the batch_normalize layer after the pointwise linear conv layer
+ * @param pw_linear_bn_offset   The offset params of the batch_normalize layer after the pointwise linear conv layer
+ * @param stride_x              The step length of the convolution window in x(width) direction
+ * @param stride_y              The step length of the convolution window in y(height) direction
+ * @param padding               One of VALID or SAME
+ * @param mode                  Do convolution using the C implementation or the xtensa implementation, 0 or 1 respectively.
+ *                              If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @return                      The result of a mobilefacenet block
+ */
+dl_matrix3d_t *dl_matrix3d_mobilefaceblock(dl_matrix3d_t *in,
+                                           dl_matrix3d_t *pw,
+                                           dl_matrix3d_t *pw_bn_scale,
+                                           dl_matrix3d_t *pw_bn_offset,
+                                           dl_matrix3d_t *dw,
+                                           dl_matrix3d_t *dw_bn_scale,
+                                           dl_matrix3d_t *dw_bn_offset,
+                                           dl_matrix3d_t *pw_linear,
+                                           dl_matrix3d_t *pw_linear_bn_scale,
+                                           dl_matrix3d_t *pw_linear_bn_offset,
+                                           int stride_x,
+                                           int stride_y,
+                                           int padding,
+                                           int mode,
+                                           int shortcut);
+
+/**
+ * @brief Do a mobilefacenet block forward with 1x1 split conv, dimension is (number, width, height, channel)
+ *
+ * @param in                    Input matrix3d
+ * @param pw_1                  Weights of the pointwise conv layer 1
+ * @param pw_2                  Weights of the pointwise conv layer 2
+ * @param pw_bn_scale           The scale params of the batch_normalize layer after the pointwise conv layer
+ * @param pw_bn_offset          The offset params of the batch_normalize layer after the pointwise conv layer
+ * @param dw                    Weights of the depthwise conv layer
+ * @param dw_bn_scale           The scale params of the batch_normalize layer after the depthwise conv layer
+ * @param dw_bn_offset          The offset params of the batch_normalize layer after the depthwise conv layer
+ * @param pw_linear_1           Weights of the pointwise linear conv layer 1
+ * @param pw_linear_2           Weights of the pointwise linear conv layer 2
+ * @param pw_linear_bn_scale    The scale params of the batch_normalize layer after the pointwise linear conv layer
+ * @param pw_linear_bn_offset   The offset params of the batch_normalize layer after the pointwise linear conv layer
+ * @param stride_x              The step length of the convolution window in x(width) direction
+ * @param stride_y              The step length of the convolution window in y(height) direction
+ * @param padding               One of VALID or SAME
+ * @param mode                  Do convolution using the C implementation or the xtensa implementation, 0 or 1 respectively.
+ *                              If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @return                      The result of a mobilefacenet block
+ */
+dl_matrix3d_t *dl_matrix3d_mobilefaceblock_split(dl_matrix3d_t *in,
+                                                 dl_matrix3d_t *pw_1,
+                                                 dl_matrix3d_t *pw_2,
+                                                 dl_matrix3d_t *pw_bn_scale,
+                                                 dl_matrix3d_t *pw_bn_offset,
+                                                 dl_matrix3d_t *dw,
+                                                 dl_matrix3d_t *dw_bn_scale,
+                                                 dl_matrix3d_t *dw_bn_offset,
+                                                 dl_matrix3d_t *pw_linear_1,
+                                                 dl_matrix3d_t *pw_linear_2,
+                                                 dl_matrix3d_t *pw_linear_bn_scale,
+                                                 dl_matrix3d_t *pw_linear_bn_offset,
+                                                 int stride_x,
+                                                 int stride_y,
+                                                 int padding,
+                                                 int mode,
+                                                 int shortcut);
+
+/**
+ * @brief           Initialize the matrix3d feature map to bias
+ * 
+ * @param out       The matrix3d feature map needs to be initialized
+ * @param bias      The bias of a convolution operation
+ */
+void dl_matrix3d_init_bias(dl_matrix3d_t *out, dl_matrix3d_t *bias);
+
+/**
+ * @brief  Do an elementwise multiplication of two matrix3ds
+ * 
+ * @param out  Preallocated matrix3d, size (n, w, h, c)
+ * @param in1  Input matrix 1, size (n, w, h, c)
+ * @param in2  Input matrix 2, size (n, w, h, c)
+ */
+void dl_matrix3d_multiply(dl_matrix3d_t *out, dl_matrix3d_t *in1, dl_matrix3d_t *in2);
+
+//
+// Activation
+//
+
+/**
+ * @brief Do a standard relu operation, update the input matrix3d
+ *
+ * @param m        Floating point input matrix3d
+ */
+void dl_matrix3d_relu(dl_matrix3d_t *m);
+
+/**
+ * @brief Do a relu (Rectified Linear Unit) operation, update the input matrix3d
+ *
+ * @param m         Floating point input matrix3d
+ * @param clip      If value is higher than this, it will be clipped to this value
+ */
+void dl_matrix3d_relu_clip(dl_matrix3d_t *m, fptp_t clip);
+
+/**
+ * @brief Do a PReLU (Parametric Rectified Linear Unit) operation, update the input matrix3d
+ *
+ * @param in        Floating point input matrix3d
+ * @param alpha     If value is less than zero, it will be updated by multiplying this factor
+ */
+void dl_matrix3d_p_relu(dl_matrix3d_t *in, dl_matrix3d_t *alpha);
+
+/**
+ * @brief Do a leaky relu (Rectified Linear Unit) operation, update the input matrix3d
+ *
+ * @param m         Floating point input matrix3d
+ * @param alpha     If value is less than zero, it will be updated by multiplying this factor
+ */
+void dl_matrix3d_leaky_relu(dl_matrix3d_t *m, fptp_t alpha);
+
+//
+// Conv 1x1
+//
+/**
+ * @brief Do 1x1 convolution with a matrix3d
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param filter     1x1 filter, size (n, 1, 1, c)
+ */
+void dl_matrix3dff_conv_1x1(dl_matrix3d_t *out,
+                            dl_matrix3d_t *in,
+                            dl_matrix3d_t *filter);
+
+/**
+ * @brief Do 1x1 convolution with a matrix3d, with bias adding
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param filter     1x1 filter, size (n, 1, 1, c)
+ * @param bias       Bias, size (1, 1, 1, n)
+ */
+void dl_matrix3dff_conv_1x1_with_bias(dl_matrix3d_t *out,
+                                      dl_matrix3d_t *in,
+                                      dl_matrix3d_t *filter,
+                                      dl_matrix3d_t *bias);
+
+/**
+ * @brief Do 1x1 convolution with an 8-bit fixed point matrix
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param filter     1x1 filter, size (n, 1, 1, c)
+ */
+void dl_matrix3duf_conv_1x1(dl_matrix3d_t *out,
+                            dl_matrix3du_t *in,
+                            dl_matrix3d_t *filter);
+
+/**
+ * @brief Do 1x1 convolution with an 8-bit fixed point matrix, with bias adding
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)  
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param filter     1x1 filter, size (n, 1, 1, c)
+ * @param bias       Bias, size (1, 1, 1, n)
+ */
+void dl_matrix3duf_conv_1x1_with_bias(dl_matrix3d_t *out,
+                                      dl_matrix3du_t *in,
+                                      dl_matrix3d_t *filter,
+                                      dl_matrix3d_t *bias);
+
+//
+// Conv 3x3
+//
+
+/**
+ * @brief Do 3x3 convolution with a matrix3d, without padding
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param f          3x3 filter, size (n, 3, 3, c)
+ * @param step_x     Stride of width
+ * @param step_y     Stride of height
+ */
+void dl_matrix3dff_conv_3x3_op(dl_matrix3d_t *out,
+                               dl_matrix3d_t *in,
+                               dl_matrix3d_t *f,
+                               int step_x,
+                               int step_y);
+
+/**
+ * @brief Do 3x3 convolution with a matrix3d, with bias adding
+ * 
+ * @param in                Input matrix, size (1, w, h, c)
+ * @param filter            3x3 filter, size (n, 3, 3, c)
+ * @param bias              Bias, size (1, 1, 1, n)
+ * @param stride_x          Stride of width
+ * @param stride_y          Stride of height
+ * @param padding           Padding type
+ * @return dl_matrix3d_t*   Resulting matrix3d
+ */
+dl_matrix3d_t *dl_matrix3dff_conv_3x3(dl_matrix3d_t *in,
+                                      dl_matrix3d_t *filter,
+                                      dl_matrix3d_t *bias,
+                                      int stride_x,
+                                      int stride_y,
+                                      dl_padding_type padding);
+
+//
+// Conv Common
+//
+
+/**
+ * @brief Do a general convolution layer pass with an 8-bit fixed point input matrix,
+ *        dimension order is (number, width, height, channel)
+ *
+ * @param in                Input image (dl_matrix3du_t)
+ * @param filter            Weights of the neurons
+ * @param bias              Bias for the CNN layer
+ * @param stride_x          The step length of the convolution window in x(width) direction
+ * @param stride_y          The step length of the convolution window in y(height) direction
+ * @param padding           Padding type
+ * @return dl_matrix3d_t*   Resulting matrix3d
+ *                          NOTE(review): ownership of the returned matrix is not stated
+ *                          here; presumably the caller releases it -- confirm.
+ */
+dl_matrix3d_t *dl_matrix3duf_conv_common(dl_matrix3du_t *in,
+                                         dl_matrix3d_t *filter,
+                                         dl_matrix3d_t *bias,
+                                         int stride_x,
+                                         int stride_y,
+                                         dl_padding_type padding);
+
+/**
+ * @brief Do a general convolution layer pass with a matrix3d input,
+ *        dimension order is (number, width, height, channel)
+ *
+ * @param in                Input image (dl_matrix3d_t)
+ * @param filter            Weights of the neurons
+ * @param bias              Bias for the CNN layer
+ * @param stride_x          The step length of the convolution window in x(width) direction
+ * @param stride_y          The step length of the convolution window in y(height) direction
+ * @param padding           Padding type
+ * @return dl_matrix3d_t*   Resulting matrix3d
+ *                          NOTE(review): ownership of the returned matrix is not stated
+ *                          here; presumably the caller releases it -- confirm.
+ */
+dl_matrix3d_t *dl_matrix3dff_conv_common(dl_matrix3d_t *in,
+                                         dl_matrix3d_t *filter,
+                                         dl_matrix3d_t *bias,
+                                         int stride_x,
+                                         int stride_y,
+                                         dl_padding_type padding);
+
+//
+// Depthwise 3x3
+//
+
+/**
+ * @brief Do 3x3 depthwise convolution with a float matrix3d
+ *
+ * @param in                  Input matrix, size (1, w, h, c)
+ * @param filter              3x3 filter, size (1, 3, 3, c)
+ * @param stride_x            Stride of width
+ * @param stride_y            Stride of height
+ * @param padding             Padding type, 0: valid, 1: same
+ *                            (declared as plain int here, not dl_padding_type)
+ * @return dl_matrix3d_t*     Resulting float matrix3d
+ */
+dl_matrix3d_t *dl_matrix3dff_depthwise_conv_3x3(dl_matrix3d_t *in,
+                                                dl_matrix3d_t *filter,
+                                                int stride_x,
+                                                int stride_y,
+                                                int padding);
+
+/**
+ * @brief Do 3x3 depthwise convolution with an 8-bit fixed point matrix
+ *
+ * @param in                  Input matrix (dl_matrix3du_t), size (1, w, h, c)
+ * @param filter              3x3 filter, size (1, 3, 3, c)
+ * @param stride_x            Stride of width
+ * @param stride_y            Stride of height
+ * @param padding             Padding type, 0: valid, 1: same
+ *                            (declared as plain int here, not dl_padding_type)
+ * @return dl_matrix3d_t*     Resulting float matrix3d
+ */
+dl_matrix3d_t *dl_matrix3duf_depthwise_conv_3x3(dl_matrix3du_t *in,
+                                                dl_matrix3d_t *filter,
+                                                int stride_x,
+                                                int stride_y,
+                                                int padding);
+
+/**
+ * @brief Do 3x3 depthwise convolution with a float matrix3d, without padding
+ *
+ * The function returns nothing; the result goes into the caller-provided @p out.
+ *
+ * @param out                 Preallocated matrix3d, size (1, w, h, n)
+ *                            NOTE(review): the filter is documented as (1, 3, 3, c), so the
+ *                            output channel count is presumably c rather than n -- confirm.
+ * @param in                  Input matrix, size (1, w, h, c)
+ * @param f                   3x3 filter, size (1, 3, 3, c)
+ * @param step_x              Stride of width
+ * @param step_y              Stride of height
+ */
+void dl_matrix3dff_depthwise_conv_3x3_op(dl_matrix3d_t *out,
+                                         dl_matrix3d_t *in,
+                                         dl_matrix3d_t *f,
+                                         int step_x,
+                                         int step_y);
+
+//
+// Depthwise Common
+//
+
+/**
+ * @brief Do a depthwise CNN layer pass, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix3d
+ * @param filter         Weights of the neurons
+ * @param stride_x       The step length of the convolution window in x(width) direction
+ * @param stride_y       The step length of the convolution window in y(height) direction
+ * @param padding        One of VALID or SAME
+ * @return               The result of depthwise CNN layer
+ */
+dl_matrix3d_t *dl_matrix3dff_depthwise_conv_common(dl_matrix3d_t *in,
+                                                   dl_matrix3d_t *filter,
+                                                   int stride_x,
+                                                   int stride_y,
+                                                   dl_padding_type padding);
+
+//
+// FC
+//
+/**
+ * @brief Do a general fully connected layer pass without bias, dimension is (number, width, height, channel)
+ *
+ * The function returns nothing; the result goes into the caller-provided @p out.
+ *
+ * @param out            Resulting matrix3d
+ *                       NOTE(review): presumably preallocated with size (1, 1, 1, h) -- confirm.
+ * @param in             Input matrix3d, size is (1, w, 1, 1)
+ *                       NOTE(review): dl_matrix3dff_fc_with_bias documents its input as
+ *                       (1, 1, 1, w) -- confirm which layout is correct.
+ * @param filter         Weights of the neurons, size is (1, w, h, 1)
+ */
+void dl_matrix3dff_fc(dl_matrix3d_t *out,
+                      dl_matrix3d_t *in,
+                      dl_matrix3d_t *filter);
+
+/**
+ * @brief Do fully connected layer forward, with bias adding
+ *
+ * The function returns nothing; the result goes into the caller-provided @p out.
+ *
+ * @param out       Preallocated resulting matrix, size (1, 1, 1, h)
+ * @param in        Input matrix, size (1, 1, 1, w)
+ * @param filter    Filter matrix, size (1, w, h, 1)
+ * @param bias      Bias matrix, size (1, 1, 1, h)
+ */
+void dl_matrix3dff_fc_with_bias(dl_matrix3d_t *out,
+                                dl_matrix3d_t *in,
+                                dl_matrix3d_t *filter,
+                                dl_matrix3d_t *bias);
+
+//
+// Mobilenet
+//
+
+/**
+ * @brief Do a mobilenet block forward, dimension is (number, width, height, channel)
+ *
+ * NOTE(review): the parameter descriptions below are inferred from the parameter
+ * names only -- confirm against the implementation.
+ *
+ * @param in                 Input matrix3d
+ * @param dilate_filter      Filter of the dilate stage
+ * @param dilate_prelu       PReLU parameters of the dilate stage (presumed)
+ * @param depthwise_filter   Filter of the depthwise stage
+ * @param depthwise_prelu    PReLU parameters of the depthwise stage (presumed)
+ * @param compress_filter    Filter of the compress stage
+ * @param bias               Bias; which stage it is added to is not visible here
+ * @param config             Block configuration (dl_matrix3d_mobilenet_config_t), passed by value
+ * @return                   The result of the mobilenet block
+ */
+dl_matrix3d_t *dl_matrix3dff_mobilenet(dl_matrix3d_t *in,
+                                       dl_matrix3d_t *dilate_filter,
+                                       dl_matrix3d_t *dilate_prelu,
+                                       dl_matrix3d_t *depthwise_filter,
+                                       dl_matrix3d_t *depthwise_prelu,
+                                       dl_matrix3d_t *compress_filter,
+                                       dl_matrix3d_t *bias,
+                                       dl_matrix3d_mobilenet_config_t config);
+
+/**
+ * @brief Do a mobilenet block forward on a matrix3du input, dimension is (number, width, height, channel)
+ *
+ * NOTE(review): the parameter descriptions below are inferred from the parameter
+ * names only -- confirm against the implementation.
+ *
+ * @param in                 Input matrix3du
+ * @param dilate_filter      Filter of the dilate stage
+ * @param dilate_prelu       PReLU parameters of the dilate stage (presumed)
+ * @param depthwise_filter   Filter of the depthwise stage
+ * @param depthwise_prelu    PReLU parameters of the depthwise stage (presumed)
+ * @param compress_filter    Filter of the compress stage
+ * @param bias               Bias; which stage it is added to is not visible here
+ * @param config             Block configuration (dl_matrix3d_mobilenet_config_t), passed by value
+ * @return                   The result of the mobilenet block
+ */
+dl_matrix3d_t *dl_matrix3duf_mobilenet(dl_matrix3du_t *in,
+                                       dl_matrix3d_t *dilate_filter,
+                                       dl_matrix3d_t *dilate_prelu,
+                                       dl_matrix3d_t *depthwise_filter,
+                                       dl_matrix3d_t *depthwise_prelu,
+                                       dl_matrix3d_t *compress_filter,
+                                       dl_matrix3d_t *bias,
+                                       dl_matrix3d_mobilenet_config_t config);
--- a/tools/sdk/esp32/include/esp-face/lib/include/dl_lib_matrix3dq.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/dl_lib_matrix3dq.h
--- a/tools/sdk/esp32/include/esp-face/lib/include/frmn.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/frmn.h
@ -0,0 +1,43 @@
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+
+    /**
+     * @brief Run the forward pass of the frmn face recognition model, computed in float.
+     *
+     * @param in    Image matrix, rgb888 format, size is 56x56, normalized
+     * @return dl_matrix3d_t* Face ID feature vector, size is 512
+     *         NOTE(review): ownership of the returned matrix is not stated here;
+     *         presumably the caller releases it -- confirm.
+     */
+    dl_matrix3d_t *frmn(dl_matrix3d_t *in);
+    
+    /**@{*/
+    /**
+     * @brief Run the forward pass of the selected face recognition model, computed in quantization.
+     *
+     * This comment sits inside a Doxygen member group and is meant to describe every
+     * function up to the closing group marker: frmn_q, frmn2p_q, mfn56_42m_q,
+     * mfn56_72m_q, mfn56_112m_q and mfn56_156m_q all share this signature.
+     *
+     * @param in    Image matrix, rgb888 format, size is 56x56, normalized
+     * @param mode  0: C implementation; 1: hand-written Xtensa instruction implementation
+     * @return      Face ID feature vector, size is 512
+     */
+    dl_matrix3dq_t *frmn_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    dl_matrix3dq_t *frmn2p_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    dl_matrix3dq_t *mfn56_42m_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    dl_matrix3dq_t *mfn56_72m_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    dl_matrix3dq_t *mfn56_112m_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    dl_matrix3dq_t *mfn56_156m_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**@}*/
+
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/esp32/include/esp-face/lib/include/hd_model.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/hd_model.h
@ -0,0 +1,66 @@
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+
+    typedef struct
+    {
+        int num;              /*!< The total number of the boxes */
+        dl_matrix3d_t *cls;   /*!< The class feature map corresponding to the box. size: (height, width, anchor_num, 1) */
+        dl_matrix3d_t *score; /*!< The confidence score feature map of the class corresponding to the box. size: (height, width, anchor_num, 1) */
+        dl_matrix3d_t *boxes; /*!< (x, y, w, h) of the boxes. x and y are the center coordinates. size: (height, width, anchor_num, 4) */
+    } detection_result_t; /*!< One detection result; the hand detection models in this header return an array of pointers to these */
+
+    /**
+     * @brief Run the forward pass of the hd_nano1 hand detection model, computed in quantization.
+     *
+     * @param in                      A normalized image matrix in rgb888 format, its width and height must be integer multiples of 16.
+     * @param mode                    0: C implementation; 1: hand-written Xtensa instruction implementation
+     * @return detection_result_t**   Detection results
+     *                                NOTE(review): the number of entries in the returned array is
+     *                                not stated here; presumably it is released with
+     *                                detection_results_free -- confirm.
+     */
+    detection_result_t **hd_nano1_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Run the forward pass of the hd_lite1 hand detection model, computed in quantization.
+     *
+     * @param in                      A normalized image matrix in rgb888 format, its width and height must be integer multiples of 32.
+     * @param mode                    0: C implementation; 1: hand-written Xtensa instruction implementation.
+     * @return detection_result_t**   Detection results.
+     *                                NOTE(review): the number of entries in the returned array is
+     *                                not stated here; presumably it is released with
+     *                                detection_results_free -- confirm.
+     */
+    detection_result_t **hd_lite1_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Free a single detection result.
+     *
+     * @param m     The single detection result to free.
+     *              NOTE(review): presumably the cls, score and boxes matrices it holds
+     *              are freed as well -- confirm against the implementation.
+     */
+    void detection_result_free(detection_result_t *m);
+
+    /**
+     * @brief Free a group of detection results coming from different feature maps.
+     *
+     * @param m       The detection result group (array of detection_result_t pointers)
+     * @param length  The number of detection results in the group
+     */
+    void detection_results_free(detection_result_t **m, int length);
+
+    /**
+     * @brief Test the result of hand detection model.
+     *
+     * Takes no arguments and returns nothing. Declared with an explicit (void)
+     * parameter list so that C compilers treat it as a real prototype and
+     * reject calls that pass arguments.
+     */
+    void hd_test(void);
+
+    /**
+     * @brief Test the forward time of hand detection model.
+     *
+     * Takes no arguments and returns nothing. Declared with an explicit (void)
+     * parameter list so that C compilers treat it as a real prototype and
+     * reject calls that pass arguments.
+     */
+    void hd_time_test(void);
+
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/esp32/include/esp-face/lib/include/hp_model.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/hp_model.h
@ -0,0 +1,43 @@
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+
+    /**
+     * @brief Run the forward pass of the hp_nano1_ls16 hand pose estimation model, computed in quantization.
+     *
+     * @param in                 A normalized image matrix in rgb888 format, its size is (1, 128, 128, 3).
+     * @param mode               0: C implementation; 1: hand-written Xtensa instruction implementation
+     * @return dl_matrix3d_t*    The resulting hand joint point coordinates, the size is (1, 1, 21, 2)
+     */
+    dl_matrix3d_t *hp_nano1_ls16_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Run the forward pass of the hp_lite1 hand pose estimation model, computed in quantization.
+     *
+     * @param in                 A normalized image matrix in rgb888 format, its size is (1, 128, 128, 3).
+     * @param mode               0: C implementation; 1: hand-written Xtensa instruction implementation
+     * @return dl_matrix3d_t*    The resulting hand joint point coordinates, the size is (1, 1, 21, 2)
+     */
+    dl_matrix3d_t *hp_lite1_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Test the result of hand pose estimation model.
+     *
+     * Takes no arguments and returns nothing. Declared with an explicit (void)
+     * parameter list so that C compilers treat it as a real prototype and
+     * reject calls that pass arguments.
+     */
+    void hp_test(void);
+
+    /**
+     * @brief Test the forward time of hand pose estimation model.
+     *
+     * Takes no arguments and returns nothing. Declared with an explicit (void)
+     * parameter list so that C compilers treat it as a real prototype and
+     * reject calls that pass arguments.
+     */
+    void hp_time_test(void);
+
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/esp32/include/esp-face/lib/include/lssh.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/lssh.h
@ -0,0 +1,91 @@
+/*
+  * ESPRESSIF MIT License
+  *
+  * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+  *
+  * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+  * it is free of charge, to any person obtaining a copy of this software and associated
+  * documentation files (the "Software"), to deal in the Software without restriction, including
+  * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+  * to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in all copies or
+  * substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+  * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+  * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *
+  */
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+#include "freertos/FreeRTOS.h"
+
+    typedef struct
+    {
+        int resized_height;     /*!< NOTE(review): presumably the height the input image is resized to -- confirm */
+        int resized_width;      /*!< NOTE(review): presumably the width the input image is resized to -- confirm */
+        fptp_t y_resize_scale;  /*!< NOTE(review): presumably the vertical resize factor -- confirm */
+        fptp_t x_resize_scale;  /*!< NOTE(review): presumably the horizontal resize factor -- confirm */
+        int enabled_top_k;      /*!< NOTE(review): presumably how many top-scoring candidates are kept -- confirm */
+        fptp_t score_threshold; /*!< NOTE(review): presumably the minimum score for a candidate to be kept -- confirm */
+        fptp_t nms_threshold;   /*!< NOTE(review): presumably the overlap threshold used by non-maximum suppression -- confirm */
+
+        dl_conv_mode mode;      /*!< Convolution implementation selector (dl_conv_mode) */
+    } lssh_config_t; /*!< LSSH configuration; field meanings above are inferred from the names only */
+
+    typedef struct
+    {
+        int *anchor_size; /*!< Pointer to anchor size value(s); NOTE(review): element count is not visible here -- confirm */
+        int stride;       /*!< NOTE(review): presumably the stride of this module's feature map relative to the input -- confirm */
+        int boundary;     /*!< NOTE(review): meaning not visible from this header -- confirm */
+    } lssh_module_config_t; /*!< Configuration of a single LSSH module */
+
+    typedef struct
+    {
+        lssh_module_config_t *module_config; /*!< Pointer to the per-module configuration(s) */
+        int number;                          /*!< NOTE(review): presumably the number of entries behind module_config -- confirm */
+    } lssh_modules_config_t; /*!< Set of LSSH module configurations */
+
+    typedef struct
+    {
+        dl_matrix3d_t *category;        /*!< NOTE(review): presumably the classification output of the module -- confirm */
+        dl_matrix3d_t *box_offset;      /*!< NOTE(review): presumably the bounding box offsets -- confirm */
+        dl_matrix3d_t *landmark_offset; /*!< NOTE(review): presumably the landmark offsets; may be unused by the without-landmark variant -- confirm */
+    } lssh_module_result_t; /*!< Output of a single LSSH module */
+
+    /**
+     * @brief Free a single LSSH module result.
+     *
+     * NOTE(review): presumably frees the category, box_offset and landmark_offset
+     * matrices held by the result -- confirm against the implementation.
+     *
+     * @param value The module result to free; note that it is passed by value, not by pointer
+     */
+    void lssh_module_result_free(lssh_module_result_t value);
+
+    /**
+     * @brief Free an array of LSSH module results.
+     *
+     * NOTE(review): whether the array itself is freed in addition to each element
+     * is not visible from this header -- confirm against the implementation.
+     *
+     * @param values Pointer to the first module result of the array
+     * @param length Number of module results in the array
+     */
+    void lssh_module_results_free(lssh_module_result_t *values, int length);
+
+    // sparse_mn_5_q: module configuration and quantized forward passes of the sparse_mn_5 model,
+    // in a variant without and a variant with landmark output. NOTE(review): free_image presumably lets the
+    // callee release `image`; results are presumably released with lssh_module_results_free -- confirm.
+    extern lssh_modules_config_t sparse_mn_5_modules_config; /*!< Declared here, defined elsewhere */
+    lssh_module_result_t *sparse_mn_5_q_without_landmark(dl_matrix3du_t *image, bool free_image, int enabled_top_k, dl_conv_mode mode);
+    lssh_module_result_t *sparse_mn_5_q_with_landmark(dl_matrix3du_t *image, bool free_image, int enabled_top_k, dl_conv_mode mode);
+
+#ifdef __cplusplus
+}
+#endif
--- a/tools/sdk/esp32/include/esp-face/lib/include/mtmn.h
+++ b/tools/sdk/esp32/include/esp-face/lib/include/mtmn.h
@ -0,0 +1,142 @@
+/*
+  * ESPRESSIF MIT License
+  *
+  * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+  *
+  * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+  * it is free of charge, to any person obtaining a copy of this software and associated
+  * documentation files (the "Software"), to deal in the Software without restriction, including
+  * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+  * to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in all copies or
+  * substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+  * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+  * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *
+  */
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+
+    /**
+     * @brief Detection results with MTMN.
+     *
+     * Returned by the pnet, rnet and onet forward functions declared in this header.
+     */
+    typedef struct
+    {
+        dl_matrix3d_t *category;    /*!< Classification result after softmax, channel is 2 */
+        dl_matrix3d_t *offset;      /*!< Bounding box offset of 2 points: top-left and bottom-right, channel is 4 */
+        dl_matrix3d_t *landmark;    /*!< Offsets of 5 landmarks:
+                                     * - Left eye
+                                     * - Mouth left side
+                                     * - Nose
+                                     * - Right eye
+                                     * - Mouth right side
+                                     *
+                                     * channel is 10
+                                     */
+    } mtmn_net_t;
+
+
+    /**
+     * @brief Free a mtmn_net_t
+     *
+     * @param p         Pointer to the mtmn_net_t to free
+     *                  NOTE(review): presumably the matrices it holds are freed too -- confirm.
+     */
+
+    void mtmn_net_t_free(mtmn_net_t *p);
+
+    /**
+     * @brief Forward the pnet process, coarse detection. Calculate in float.
+     *
+     * @param in        Image matrix, rgb888 format, size is 320x240
+     * @return          Scores for every pixel, and the corresponding box offsets.
+     */
+    mtmn_net_t *pnet_lite_f(dl_matrix3du_t *in);
+
+    /**
+     * @brief Forward the rnet process, refining the boxes from pnet. Calculate in float.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @return          Scores for every box, and the corresponding box offsets.
+     */
+    mtmn_net_t *rnet_lite_f_with_score_verify(dl_matrix3du_t *in, float threshold);
+
+    /**
+     * @brief Forward the onet process, refining the boxes from rnet. Calculate in float.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @return          Scores for every box, and the corresponding box offsets and landmarks.
+     */
+    mtmn_net_t *onet_lite_f_with_score_verify(dl_matrix3du_t *in, float threshold);
+
+    /**
+     * @brief Forward the pnet process, coarse detection. Calculate in quantization.
+     *
+     * @param in        Image matrix, rgb888 format, size is 320x240
+     * @param mode      Convolution implementation selector (dl_conv_mode)
+     *                  NOTE(review): presumably 0: C implementation; 1: hand-written Xtensa
+     *                  implementation -- confirm.
+     * @return          Scores for every pixel, and the corresponding box offsets.
+     */
+    mtmn_net_t *pnet_lite_q(dl_matrix3du_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the rnet process, refining the boxes from pnet. Calculate in quantization.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @param mode      Convolution implementation selector (dl_conv_mode)
+     *                  NOTE(review): presumably 0: C implementation; 1: hand-written Xtensa
+     *                  implementation -- confirm.
+     * @return          Scores for every box, and the corresponding box offsets.
+     */
+    mtmn_net_t *rnet_lite_q_with_score_verify(dl_matrix3du_t *in, float threshold, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the onet process, refining the boxes from rnet. Calculate in quantization.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @param mode      Convolution implementation selector (dl_conv_mode)
+     *                  NOTE(review): presumably 0: C implementation; 1: hand-written Xtensa
+     *                  implementation -- confirm.
+     * @return          Scores for every box, and the corresponding box offsets and landmarks.
+     */
+    mtmn_net_t *onet_lite_q_with_score_verify(dl_matrix3du_t *in, float threshold, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the pnet process of the "heavy" model variant, coarse detection. Calculate in quantization.
+     *
+     * @param in        Image matrix, rgb888 format, size is 320x240
+     * @param mode      Convolution implementation selector (dl_conv_mode)
+     *                  NOTE(review): presumably 0: C implementation; 1: hand-written Xtensa
+     *                  implementation -- confirm.
+     * @return          Scores for every pixel, and the corresponding box offsets.
+     */
+    mtmn_net_t *pnet_heavy_q(dl_matrix3du_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the rnet process of the "heavy" model variant, refining the boxes from pnet. Calculate in quantization.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @param mode      Convolution implementation selector (dl_conv_mode)
+     *                  NOTE(review): presumably 0: C implementation; 1: hand-written Xtensa
+     *                  implementation -- confirm.
+     * @return          Scores for every box, and the corresponding box offsets.
+     */
+    mtmn_net_t *rnet_heavy_q_with_score_verify(dl_matrix3du_t *in, float threshold, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the onet process of the "heavy" model variant, refining the boxes from rnet. Calculate in quantization.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @param mode      Convolution implementation selector (dl_conv_mode)
+     *                  NOTE(review): presumably 0: C implementation; 1: hand-written Xtensa
+     *                  implementation -- confirm.
+     * @return          Scores for every box, and the corresponding box offsets and landmarks.
+     */
+    mtmn_net_t *onet_heavy_q_with_score_verify(dl_matrix3du_t *in, float threshold, dl_conv_mode mode);
+
+#ifdef __cplusplus
+}
+#endif