IDF release/v3.3 (#3672)

ESP-IDF release/v3.3: 66d3783c8 esp-face: 420fc7e esp32-camera: 0107093
2020-11-03 21:20:00 +02:00
parent 6e5be78838
commit 22b427df0f
256 changed files with 6074 additions and 1011 deletions
--- a/tools/sdk/include/esp-face/cat_face_3.h
+++ b/tools/sdk/include/esp-face/cat_face_3.h
@ -0,0 +1,40 @@
+/*
+ * ESPRESSIF MIT License
+ *
+ * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+ *
+ * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+ * it is free of charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+ * to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or
+ * substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+#include "freertos/FreeRTOS.h"
+#include "detection.h"
+
+    extern detection_model_t cat_face_3_model; /*!< Detection model instance; declared here, defined outside this header */
+
+#ifdef __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/detection.h
+++ b/tools/sdk/include/esp-face/detection.h
@ -0,0 +1,87 @@
+/*
+  * ESPRESSIF MIT License
+  *
+  * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+  *
+  * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+  * it is free of charge, to any person obtaining a copy of this software and associated
+  * documentation files (the "Software"), to deal in the Software without restriction, including
+  * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+  * to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in all copies or
+  * substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+  * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+  * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *
+  */
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+#include "freertos/FreeRTOS.h"
+
+    typedef enum
+    {
+        Anchor_Point, /*!< Anchor point detection model */
+        Anchor_Box    /*!< Anchor box detection model */
+    } detection_model_type_t;
+
+    typedef struct
+    {
+        int **anchors_shape; /*!< Anchor shape of this stage */
+        int stride;          /*!< Zoom in stride of this stage */
+        int boundary;        /*!< Detection image low-limit of this stage */
+        int project_offset;  /*!< Project offset of this stage */
+    } detection_stage_config_t;
+
+    typedef struct
+    {
+        dl_matrix3dq_t *score;           /*!< Score feature map of this stage */
+        dl_matrix3dq_t *box_offset;      /*!< Box offset feature map of this stage */
+        dl_matrix3dq_t *landmark_offset; /*!< Landmark offset feature map of this stage */
+    } detection_stage_result_t;
+
+    typedef struct
+    {
+        int resized_height;    /*!< The height after resizing */
+        int resized_width;     /*!< The width after resizing */
+        fptp_t y_resize_scale; /*!< resized_height / input_height */
+        fptp_t x_resize_scale; /*!< resized_width / input_width */
+        qtp_t score_threshold; /*!< Score threshold of detection model */
+        fptp_t nms_threshold;  /*!< NMS threshold of detection model */
+        bool with_landmark;    /*!< Whether to detect with landmark, true: with, false: without */
+        bool free_image;       /*!< Whether to free the resized image */
+        int enabled_top_k;     /*!< The number of enabled stages */
+    } detection_model_config_t;
+
+    typedef struct
+    {
+        detection_stage_config_t *stage_config;                                                                      /*!< Configuration of each stage */
+        int stage_number;                                                                                            /*!< The number of stages */
+        detection_model_type_t model_type;                                                                           /*!< The type of detection model */
+        detection_model_config_t model_config;                                                                       /*!< Configuration of detection model */
+        detection_stage_result_t *(*op)(dl_matrix3dq_t *, detection_model_config_t *);                               /*!< The function of detection inference */
+        void *(*get_boxes)(detection_stage_result_t *, detection_model_config_t *, detection_stage_config_t *, int); /*!< The function of how to get real boxes */
+    } detection_model_t;
+
+    /**
+     * @brief Free a 'detection_stage_result_t' type value
+     *
+     * @param value The 'detection_stage_result_t' type value to free (the struct is passed by value)
+     */
+    void free_detection_stage_result(detection_stage_result_t value);
+
+#ifdef __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/dl_lib_matrix3d.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix3d.h
@ -7,10 +7,14 @@
 #include <math.h>
 #include <assert.h>

-#if CONFIG_SPIRAM_SUPPORT
+#if CONFIG_SPIRAM_SUPPORT || CONFIG_ESP32_SPIRAM_SUPPORT
 #include "freertos/FreeRTOS.h"
+#define DL_SPIRAM_SUPPORT 1
+#else
+#define DL_SPIRAM_SUPPORT 0
 #endif

+
 #ifndef max
 #define max(x, y) (((x) < (y)) ? (y) : (x))
 #endif
@ -30,17 +34,17 @@ typedef enum

 typedef enum
 {
-    PADDING_VALID = 0,
-    PADDING_SAME = 1,
-    PADDING_SAME_DONT_FREE_INPUT = 2,
-    PADDING_SAME_MXNET = 3,
+    PADDING_VALID = 0,                   /*!< Valid padding */
+    PADDING_SAME = 1,                    /*!< Same padding, from right to left, free input */
+    PADDING_SAME_DONT_FREE_INPUT = 2,    /*!< Same padding, from right to left, do not free input */
+    PADDING_SAME_MXNET = 3,              /*!< Same padding, from left to right */
 } dl_padding_type;

 typedef enum
 {
-    DL_POOLING_MAX = 0,
-    DL_POOLING_AVG = 1,
-} dl_pooling_type;
+    DL_POOLING_MAX = 0,        /*!< Max pooling */
+    DL_POOLING_AVG = 1,        /*!< Average pooling */
+} dl_pooling_type; 
 /*
 * Matrix for 3d
 * @Warning: the sequence of variables is fixed, cannot be modified, otherwise there will be errors in esp_dsp_dot_float
@ -65,11 +69,17 @@ typedef struct
    uc_t *item; /*!< Data */
 } dl_matrix3du_t;

+typedef enum
+{
+    UPSAMPLE_NEAREST_NEIGHBOR = 0, /*!< Use nearest neighbor interpolation as the upsample method */
+    UPSAMPLE_BILINEAR = 1,        /*!< Use bilinear interpolation as the upsample method */
+} dl_upsample_type;
+
 typedef struct
 {
-    int stride_x;
-    int stride_y;
-    dl_padding_type padding;
+    int stride_x;                    /*!< Strides of width */
+    int stride_y;                    /*!< Strides of height */
+    dl_padding_type padding;         /*!< Padding type */
 } dl_matrix3d_mobilenet_config_t;

 /*
@ -80,20 +90,20 @@ typedef struct
 * @param align Align of memory. If not required, set 0.
 * @return Pointer of allocated memory. Null for failed.
 */
-static inline void *dl_lib_calloc(int cnt, int size, int align)
+static void *dl_lib_calloc(int cnt, int size, int align)
 {
    int total_size = cnt * size + align + sizeof(void *);
    void *res = malloc(total_size);
    if (NULL == res)
    {
-#if CONFIG_SPIRAM_SUPPORT
+#if DL_SPIRAM_SUPPORT
        res = heap_caps_malloc(total_size, MALLOC_CAP_8BIT | MALLOC_CAP_SPIRAM);
    }
    if (NULL == res)
    {
        printf("Item psram alloc failed. Size: %d x %d\n", cnt, size);
 #else
-        printf("Item alloc failed. Size: %d x %d\n", cnt, size);
+        printf("Item alloc failed. Size: %d x %d, SPIRAM_FLAG: %d\n", cnt, size, DL_SPIRAM_SUPPORT);
 #endif
        return NULL;
    }
@ -109,6 +119,10 @@ static inline void *dl_lib_calloc(int cnt, int size, int align)
    return (void *)aligned;
 }

+/**
+ * @brief Free the memory space allocated by 'dl_lib_calloc'
+ * 
+ */
 static inline void dl_lib_free(void *d)
 {
    if (NULL == d)
@ -286,15 +300,15 @@ void dl_matrix3d_sliced_transform_nchw(dl_matrix3d_t *out,
 /**
 * @brief Do a general CNN layer pass, dimension is (number, width, height, channel)
 *
- * @param in             Input matrix3d
- * @param filter         Weights of the neurons
- * @param bias           Bias for the CNN layer
- * @param stride_x       The step length of the convolution window in x(width) direction
- * @param stride_y       The step length of the convolution window in y(height) direction
- * @param padding        One of VALID or SAME
- * @param mode           Do convolution using C implement or xtensa implement, 0 or 1, with respect
- *                       If ESP_PLATFORM is not defined, this value is not used. Default is 0
- * @return               The result of CNN layer
+ * @param in               Input matrix3d
+ * @param filter           Weights of the neurons
+ * @param bias             Bias for the CNN layer
+ * @param stride_x         The step length of the convolution window in x(width) direction
+ * @param stride_y         The step length of the convolution window in y(height) direction
+ * @param padding          One of VALID or SAME
+ * @param mode             Do convolution using C implement or xtensa implement, 0 or 1, with respect
+ *                         If ESP_PLATFORM is not defined, this value is not used. Default is 0
+ * @return dl_matrix3d_t*  The result of CNN layer
 */
 dl_matrix3d_t *dl_matrix3d_conv(dl_matrix3d_t *in,
                                dl_matrix3d_t *filter,
@ -316,14 +330,14 @@ dl_matrix3d_t *dl_matrix3d_global_pool(dl_matrix3d_t *in);
 /**
 * @brief Calculate pooling layer of a feature map
 *
- * @param in        Input matrix, size (1, w, h, c)
- * @param f_w       Window width
- * @param f_h       Window height 
- * @param stride_x  Stride in horizontal direction
- * @param stride_y  Stride in vertical direction
- * @param padding   Padding type: PADDING_VALID and PADDING_SAME
- * @param pooling_type   Pooling type: DL_POOLING_MAX and POOLING_AVG
- * @return          Resulting matrix, size (1, w', h', c)
+ * @param in               Input matrix, size (1, w, h, c)
+ * @param f_w              Window width
+ * @param f_h              Window height 
+ * @param stride_x         Stride in horizontal direction
+ * @param stride_y         Stride in vertical direction
+ * @param padding          Padding type: PADDING_VALID and PADDING_SAME
+ * @param pooling_type     Pooling type: DL_POOLING_MAX or DL_POOLING_AVG
+ * @return dl_matrix3d_t*  Resulting matrix, size (1, w', h', c)
 */
 dl_matrix3d_t *dl_matrix3d_pooling(dl_matrix3d_t *in,
                                   int f_w,
@ -346,20 +360,20 @@ void dl_matrix3d_batch_normalize(dl_matrix3d_t *m,
 /**
 * @brief Add a pair of matrix3d item-by-item: res=in_1+in_2
 *
- * @param in_1           First Floating point input matrix3d
- * @param in_2           Second Floating point input matrix3d
+ * @param in_1             First Floating point input matrix3d
+ * @param in_2             Second Floating point input matrix3d
 *
- * @return               Added data
+ * @return dl_matrix3d_t*  Added data
 */
 dl_matrix3d_t *dl_matrix3d_add(dl_matrix3d_t *in_1, dl_matrix3d_t *in_2);

 /**
 * @brief Concatenate the channels of two matrix3ds into a new matrix3d
 *
- * @param in_1           First Floating point input matrix3d
- * @param in_2           Second Floating point input matrix3d
+ * @param in_1             First Floating point input matrix3d
+ * @param in_2             Second Floating point input matrix3d
 *
- * @return               A newly allocated matrix3d with as avlues in_1|in_2
+ * @return dl_matrix3d_t*  A newly allocated matrix3d with values in_1|in_2
 */
 dl_matrix3d_t *dl_matrix3d_concat(dl_matrix3d_t *in_1, dl_matrix3d_t *in_2);

@ -477,8 +491,21 @@ dl_matrix3d_t *dl_matrix3d_mobilefaceblock_split(dl_matrix3d_t *in,
                                                 int mode,
                                                 int shortcut);

+/**
+ * @brief           Initialize the matrix3d feature map to bias
+ * 
+ * @param out       The matrix3d feature map needs to be initialized
+ * @param bias      The bias of a convolution operation
+ */
 void dl_matrix3d_init_bias(dl_matrix3d_t *out, dl_matrix3d_t *bias);

+/**
+ * @brief  Do an elementwise multiplication of two matrix3ds
+ * 
+ * @param out  Preallocated matrix3d, size (n, w, h, c)
+ * @param in1  Input matrix 1, size (n, w, h, c)
+ * @param in2  Input matrix 2, size (n, w, h, c)
+ */
 void dl_matrix3d_multiply(dl_matrix3d_t *out, dl_matrix3d_t *in1, dl_matrix3d_t *in2);

 //
@ -519,19 +546,49 @@ void dl_matrix3d_leaky_relu(dl_matrix3d_t *m, fptp_t alpha);
 //
 // Conv 1x1
 //
+/**
+ * @brief Do 1x1 convolution with a matrix3d
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param filter     1x1 filter, size (n, 1, 1, c)
+ */
 void dl_matrix3dff_conv_1x1(dl_matrix3d_t *out,
                            dl_matrix3d_t *in,
                            dl_matrix3d_t *filter);

+/**
+ * @brief Do 1x1 convolution with a matrix3d, with bias adding
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param filter     1x1 filter, size (n, 1, 1, c)
+ * @param bias       Bias, size (1, 1, 1, n)
+ */
 void dl_matrix3dff_conv_1x1_with_bias(dl_matrix3d_t *out,
                                      dl_matrix3d_t *in,
                                      dl_matrix3d_t *filter,
                                      dl_matrix3d_t *bias);

+/**
+ * @brief Do 1x1 convolution with an 8-bit fixed point matrix
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param filter     1x1 filter, size (n, 1, 1, c)
+ */
 void dl_matrix3duf_conv_1x1(dl_matrix3d_t *out,
                            dl_matrix3du_t *in,
                            dl_matrix3d_t *filter);

+/**
+ * @brief Do 1x1 convolution with an 8-bit fixed point matrix, with bias adding
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)  
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param filter     1x1 filter, size (n, 1, 1, c)
+ * @param bias       Bias, size (1, 1, 1, n)
+ */
 void dl_matrix3duf_conv_1x1_with_bias(dl_matrix3d_t *out,
                                      dl_matrix3du_t *in,
                                      dl_matrix3d_t *filter,
@ -540,12 +597,33 @@ void dl_matrix3duf_conv_1x1_with_bias(dl_matrix3d_t *out,
 //
 // Conv 3x3
 //
+
+/**
+ * @brief Do 3x3 convolution with a matrix3d, without padding
+ * 
+ * @param out        Preallocated matrix3d, size (1, w, h, n)
+ * @param in         Input matrix, size (1, w, h, c)
+ * @param f          3x3 filter, size (n, 3, 3, c)
+ * @param step_x     Stride of width
+ * @param step_y     Stride of height
+ */
 void dl_matrix3dff_conv_3x3_op(dl_matrix3d_t *out,
                               dl_matrix3d_t *in,
                               dl_matrix3d_t *f,
                               int step_x,
                               int step_y);

+/**
+ * @brief Do 3x3 convolution with a matrix3d, with bias adding
+ * 
+ * @param in                Input matrix, size (1, w, h, c)
+ * @param filter            3x3 filter, size (n, 3, 3, c)
+ * @param bias              Bias, size (1, 1, 1, n)
+ * @param stride_x          Stride of width
+ * @param stride_y          Stride of height
+ * @param padding           Padding type
+ * @return dl_matrix3d_t*   Resulting matrix3d
+ */
 dl_matrix3d_t *dl_matrix3dff_conv_3x3(dl_matrix3d_t *in,
                                      dl_matrix3d_t *filter,
                                      dl_matrix3d_t *bias,
@ -557,6 +635,17 @@ dl_matrix3d_t *dl_matrix3dff_conv_3x3(dl_matrix3d_t *in,
 // Conv Common
 //

+/**
+ * @brief Do a general convolution layer pass with an 8-bit fixed point matrix, size is (number, width, height, channel)
+ * 
+ * @param in                Input image
+ * @param filter            Weights of the neurons
+ * @param bias              Bias for the CNN layer
+ * @param stride_x          The step length of the convolution window in x(width) direction
+ * @param stride_y          The step length of the convolution window in y(height) direction
+ * @param padding           Padding type
+ * @return dl_matrix3d_t*   Resulting matrix3d
+ */
 dl_matrix3d_t *dl_matrix3duf_conv_common(dl_matrix3du_t *in,
                                         dl_matrix3d_t *filter,
                                         dl_matrix3d_t *bias,
@ -564,6 +653,17 @@ dl_matrix3d_t *dl_matrix3duf_conv_common(dl_matrix3du_t *in,
                                         int stride_y,
                                         dl_padding_type padding);

+/**
+ * @brief Do a general convolution layer pass, size is (number, width, height, channel)
+ * 
+ * @param in                Input image
+ * @param filter            Weights of the neurons
+ * @param bias              Bias for the CNN layer
+ * @param stride_x          The step length of the convolution window in x(width) direction
+ * @param stride_y          The step length of the convolution window in y(height) direction
+ * @param padding           Padding type
+ * @return dl_matrix3d_t*   Resulting matrix3d
+ */
 dl_matrix3d_t *dl_matrix3dff_conv_common(dl_matrix3d_t *in,
                                         dl_matrix3d_t *filter,
                                         dl_matrix3d_t *bias,
@ -575,18 +675,47 @@ dl_matrix3d_t *dl_matrix3dff_conv_common(dl_matrix3d_t *in,
 // Depthwise 3x3
 //

+/**
+ * @brief Do 3x3 depthwise convolution with a float matrix3d
+ * 
+ * @param in                  Input matrix, size (1, w, h, c)
+ * @param filter              3x3 filter, size (1, 3, 3, c)
+ * @param stride_x            Stride of width
+ * @param stride_y            Stride of height
+ * @param padding             Padding type, 0: valid, 1: same
+ * @return dl_matrix3d_t*     Resulting float matrix3d
+ */
 dl_matrix3d_t *dl_matrix3dff_depthwise_conv_3x3(dl_matrix3d_t *in,
                                                dl_matrix3d_t *filter,
                                                int stride_x,
                                                int stride_y,
                                                int padding);

+/**
+ * @brief Do 3x3 depthwise convolution with an 8-bit fixed point matrix
+ * 
+ * @param in                  Input matrix, size (1, w, h, c)
+ * @param filter              3x3 filter, size (1, 3, 3, c)
+ * @param stride_x            Stride of width
+ * @param stride_y            Stride of height
+ * @param padding             Padding type, 0: valid, 1: same
+ * @return dl_matrix3d_t*     Resulting float matrix3d
+ */
 dl_matrix3d_t *dl_matrix3duf_depthwise_conv_3x3(dl_matrix3du_t *in,
                                                dl_matrix3d_t *filter,
                                                int stride_x,
                                                int stride_y,
                                                int padding);

+/**
+ * @brief Do 3x3 depthwise convolution with a float matrix3d, without padding
+ * 
+ * @param out                 Preallocated matrix3d, size (1, w, h, n)
+ * @param in                  Input matrix, size (1, w, h, c)
+ * @param f                   3x3 filter, size (1, 3, 3, c)
+ * @param step_x              Stride of width
+ * @param step_y              Stride of height
+ */
 void dl_matrix3dff_depthwise_conv_3x3_op(dl_matrix3d_t *out,
                                         dl_matrix3d_t *in,
                                         dl_matrix3d_t *f,
@ -630,6 +759,14 @@ void dl_matrix3dff_fc(dl_matrix3d_t *out,
                      dl_matrix3d_t *in,
                      dl_matrix3d_t *filter);

+/**
+ * @brief Do fully connected layer forward, with bias adding
+ *
+ * @param out       Preallocated resulting matrix, size (1, 1, 1, h)
+ * @param in        Input matrix, size (1, 1, 1, w)
+ * @param filter    Filter matrix, size (1, w, h, 1)
+ * @param bias      Bias matrix, size (1, 1, 1, h)
+ */
 void dl_matrix3dff_fc_with_bias(dl_matrix3d_t *out,
                                dl_matrix3d_t *in,
                                dl_matrix3d_t *filter,
--- a/tools/sdk/include/esp-face/dl_lib_matrix3dq.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix3dq.h
@ -56,6 +56,20 @@ typedef struct
    int compress_exponent;   /*!< Exponent of compress filter */
 } dl_matrix3dq_mobilenet_config_t;

+typedef struct
+{
+    int stride_x;            /*!< Stride of width */
+    int stride_y;            /*!< Stride of height */
+    dl_padding_type padding; /*!< Padding type */
+    dl_conv_mode mode;       /*!< Implementation mode */
+    int dw1_exponent;        /*!< Exponent of dw1 filter */
+    int pw1_exponent;        /*!< Exponent of pw1 filter */
+    int dw2_exponent;        /*!< Exponent of dw2 filter */
+    int pw2_exponent;        /*!< Exponent of pw2 filter */
+    int shortcut;           /*!< Shortcut connection flag */
+    int save_input;         /*!< Input save flag */
+} dl_matrix3dq_blazeblock_config_t;
+
 //
 // Utility
 //
@ -163,6 +177,13 @@ dl_matrix3dq_t *dl_matrixq_from_matrix3d_qmf(dl_matrix3d_t *m, int exponent);
 */
 dl_matrix3dq_t *dl_matrixq_from_matrix3d(dl_matrix3d_t *m);

+/**
+ * @brief Truncate the overflowed 16bit number
+ * 
+ * @param value               Input value
+ * @param location            Location tag
+ * @return qtp_t              Truncated value
+ */
 qtp_t dl_matrix3dq_quant_range_exceeded_checking(int64_t value, char *location);

 /**
@ -186,13 +207,23 @@ void dl_matrix3dq_batch_normalize(dl_matrix3dq_t *m, dl_matrix3dq_t *scale, dl_m
 /**
 * @brief Add two quantized matrix with a pre-defined exponent
 *
- * @param in_1      Adder 1
- * @param in_2      Adder 2
- * @param exponent  Exponent for resulting matrix
- * @return          Result of accumulation of two matrix
+ * @param in_1                     Adder 1
+ * @param in_2                     Adder 2
+ * @param exponent                 Exponent for resulting matrix
+ * @return dl_matrix3dq_t*         Result of accumulation of two matrix
 */
 dl_matrix3dq_t *dl_matrix3dq_add(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, int exponent);

+/**
+ * @brief Add two quantized matrices with different channels
+ * 
+ * @param in_1               Adder 1
+ * @param in_2               Adder 2
+ * @param exponent           Exponent for resulting matrix
+ * @return dl_matrix3dq_t*   Result of accumulation of two matrix
+ */
+dl_matrix3dq_t *dl_matrix3dq_add_channel_diff(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, int exponent);
+
 //
 // Activation
 //
@ -287,6 +318,7 @@ dl_matrix3dq_t *dl_matrix3dq_concat_8(dl_matrix3dq_t *in_1,
 * @param in        Input matrix, size (1, w, h, c)
 * @param filter    1x1 filter, size (n, 1, 1, c)
 * @param mode      Implementation mode
+ * @param name      Layer name to debug
 */
 void dl_matrix3dqq_conv_1x1(dl_matrix3dq_t *out,
                            dl_matrix3dq_t *in,
@ -301,6 +333,7 @@ void dl_matrix3dqq_conv_1x1(dl_matrix3dq_t *out,
 * @param in        Input matrix, size (1, w, h, c)
 * @param filter    1x1 filter, size (n, 1, 1, c)
 * @param mode      Implementation mode
+ * @param name      Layer name to debug
 */
 void dl_matrix3dqq_conv_1x1_with_relu(dl_matrix3dq_t *out,
                                      dl_matrix3dq_t *in,
@ -326,13 +359,14 @@ void dl_matrix3dqq_conv_1x1_with_bias(dl_matrix3dq_t *out,
                                      char *name);

 /**
- * @brief Do 1x1 convolution with a quantized matrix, with bias adding and relu activation
+ * @brief Do 1x1 convolution with a quantized matrix, with bias adding and relu activation
 *
 * @param out       Preallocated quantized matrix, size (1, w, h, n)
 * @param in        Input matrix, size (1, w, h, c)
 * @param filter    1x1 filter, size (n, 1, 1, c)
 * @param bias      Bias, size (1, 1, 1, n)
 * @param mode      Implementation mode
+ * @param name      Layer name to debug
 */
 void dl_matrix3dqq_conv_1x1_with_bias_relu(dl_matrix3dq_t *out,
                                           dl_matrix3dq_t *in,
@ -342,14 +376,14 @@ void dl_matrix3dqq_conv_1x1_with_bias_relu(dl_matrix3dq_t *out,
                                           char *name);

 /**
- * @brief 
+ * @brief Do 1x1 convolution with a quantized matrix, with prelu activation         
 * 
- * @param out 
- * @param in 
- * @param filter 
- * @param prelu 
- * @param mode 
- * @param name 
+ * @param out       Preallocated quantized matrix, size (1, w, h, n)
+ * @param in        Input matrix, size (1, w, h, c)
+ * @param filter    1x1 filter, size (n, 1, 1, c)
+ * @param prelu     prelu params, size (1, 1, 1, n)
+ * @param mode      Implementation mode
+ * @param name      Layer name to debug
 */
 void dl_matrix3dqq_conv_1x1_with_prelu(dl_matrix3dq_t *out,
                                       dl_matrix3dq_t *in,
@ -365,6 +399,7 @@ void dl_matrix3dqq_conv_1x1_with_prelu(dl_matrix3dq_t *out,
 * @param in        Input matrix, size (1, w, h, c)
 * @param filter    1x1 filter, size (n, 1, 1, c)
 * @param mode      Implementation mode
+ * @param name      Layer name to debug
 */
 void dl_matrix3duq_conv_1x1(dl_matrix3dq_t *out,
                            dl_matrix3du_t *in,
@ -380,6 +415,7 @@ void dl_matrix3duq_conv_1x1(dl_matrix3dq_t *out,
 * @param filter    1x1 filter, size (n, 1, 1, c)
 * @param bias      Bias, size (1, 1, 1, n)
 * @param mode      Implementation mode
+ * @param name      Layer name to debug
 */
 void dl_matrix3duq_conv_1x1_with_bias(dl_matrix3dq_t *out,
                                      dl_matrix3du_t *in,
@ -401,7 +437,7 @@ void dl_matrix3duq_conv_1x1_with_bias(dl_matrix3dq_t *out,
 * @param stride_y          Stride of height
 * @param padding           Padding type, 0: valid, 1: same
 * @param exponent          Exponent for resulting matrix
- * @param name 
+ * @param name              Layer name to debug 
 * @return dl_matrix3dq_t*  Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3dqq_conv_3x3(dl_matrix3dq_t *input,
@ -420,9 +456,9 @@ dl_matrix3dq_t *dl_matrix3dqq_conv_3x3(dl_matrix3dq_t *input,
 * @param bias              Bias, size (1, 1, 1, n)
 * @param stride_x          Stride of width
 * @param stride_y          Stride of height
- * @param padding 
+ * @param padding           Padding type
 * @param exponent          Exponent for resulting matrix
- * @param name 
+ * @param name              Layer name to debug 
 * @return dl_matrix3dq_t*  Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3dqq_conv_3x3_with_bias(dl_matrix3dq_t *input,
@ -442,9 +478,9 @@ dl_matrix3dq_t *dl_matrix3dqq_conv_3x3_with_bias(dl_matrix3dq_t *input,
 * @param bias              Bias, size (1, 1, 1, n)
 * @param stride_x          Stride of width
 * @param stride_y          Stride of height
- * @param padding 
+ * @param padding           Padding type
 * @param exponent          Exponent for resulting matrix
- * @param name 
+ * @param name              Layer name to debug 
 * @return dl_matrix3dq_t*  Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3dqq_conv_3x3_with_bias_relu(dl_matrix3dq_t *input,
@ -457,17 +493,17 @@ dl_matrix3dq_t *dl_matrix3dqq_conv_3x3_with_bias_relu(dl_matrix3dq_t *input,
                                                      char *name);

 /**
- * @brief 
+ * @brief  Do 3x3 convolution with an 8-bit fixed point matrix, with bias adding
 * 
- * @param input 
- * @param filter 
- * @param bias 
- * @param stride_x 
- * @param stride_y 
- * @param padding 
- * @param exponent 
- * @param name 
- * @return dl_matrix3dq_t* 
+ * @param input             Input matrix, size (1, w, h, c)
+ * @param filter            3x3 filter, size (n, 3, 3, c)
+ * @param bias              Bias, size (1, 1, 1, n)
+ * @param stride_x          Stride of width
+ * @param stride_y          Stride of height
+ * @param padding           Padding type
+ * @param exponent          Exponent for resulting matrix
+ * @param name              Layer name to debug 
+ * @return dl_matrix3dq_t*  Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3duq_conv_3x3_with_bias(dl_matrix3du_t *input,
                                                 dl_matrix3dq_t *filter,
@ -479,18 +515,18 @@ dl_matrix3dq_t *dl_matrix3duq_conv_3x3_with_bias(dl_matrix3du_t *input,
                                                 char *name);

 /**
- * @brief 
+ * @brief   Do 3x3 convolution with an 8-bit fixed point matrix, with bias adding, prelu activation
 * 
- * @param input 
- * @param filter 
- * @param bias 
- * @param prelu 
- * @param stride_x 
- * @param stride_y 
- * @param padding 
- * @param exponent 
- * @param name 
- * @return dl_matrix3dq_t* 
+ * @param input             Input matrix, size (1, w, h, c)
+ * @param filter            3x3 filter, size (n, 3, 3, c)
+ * @param bias              Bias, size (1, 1, 1, n)
+ * @param prelu             prelu params, size (1, 1, 1, n)
+ * @param stride_x          Stride of width
+ * @param stride_y          Stride of height
+ * @param padding           Padding type
+ * @param exponent          Exponent for resulting matrix
+ * @param name              Layer name to debug
+ * @return dl_matrix3dq_t*  Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3duq_conv_3x3_with_bias_prelu(dl_matrix3du_t *input,
                                                       dl_matrix3dq_t *filter,
@ -503,7 +539,20 @@ dl_matrix3dq_t *dl_matrix3duq_conv_3x3_with_bias_prelu(dl_matrix3du_t *input,
                                                       char *name);


-
+/**
+ * @brief   Do 3x3 convolution with a quantized matrix, with bias adding, prelu activation
+ * 
+ * @param input             Input matrix, size (1, w, h, c)
+ * @param filter            3x3 filter, size (n, 3, 3, c)
+ * @param bias              Bias, size (1, 1, 1, n)
+ * @param prelu             prelu params, size (1, 1, 1, n)
+ * @param stride_x          Stride of width
+ * @param stride_y          Stride of height
+ * @param padding           Padding type
+ * @param exponent          Exponent for resulting matrix
+ * @param name              Layer name to debug
+ * @return dl_matrix3dq_t*  Resulting quantized matrix
+ */
 dl_matrix3dq_t *dl_matrix3dqq_conv_3x3_with_bias_prelu(dl_matrix3dq_t *input,
                                                       dl_matrix3dq_t *filter,
                                                       dl_matrix3dq_t *bias,
@ -573,6 +622,7 @@ dl_matrix3dq_t *dl_matrix3duq_conv_common(dl_matrix3du_t *in,
 * @param stride_y  Stride of height
 * @param padding   Padding type, 0: valid, 1: same
 * @param exponent  Exponent for resulting matrix
+ * @param name      Layer name to debug
 * @return          Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3duq_depthwise_conv_3x3(dl_matrix3du_t *in,
@ -591,7 +641,9 @@ dl_matrix3dq_t *dl_matrix3duq_depthwise_conv_3x3(dl_matrix3du_t *in,
 * @param stride_x  Stride of width
 * @param stride_y  Stride of height
 * @param padding   Padding type, 0: valid, 1: same
+ * @param relu      ReLU, 0: don't, 1: do
 * @param exponent  Exponent for resulting matrix
+ * @param name      Layer name to debug
 * @return          Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3(dl_matrix3dq_t *in,
@ -599,6 +651,7 @@ dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3(dl_matrix3dq_t *in,
                                                 int stride_x,
                                                 int stride_y,
                                                 dl_padding_type padding,
+                                                 int relu,
                                                 int exponent,
                                                 char *name);

@ -624,13 +677,14 @@ dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_3(dl_matrix3dq_t *in,
 * @brief Do 3x3 depthwise convolution with a quantized matrix, with bias adding
 *
 * @param in        Input matrix, size (1, w, h, c)
- * @param filter    3x3 filter, size (1, 3, 3, c)
+ * @param f         3x3 filter, size (1, 3, 3, c)
 * @param bias      Bias, size (1, 1, 1, c)
 * @param stride_x  Stride of width
 * @param stride_y  Stride of height
 * @param padding   Padding type, 0: valid, 1: same
 * @param exponent  Exponent for resulting matrix
 * @param relu      Whether to use relu activation
+ * @param name      Layer name to debug
 * @return          Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_with_bias(dl_matrix3dq_t *in,
@ -647,11 +701,12 @@ dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_with_bias(dl_matrix3dq_t *in,
 * @brief Do 3x3 depthwise convolution with a quantized matrix, with bias adding and stride 1
 *
 * @param in        Input matrix, size (1, w, h, c)
- * @param filter    3x3 filter, size (1, 3, 3, c)
- * @param bias      Bias, size (1, 1, 1, n)
+ * @param f         3x3 filter, size (1, 3, 3, c)
+ * @param bias      Bias, size (1, 1, 1, c)
 * @param padding   Padding type, 0: valid, 1: same
 * @param exponent  Exponent for resulting matrix
 * @param relu      Whether to use relu activation
+ * @param name      Layer name to debug
 * @return          Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3s1_with_bias(dl_matrix3dq_t *in,
@ -662,6 +717,19 @@ dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3s1_with_bias(dl_matrix3dq_t *in,
                                                             int relu,
                                                             char *name);

+/**
+ * @brief Do 3x3 depthwise convolution with a quantized matrix, with prelu activation
+ * 
+ * @param in               Input matrix, size (1, w, h, c) 
+ * @param filter           3x3 filter, size (1, 3, 3, c)
+ * @param prelu            prelu params, size (1, 1, 1, c)
+ * @param stride_x         Stride of width
+ * @param stride_y         Stride of height
+ * @param padding          Padding type
+ * @param exponent         Exponent for resulting matrix
+ * @param name             Layer name to debug
+ * @return dl_matrix3dq_t* Resulting quantized matrix
+ */
 dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_with_prelu(dl_matrix3dq_t *in,
                                                            dl_matrix3dq_t *filter,
                                                            dl_matrix3dq_t *prelu,
@ -671,6 +739,20 @@ dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_with_prelu(dl_matrix3dq_t *in,
                                                            int exponent,
                                                            char *name);

+/**
+ * @brief Do 3x3 depthwise convolution with a quantized matrix, with bias adding and prelu activation
+ * 
+ * @param in               Input matrix, size (1, w, h, c) 
+ * @param f                3x3 filter, size (1, 3, 3, c)
+ * @param bias             Bias, size (1, 1, 1, c)
+ * @param prelu            prelu params, size (1, 1, 1, c)
+ * @param stride_x         Stride of width
+ * @param stride_y         Stride of height
+ * @param padding          Padding type
+ * @param exponent         Exponent for resulting matrix
+ * @param name             Layer name to debug
+ * @return dl_matrix3dq_t* Resulting quantized matrix
+ */
 dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_with_bias_prelu(dl_matrix3dq_t *in,
                                                           dl_matrix3dq_t *f,
                                                           dl_matrix3dq_t *bias,
@ -681,16 +763,226 @@ dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_with_bias_prelu(dl_matrix3dq_t
                                                           int exponent,
                                                           char *name);

+/**
+ * @brief Do global depthwise convolution with a quantized matrix, with bias adding
+ * 
+ * @param in                 Input matrix, size (1, w, h, c)    
+ * @param filter             filter, size (1, w, h, c)
+ * @param bias               Bias, size (1, 1, 1, c)
+ * @param exponent           Exponent for resulting matrix
+ * @param name               Layer name to debug
+ * @return dl_matrix3dq_t*   Resulting quantized matrix
+ */
 dl_matrix3dq_t *dl_matrix3dqq_global_depthwise_conv_with_bias(dl_matrix3dq_t *in,
                                                    dl_matrix3dq_t *filter,
                                                    dl_matrix3dq_t *bias,
                                                    int exponent,
                                                    char *name);
+
+
+
+//
+// Depthwise 2x2
+//
+/**
+ * @brief Do 2x2 depthwise convolution with a quantized matrix
+ *
+ * @param in        Input matrix, size (1, w, h, c)
+ * @param filter    2x2 filter, size (1, 2, 2, c)
+ * @param stride_x  Stride of width
+ * @param stride_y  Stride of height
+ * @param padding   Padding type, 0: valid, 1: same
+ * @param exponent  Exponent for resulting matrix
+ * @param name      Layer name to debug
+ * @return          Resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_2x2(dl_matrix3dq_t *in,
+                                                 dl_matrix3dq_t *filter,
+                                                 int stride_x,
+                                                 int stride_y,
+                                                 dl_padding_type padding,
+                                                 int exponent,
+                                                 char *name);
+
+/**
+ * @brief Do 2x2 depthwise convolution with a quantized matrix, with bias adding
+ *
+ * @param in        Input matrix, size (1, w, h, c)
+ * @param f         2x2 filter, size (1, 2, 2, c)
+ * @param bias      Bias, size (1, 1, 1, c)
+ * @param stride_x  Stride of width
+ * @param stride_y  Stride of height
+ * @param padding   Padding type, 0: valid, 1: same
+ * @param exponent  Exponent for resulting matrix
+ * @param relu      Whether to use relu activation
+ * @param name      Layer name to debug
+ * @return          Resulting quantized matrix
+ */                                        
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_2x2_with_bias(dl_matrix3dq_t *in,
+                                                           dl_matrix3dq_t *f,
+                                                           dl_matrix3dq_t *bias,
+                                                           int stride_x,
+                                                           int stride_y,
+                                                           dl_padding_type padding,
+                                                           int exponent,
+                                                           int relu,
+                                                           char *name);
+
+/**
+ * @brief Do 2x2 depthwise convolution with a quantized matrix, with prelu activation
+ * 
+ * @param in               Input matrix, size (1, w, h, c) 
+ * @param filter           2x2 filter, size (1, 2, 2, c)
+ * @param prelu            prelu params, size (1, 1, 1, c)
+ * @param stride_x         Stride of width
+ * @param stride_y         Stride of height
+ * @param padding          Padding type
+ * @param exponent         Exponent for resulting matrix
+ * @param name             Layer name to debug
+ * @return dl_matrix3dq_t* Resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_2x2_with_prelu(dl_matrix3dq_t *in,
+                                                            dl_matrix3dq_t *filter,
+                                                            dl_matrix3dq_t *prelu,
+                                                            int stride_x,
+                                                            int stride_y,
+                                                            dl_padding_type padding,
+                                                            int exponent,
+                                                            char *name);
+
+/**
+ * @brief Do 2x2 depthwise convolution with a quantized matrix, with bias adding and prelu activation
+ * 
+ * @param in               Input matrix, size (1, w, h, c) 
+ * @param f                2x2 filter, size (1, 2, 2, c)
+ * @param bias             Bias, size (1, 1, 1, c)
+ * @param prelu            prelu params, size (1, 1, 1, c)
+ * @param stride_x         Stride of width
+ * @param stride_y         Stride of height
+ * @param padding          Padding type
+ * @param exponent         Exponent for resulting matrix
+ * @param name             Layer name to debug
+ * @return dl_matrix3dq_t* Resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_2x2_with_bias_prelu(dl_matrix3dq_t *in,
+                                                                 dl_matrix3dq_t *f,
+                                                                 dl_matrix3dq_t *bias,
+                                                                 dl_matrix3dq_t *prelu,
+                                                                 int stride_x,
+                                                                 int stride_y,
+                                                                 dl_padding_type padding,
+                                                                 int exponent,
+                                                                 char *name);
+
+//
+// Depthwise 5x5
+//
+/**
+ * @brief Do 5x5 depthwise convolution with a quantized matrix
+ *
+ * @param in        Input matrix, size (1, w, h, c)
+ * @param filter    5x5 filter, size (1, 5, 5, c)
+ * @param stride_x  Stride of width
+ * @param stride_y  Stride of height
+ * @param padding   Padding type, 0: valid, 1: same
+ * @param exponent  Exponent for resulting matrix
+ * @param name      Layer name to debug
+ * @return          Resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_5x5(dl_matrix3dq_t *in,
+                                                 dl_matrix3dq_t *filter,
+                                                 int stride_x,
+                                                 int stride_y,
+                                                 dl_padding_type padding,
+                                                 int exponent,
+                                                 char *name);
+
+/**
+ * @brief Do 5x5 depthwise convolution with a quantized matrix, with bias adding
+ *
+ * @param in        Input matrix, size (1, w, h, c)
+ * @param f         5x5 filter, size (1, 5, 5, c)
+ * @param bias      Bias, size (1, 1, 1, c)
+ * @param stride_x  Stride of width
+ * @param stride_y  Stride of height
+ * @param padding   Padding type, 0: valid, 1: same
+ * @param exponent  Exponent for resulting matrix
+ * @param relu      Whether to use relu activation
+ * @param name      Layer name to debug
+ * @return          Resulting quantized matrix
+ */                                         
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_5x5_with_bias(dl_matrix3dq_t *in,
+                                                           dl_matrix3dq_t *f,
+                                                           dl_matrix3dq_t *bias,
+                                                           int stride_x,
+                                                           int stride_y,
+                                                           dl_padding_type padding,
+                                                           int exponent,
+                                                           int relu,
+                                                           char *name);
+
+/**
+ * @brief Do 5x5 depthwise convolution with a quantized matrix, with prelu activation
+ * 
+ * @param in               Input matrix, size (1, w, h, c) 
+ * @param filter           5x5 filter, size (1, 5, 5, c)
+ * @param prelu            prelu params, size (1, 1, 1, c)
+ * @param stride_x         Stride of width
+ * @param stride_y         Stride of height
+ * @param padding          Padding type
+ * @param exponent         Exponent for resulting matrix
+ * @param name             Layer name to debug
+ * @return dl_matrix3dq_t* Resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_5x5_with_prelu(dl_matrix3dq_t *in,
+                                                            dl_matrix3dq_t *filter,
+                                                            dl_matrix3dq_t *prelu,
+                                                            int stride_x,
+                                                            int stride_y,
+                                                            dl_padding_type padding,
+                                                            int exponent,
+                                                            char *name);
+
+/**
+ * @brief Do 5x5 depthwise convolution with a quantized matrix, with bias adding and prelu activation
+ * 
+ * @param in               Input matrix, size (1, w, h, c) 
+ * @param f                5x5 filter, size (1, 5, 5, c)
+ * @param bias             Bias, size (1, 1, 1, c)
+ * @param prelu            prelu params, size (1, 1, 1, c)
+ * @param stride_x         Stride of width
+ * @param stride_y         Stride of height
+ * @param padding          Padding type
+ * @param exponent         Exponent for resulting matrix
+ * @param name             Layer name to debug
+ * @return dl_matrix3dq_t* Resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_5x5_with_bias_prelu(dl_matrix3dq_t *in,
+                                                                 dl_matrix3dq_t *f,
+                                                                 dl_matrix3dq_t *bias,
+                                                                 dl_matrix3dq_t *prelu,
+                                                                 int stride_x,
+                                                                 int stride_y,
+                                                                 dl_padding_type padding,
+                                                                 int exponent,
+                                                                 char *name);
                                                    
 //
 // Depthwise Common
 //
 #if CONFIG_DEVELOPING_CODE
+/**
+ * @brief Do a general depthwise convolution layer pass with a quantized matrix
+ * 
+ * @param in                    Input matrix, size (1, w, h, c) 
+ * @param filter                Weights of the neurons, size (1, k_w, k_h, c)
+ * @param stride_x              Stride of width
+ * @param stride_y              Stride of height
+ * @param padding               Padding type
+ * @param exponent              Exponent for resulting matrix
+ * @param mode                  Implementation mode
+ * @return dl_matrix3dq_t*      Resulting quantized matrix
+ */
 dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_common(dl_matrix3dq_t *in,
                                                    dl_matrix3dq_t *filter,
                                                    int stride_x,
@ -699,6 +991,18 @@ dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_common(dl_matrix3dq_t *in,
                                                    int exponent,
                                                    dl_conv_mode mode);

+/**
+ * @brief Do a general depthwise convolution layer pass with an 8-bit fixed point matrix
+ * 
+ * @param in                    Input matrix, size (1, w, h, c) 
+ * @param filter                Weights of the neurons, size (1, k_w, k_h, c)
+ * @param stride_x              Stride of width
+ * @param stride_y              Stride of height
+ * @param padding               Padding type
+ * @param exponent              Exponent for resulting matrix
+ * @param mode                  Implementation mode
+ * @return dl_matrix3dq_t*      Resulting quantized matrix
+ */
 dl_matrix3dq_t *dl_matrix3duq_depthwise_conv_common(dl_matrix3du_t *in,
                                                    dl_matrix3dq_t *filter,
                                                    int stride_x,
@ -712,6 +1016,14 @@ dl_matrix3dq_t *dl_matrix3duq_depthwise_conv_common(dl_matrix3du_t *in,
 // Dot Product
 //

+/**
+ * @brief Do dot product operation with a quantized matrix
+ * 
+ * @param out     Preallocated resulting matrix, size (1, 1, 1, h)
+ * @param in      Input matrix, size (1, 1, 1, w)
+ * @param filter  Filter matrix, size (1, w, h, 1)
+ * @param mode    Implementation mode
+ */
 void dl_matrix3dqq_dot_product(dl_matrix3dq_t *out,
                               dl_matrix3dq_t *in,
                               dl_matrix3dq_t *filter,
@ -727,7 +1039,7 @@ void dl_matrix3dqq_dot_product(dl_matrix3dq_t *out,
 * @param in        Input matrix, size (1, 1, 1, w)
 * @param filter    Filter matrix, size (1, w, h, 1)
 * @param mode      Implementation mode
- * @param name
+ * @param name      Layer name to debug
 */
 void dl_matrix3dqq_fc(dl_matrix3dq_t *out,
                      dl_matrix3dq_t *in,
@ -743,6 +1055,7 @@ void dl_matrix3dqq_fc(dl_matrix3dq_t *out,
 * @param filter    Filter matrix, size (1, w, h, 1)
 * @param bias      Bias matrix, size (1, 1, 1, h)
 * @param mode      Implementation mode
+ * @param name      Layer name to debug
 */
 void dl_matrix3dqq_fc_with_bias(dl_matrix3dq_t *out,
                                dl_matrix3dq_t *in,
@ -830,6 +1143,28 @@ dl_matrix3dq_t *dl_matrix3dqq_mobilefaceblock(dl_matrix3dq_t *in,
                                              dl_conv_mode mode,
                                              int shortcut);

+/**
+ * @brief Do mobilefacenet process, the process sequence is 1x1 pointwise->bn->prelu->3x3 depthwise->bn->prelu->1x1 pointwise->bn
+ * 
+ * @param in                     Input matrix, size (1, w, h, c)
+ * @param pw                     Pointwise 1x1 filter, size (n1, 1, 1, c)
+ * @param pw_bias                Pointwise bias, size (1, 1, 1, n1)
+ * @param pw_prelu               Pointwise prelu, size (1, 1, 1, n1)
+ * @param dw                     Depthwise 3x3 filter, size (1, 3, 3, n1)
+ * @param dw_bias                Depthwise bias, size (1, 1, 1, n1)
+ * @param dw_prelu               Depthwise prelu, size (1, 1, 1, n1)
+ * @param pw_linear              Pointwise 1x1 filter, size (n2, 1, 1, n1)
+ * @param pw_linear_bias         Pointwise bias, size (1, 1, 1, n2)
+ * @param pw_exponent            Exponent for pointwise resulting matrix
+ * @param dw_exponent            Exponent for depthwise resulting matrix
+ * @param pw_linear_exponent     Exponent for pointwise linear resulting matrix
+ * @param stride_x               Stride of width
+ * @param stride_y               Stride of height
+ * @param padding                Depthwise convolution padding type
+ * @param mode                   Implementation mode
+ * @param shortcut               Whether there is a shortcut at pointwise linear
+ * @return dl_matrix3dq_t*       Resulting quantized matrix
+ */
 dl_matrix3dq_t *dl_matrix3dqq_mobilefaceblock_prelu(dl_matrix3dq_t *in,
                                                dl_matrix3dq_t *pw,
                                                dl_matrix3dq_t *pw_bias,
@ -848,6 +1183,15 @@ dl_matrix3dq_t *dl_matrix3dqq_mobilefaceblock_prelu(dl_matrix3dq_t *in,
                                                dl_conv_mode mode,
                                                int shortcut);

+/**@{*/
+/**
+ * @brief Do mobilefacenet process, the process sequence is 1x1 pointwise->bn->prelu->3x3 depthwise->bn->prelu->1x1 pointwise->bn
+ * 
+ * Compared to 'dl_matrix3dqq_mobilefaceblock_prelu', this family of functions, 'dl_matrix3dqq_mobilefaceblock_prelu_split_x1_x2',
+ * splits the first pointwise convolution into x1 pointwise convolutions, and splits the second pointwise convolution into x2 pointwise convolutions.
+ * 
+ * 
+ */
 dl_matrix3dq_t *dl_matrix3dqq_mobilefaceblock_prelu_split_2_2(dl_matrix3dq_t *in,
                                                    dl_matrix3dq_t *pw_1,
                                                    dl_matrix3dq_t *pw_2,
@ -910,6 +1254,59 @@ dl_matrix3dq_t *dl_matrix3dqq_mobilefaceblock_prelu_split_1_2(dl_matrix3dq_t *in
                                                    dl_padding_type padding,
                                                    dl_conv_mode mode,
                                                    int shortcut);
+/**@}*/
+
+//
+//  blazeblock
+//
+
+/**
+ * @brief Do blazeblock process, the process sequence is depthwise->bn->1x1 pointwise->bn->shortcut->relu
+ * 
+ * @param in                 Input matrix, size (1, w, h, c)
+ * @param dw1_kernel         Depthwise filter, size (1, k, k, c)
+ * @param dw1_bias           Depthwise bias, size (1, 1, 1, c)
+ * @param pw1_kernel         Pointwise 1x1 filter, size (n, 1, 1, c)
+ * @param pw1_bias           Pointwise bias, size (1, 1, 1, n)
+ * @param config             blazeblock configuration
+ * @param name               Layer name to debug
+ * @return dl_matrix3dq_t*   Resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dqq_blazeblock(dl_matrix3dq_t *in,
+                                        dl_matrix3dq_t *dw1_kernel,
+                                        dl_matrix3dq_t *dw1_bias,
+                                        dl_matrix3dq_t *pw1_kernel,
+                                        dl_matrix3dq_t *pw1_bias,
+                                        dl_matrix3dq_blazeblock_config_t config,
+                                        char *name);
+
+/**
+ * @brief Do double blazeblock process, the process sequence is depthwise->bn->1x1 pointwise->bn->relu->depthwise->bn->1x1 pointwise->bn->shortcut->relu 
+ * 
+ * @param in                 Input matrix, size (1, w, h, c)
+ * @param dw1_kernel         Depthwise filter, size (1, k, k, c)
+ * @param dw1_bias           Depthwise bias, size (1, 1, 1, c)
+ * @param pw1_kernel         Pointwise 1x1 filter, size (n1, 1, 1, c)
+ * @param pw1_bias           Pointwise bias, size (1, 1, 1, n1)
+ * @param dw2_kernel         Depthwise filter, size (1, k, k, n1)
+ * @param dw2_bias           Depthwise bias, size (1, 1, 1, n1)
+ * @param pw2_kernel         Pointwise 1x1 filter, size (n2, 1, 1, n1)
+ * @param pw2_bias           Pointwise bias, size (1, 1, 1, n2)
+ * @param config             blazeblock configuration
+ * @param name               Layer name to debug
+ * @return dl_matrix3dq_t*   Resulting quantized matrix
+ */
+dl_matrix3dq_t *dl_matrix3dqq_double_blazeblock(dl_matrix3dq_t *in,
+                                        dl_matrix3dq_t *dw1_kernel,
+                                        dl_matrix3dq_t *dw1_bias,
+                                        dl_matrix3dq_t *pw1_kernel,
+                                        dl_matrix3dq_t *pw1_bias,
+                                        dl_matrix3dq_t *dw2_kernel,
+                                        dl_matrix3dq_t *dw2_bias,
+                                        dl_matrix3dq_t *pw2_kernel,
+                                        dl_matrix3dq_t *pw2_bias,
+                                        dl_matrix3dq_blazeblock_config_t config,
+                                        char *name);
 //
 // Mobilenet
 //
@ -925,7 +1322,8 @@ dl_matrix3dq_t *dl_matrix3dqq_mobilefaceblock_prelu_split_1_2(dl_matrix3dq_t *in
 * @param compress              Pointwise 1x1 filter, size (n2, 1, 1, n1)
 * @param bias                  Pointwise bias, size (1, 1, 1, n2)
 * @param config                Mobilenet configuration
- * @return                      Resulting quantized matrix
+ * @param name                  Block name to debug
+ * @return dl_matrix3dq_t*      Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3dqq_mobilenet(dl_matrix3dq_t *in,
                                        dl_matrix3dq_t *dilate,
@ -948,7 +1346,8 @@ dl_matrix3dq_t *dl_matrix3dqq_mobilenet(dl_matrix3dq_t *in,
 * @param compress              Pointwise 1x1 filter, size (n2, 1, 1, n1)
 * @param bias                  Pointwise bias, size (1, 1, 1, n2)
 * @param config                Mobilenet configuration
- * @return                      Resulting quantized matrix
+ * @param name                  Block name to debug
+ * @return dl_matrix3dq_t*      Resulting quantized matrix
 */
 dl_matrix3dq_t *dl_matrix3duq_mobilenet(dl_matrix3du_t *in,
                                        dl_matrix3dq_t *dilate,
@ -964,18 +1363,19 @@ dl_matrix3dq_t *dl_matrix3duq_mobilenet(dl_matrix3du_t *in,
 // Padding
 //

+/**@{*/
 /**
- * @brief 
+ * @brief This family of functions does a padding operation before a convolution
 * 
- * @param padded_input 
- * @param output_height 
- * @param output_width 
- * @param input 
- * @param stride_x 
- * @param stride_y 
- * @param kernel_size 
- * @param padding_type 
- * @return dl_error_type 
+ * @param padded_input      the padded result pointer
+ * @param output_height     the output height pointer
+ * @param output_width      the output width pointer
+ * @param input             Input matrix, size (1, w, h, c)
+ * @param stride_x          Stride of width
+ * @param stride_y          Stride of height
+ * @param kernel_size       Kernel size of the next convolution
+ * @param padding_type      Padding type
+ * @return dl_error_type    DL_SUCCESS if padding succeeds, otherwise DL_FAIL
 */
 dl_error_type dl_matrix3dqq_padding(dl_matrix3dq_t **padded_input,
                                    int *output_height,
@ -986,19 +1386,6 @@ dl_error_type dl_matrix3dqq_padding(dl_matrix3dq_t **padded_input,
                                    int kernel_size,
                                    dl_padding_type padding_type);

-/**
- * @brief 
- * 
- * @param padded_input 
- * @param output_height 
- * @param output_width 
- * @param input 
- * @param stride_x 
- * @param stride_y 
- * @param kernel_size 
- * @param padding_type 
- * @return dl_error_type 
- */
 dl_error_type dl_matrix3duq_padding(dl_matrix3du_t **padded_input,
                                    int *output_height,
                                    int *output_width,
@ -1007,6 +1394,20 @@ dl_error_type dl_matrix3duq_padding(dl_matrix3du_t **padded_input,
                                    int stride_y,
                                    int kernel_size,
                                    dl_padding_type padding_type);
+/**@}*/
+
+//
+// Upsample
+//
+/**
+ * @brief Upsample a feature map to twice the size 
+ * 
+ * @param in                  Input matrix, size (1, w, h, c)
+ * @param upsample            upsample type
+ * @return dl_matrix3dq_t*    Resulting matrix, size (1, 2*w, 2*h, c)
+ */
+dl_matrix3dq_t *dl_matrix3dqq_upsample_2x(dl_matrix3dq_t *in, 
+                                        dl_upsample_type upsample);

 //
 // Pooling
@ -1014,22 +1415,22 @@ dl_error_type dl_matrix3duq_padding(dl_matrix3du_t **padded_input,
 /**
 * @brief Calculate average value of a feature map
 *
- * @param in        Input matrix, size (1, w, h, c)
- * @return          Resulting matrix, size (1, 1, 1, c)
+ * @param in                 Input matrix, size (1, w, h, c)
+ * @return dl_matrix3dq_t*   Resulting matrix, size (1, 1, 1, c)
 */
 dl_matrix3dq_t *dl_matrix3dq_global_pool(dl_matrix3dq_t *in);

 /**
 * @brief Calculate pooling layer of a feature map
 *
- * @param in        Input matrix, size (1, w, h, c)
- * @param f_w       Window width
- * @param f_h       Window height 
- * @param stride_x  Stride in horizontal direction
- * @param stride_y  Stride in vertical direction
- * @param padding   Padding type: PADDING_VALID and PADDING_SAME
- * @param pooling_type   Pooling type: DL_POOLING_MAX and POOLING_AVG
- * @return          Resulting matrix, size (1, w', h', c)
+ * @param in                Input matrix, size (1, w, h, c)
+ * @param f_w               Window width
+ * @param f_h               Window height 
+ * @param stride_x          Stride in horizontal direction
+ * @param stride_y          Stride in vertical direction
+ * @param padding           Padding type: PADDING_VALID or PADDING_SAME
+ * @param pooling_type      Pooling type: DL_POOLING_MAX or POOLING_AVG
+ * @return dl_matrix3dq_t*  Resulting matrix, size (1, w', h', c)
 */
 dl_matrix3dq_t *dl_matrix3dq_pooling(dl_matrix3dq_t *in,
                                     int f_w,
--- a/tools/sdk/include/esp-face/esp_image.hpp
+++ b/tools/sdk/include/esp-face/esp_image.hpp
@ -0,0 +1,344 @@
+/*
+  * ESPRESSIF MIT License
+  *
+  * Copyright (c) 2018 <ESPRESSIF SYSTEMS (SHANGHAI) PTE LTD>
+  *
+  * Permission is hereby granted for use on ESPRESSIF SYSTEMS products only, in which case,
+  * it is free of charge, to any person obtaining a copy of this software and associated
+  * documentation files (the "Software"), to deal in the Software without restriction, including
+  * without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
+  * and/or sell copies of the Software, and to permit persons to whom the Software is furnished
+  * to do so, subject to the following conditions:
+  *
+  * The above copyright notice and this permission notice shall be included in all copies or
+  * substantial portions of the Software.
+  *
+  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+  * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+  * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *
+  */
+#pragma once
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stdint.h>
+#include <math.h>
+#include <assert.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+/** Resampling method selected through the `type` argument of Image::resize_to_rgb888(). */
+typedef enum
+{
+    IMAGE_RESIZE_BILINEAR = 0, /*!< Resize image by bilinear interpolation of four pixels */
+    IMAGE_RESIZE_MEAN = 1,     /*!< Resize image by taking the mean of four pixels */
+    IMAGE_RESIZE_NEAREST = 2   /*!< Resize image by taking the nearest pixel */
+} image_resize_t;
+
+template <class T>
+class Image
+{
+public:
+    /**
+     * @brief Expand one RGB565 pixel into three byte-aligned colour components.
+     *
+     * The masks read red from bits 3..7, green from bits 0..2 and 13..15, and blue from
+     * bits 8..12 of input, i.e. an RGB565 word whose two bytes are swapped. Each component
+     * is left-aligned within 8 bits; the unused low bits are left at zero.
+     *
+     * @param input     Pixel value in RGB565
+     * @param output    Receives the pixel as output[0] = red, output[1] = green, output[2] = blue
+     */
+    static inline void pixel_rgb565_to_rgb888(uint16_t input, T *output)
+    {
+        const int red = input & 0xF8;
+        const int green = ((input & 0x7) << 5) | ((input & 0xE000) >> 11);
+        const int blue = (input & 0x1F00) >> 5;
+
+        output[0] = red;
+        output[1] = green;
+        output[2] = blue;
+    }
+
+    /**
+     * @brief Resize a RGB565 image to a RGB888 image
+     *
+     * @param dst_image     The destination image
+     * @param y_start       The start y index of where the resized image is located
+     * @param y_end         The end y index (exclusive) of where the resized image is located
+     * @param x_start       The start x index of where the resized image is located
+     * @param x_end         The end x index (exclusive) of where the resized image is located
+     * @param channel       The channel number of image
+     * @param src_image     The source image, one RGB565 word per pixel
+     * @param src_h         The height of source image
+     * @param src_w         The width of source image
+     * @param dst_w         The width of destination image
+     * @param shift_left    The bit number of left shifting; values <= 0 shift right instead
+     * @param type          The resize type
+     */
+    static void resize_to_rgb888(T *dst_image, int y_start, int y_end, int x_start, int x_end, int channel, uint16_t *src_image, int src_h, int src_w, int dst_w, int shift_left, image_resize_t type);
+
+    /**
+     * @brief Resize a RGB888 image to a RGB888 image
+     *
+     * @param dst_image     The destination image
+     * @param y_start       The start y index of where the resized image is located
+     * @param y_end         The end y index (exclusive) of where the resized image is located
+     * @param x_start       The start x index of where the resized image is located
+     * @param x_end         The end x index (exclusive) of where the resized image is located
+     * @param channel       The channel number of image
+     * @param src_image     The source image, `channel` interleaved bytes per pixel
+     * @param src_h         The height of source image
+     * @param src_w         The width of source image
+     * @param dst_w         The width of destination image
+     * @param shift_left    The bit number of left shifting; values <= 0 shift right instead
+     * @param type          The resize type
+     */
+    static void resize_to_rgb888(T *dst_image, int y_start, int y_end, int x_start, int x_end, int channel, uint8_t *src_image, int src_h, int src_w, int dst_w, int shift_left, image_resize_t type);
+    // static void resize_to_rgb565(uint16_t *dst_image, int y_start, int y_end, int x_start, int x_end, int channel, uint16_t *src_image, int src_h, int src_w, int dst_w, int shift_left, image_resize_t type);
+    // static void resize_to_rgb565(uint16_t *dst_image, int y_start, int y_end, int x_start, int x_end, int channel, uint8_t *src_image, int src_h, int src_w, int dst_w, int shift_left, image_resize_t type);
+};
+
+/**
+ * @brief Resize an RGB565 source image into a region of an RGB888 destination image.
+ *
+ * Every destination pixel (x, y) with y in [y_start, y_end) and x in [x_start, x_end) is
+ * written to dst_image[(y * dst_w + x) * channel + c]. Source pixels are decoded with
+ * pixel_rgb565_to_rgb888() before being combined. The combined value is shifted left by
+ * shift_left bits when shift_left is positive, otherwise shifted right by -shift_left bits.
+ *
+ * - IMAGE_RESIZE_BILINEAR: blends the 2x2 source neighbourhood around the mapped sample point.
+ * - IMAGE_RESIZE_MEAN:     sums a 2x2 source neighbourhood; the division by four is folded
+ *                          into the shift (shift_left is reduced by 2).
+ * - IMAGE_RESIZE_NEAREST:  copies the source pixel at the rounded mapped position.
+ *
+ * NOTE(review): the MEAN and NEAREST paths index the source with rintf() results that are
+ * not clamped to the source bounds here - presumably callers guarantee they stay in range; confirm.
+ *
+ * @param dst_image     The destination image
+ * @param y_start       The start y index of where the resized image is located
+ * @param y_end         The end y index (exclusive) of where the resized image is located
+ * @param x_start       The start x index of where the resized image is located
+ * @param x_end         The end x index (exclusive) of where the resized image is located
+ * @param channel       The channel number of image, must be 3
+ * @param src_image     The source image, one RGB565 word per pixel
+ * @param src_h         The height of source image
+ * @param src_w         The width of source image
+ * @param dst_w         The width of destination image
+ * @param shift_left    The bit number of left shifting; values <= 0 shift right instead
+ * @param type          The resize type
+ */
+template <class T>
+void Image<T>::resize_to_rgb888(T *dst_image, int y_start, int y_end, int x_start, int x_end, int channel, uint16_t *src_image, int src_h, int src_w, int dst_w, int shift_left, image_resize_t type)
+{
+    assert(channel == 3); // temp below is laid out for four 3-channel pixels
+    float scale_y = (float)src_h / (y_end - y_start);
+    float scale_x = (float)src_w / (x_end - x_start);
+    int temp[13]; // [0..11]: four decoded RGB pixels, [12]: scratch for the blended value
+
+    switch (type)
+    {
+    case IMAGE_RESIZE_BILINEAR:
+        for (size_t y = y_start; y < y_end; y++)
+        {
+            float ratio_y[2];
+            ratio_y[0] = (float)((y + 0.5) * scale_y - 0.5); // y
+            int src_y = (int)ratio_y[0];                     // y1
+            ratio_y[0] -= src_y;                             // y - y1
+
+            if (src_y < 0)
+            {
+                ratio_y[0] = 0;
+                src_y = 0;
+            }
+            if (src_y > src_h - 2)
+            {
+                // Keep the row below (src_y + 1) inside the source image.
+                ratio_y[0] = 0;
+                src_y = src_h - 2;
+            }
+            ratio_y[1] = 1 - ratio_y[0]; // y2 - y
+
+            int _dst_i = y * dst_w;
+
+            int _src_row_0 = src_y * src_w;
+            int _src_row_1 = _src_row_0 + src_w;
+
+            for (size_t x = x_start; x < x_end; x++)
+            {
+                float ratio_x[2];
+                ratio_x[0] = (float)((x + 0.5) * scale_x - 0.5); // x
+                int src_x = (int)ratio_x[0];                     // x1
+                ratio_x[0] -= src_x;                             // x - x1
+
+                if (src_x < 0)
+                {
+                    ratio_x[0] = 0;
+                    src_x = 0;
+                }
+                if (src_x > src_w - 2)
+                {
+                    // Keep the column to the right (src_x + 1) inside the source image.
+                    ratio_x[0] = 0;
+                    src_x = src_w - 2;
+                }
+                ratio_x[1] = 1 - ratio_x[0]; // x2 - x
+
+                int dst_i = (_dst_i + x) * channel;
+
+                int src_row_0 = _src_row_0 + src_x;
+                int src_row_1 = _src_row_1 + src_x;
+
+                // Decode the 2x2 neighbourhood: top-left, top-right, bottom-left, bottom-right.
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_row_0], temp);
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_row_0 + 1], temp + 3);
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_row_1], temp + 6);
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_row_1 + 1], temp + 9);
+
+                for (int c = 0; c < channel; c++)
+                {
+                    // All four taps come from the decoded pixels in temp, never from the raw RGB565 words.
+                    temp[12] = round(temp[c] * ratio_x[1] * ratio_y[1] + temp[channel + c] * ratio_x[0] * ratio_y[1] + temp[channel + channel + c] * ratio_x[1] * ratio_y[0] + temp[channel + channel + channel + c] * ratio_x[0] * ratio_y[0]);
+                    dst_image[dst_i + c] = (shift_left > 0) ? (temp[12] << shift_left) : (temp[12] >> -shift_left);
+                }
+            }
+        }
+        break;
+
+    case IMAGE_RESIZE_MEAN:
+        shift_left -= 2; // fold the divide-by-four of the 2x2 mean into the output shift
+        for (int y = y_start; y < y_end; y++)
+        {
+            int _dst_i = y * dst_w;
+
+            float _src_row_0 = rintf(y * scale_y) * src_w;
+            float _src_row_1 = _src_row_0 + src_w;
+
+            for (int x = x_start; x < x_end; x++)
+            {
+                int dst_i = (_dst_i + x) * channel;
+
+                int src_row_0 = (_src_row_0 + rintf(x * scale_x));
+                int src_row_1 = (_src_row_1 + rintf(x * scale_x));
+
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_row_0], temp);
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_row_0 + 1], temp + 3);
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_row_1], temp + 6);
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_row_1 + 1], temp + 9);
+
+                // One sum per channel so both shift directions use the same components.
+                int sum_r = temp[0] + temp[3] + temp[6] + temp[9];
+                int sum_g = temp[1] + temp[4] + temp[7] + temp[10];
+                int sum_b = temp[2] + temp[5] + temp[8] + temp[11];
+
+                dst_image[dst_i] = (shift_left > 0) ? (sum_r << shift_left) : (sum_r >> -shift_left);
+                dst_image[dst_i + 1] = (shift_left > 0) ? (sum_g << shift_left) : (sum_g >> -shift_left);
+                dst_image[dst_i + 2] = (shift_left > 0) ? (sum_b << shift_left) : (sum_b >> -shift_left);
+            }
+        }
+
+        break;
+
+    case IMAGE_RESIZE_NEAREST:
+        for (size_t y = y_start; y < y_end; y++)
+        {
+            int _dst_i = y * dst_w;
+            float _src_i = rintf(y * scale_y) * src_w;
+
+            for (size_t x = x_start; x < x_end; x++)
+            {
+                int dst_i = (_dst_i + x) * channel;
+                int src_i = _src_i + rintf(x * scale_x);
+
+                Image<int>::pixel_rgb565_to_rgb888(src_image[src_i], temp);
+
+                dst_image[dst_i] = (shift_left > 0) ? (temp[0] << shift_left) : (temp[0] >> -shift_left);
+                dst_image[dst_i + 1] = (shift_left > 0) ? (temp[1] << shift_left) : (temp[1] >> -shift_left);
+                dst_image[dst_i + 2] = (shift_left > 0) ? (temp[2] << shift_left) : (temp[2] >> -shift_left);
+            }
+        }
+        break;
+
+    default:
+        break;
+    }
+}
+
+/**
+ * @brief Resize an interleaved 8-bit source image into a region of the destination image.
+ *
+ * Every destination pixel (x, y) with y in [y_start, y_end) and x in [x_start, x_end) is
+ * written to dst_image[(y * dst_w + x) * channel + c] for each channel c. The computed
+ * value is shifted left by shift_left bits when shift_left is positive, otherwise it is
+ * shifted right by -shift_left bits. Resize types other than the three handled below
+ * leave dst_image untouched.
+ *
+ * @param dst_image     The destination image
+ * @param y_start       The start y index of where the resized image is located
+ * @param y_end         The end y index (exclusive) of where the resized image is located
+ * @param x_start       The start x index of where the resized image is located
+ * @param x_end         The end x index (exclusive) of where the resized image is located
+ * @param channel       The channel number of image
+ * @param src_image     The source image, `channel` interleaved bytes per pixel
+ * @param src_h         The height of source image
+ * @param src_w         The width of source image
+ * @param dst_w         The width of destination image
+ * @param shift_left    The bit number of left shifting; values <= 0 shift right instead
+ * @param type          The resize type
+ */
+template <class T>
+void Image<T>::resize_to_rgb888(T *dst_image, int y_start, int y_end, int x_start, int x_end, int channel, uint8_t *src_image, int src_h, int src_w, int dst_w, int shift_left, image_resize_t type)
+{
+    float scale_y = (float)src_h / (y_end - y_start);
+    float scale_x = (float)src_w / (x_end - x_start);
+
+    if (type == IMAGE_RESIZE_BILINEAR)
+    {
+        for (size_t row = y_start; row < y_end; row++)
+        {
+            // Map the centre of the destination row into source coordinates and split
+            // it into the upper source row (top) and the blend weight of the row below.
+            float frac_y = (float)((row + 0.5) * scale_y - 0.5);
+            int top = (int)frac_y;
+            frac_y -= top;
+
+            if (top < 0)
+            {
+                frac_y = 0;
+                top = 0;
+            }
+            if (top > src_h - 2)
+            {
+                // Keep row top + 1 inside the source image.
+                frac_y = 0;
+                top = src_h - 2;
+            }
+            float inv_y = 1 - frac_y;
+
+            int dst_row_base = row * dst_w;
+
+            int src_top_base = top * src_w;
+            int src_bottom_base = src_top_base + src_w;
+
+            for (size_t col = x_start; col < x_end; col++)
+            {
+                // Same mapping horizontally: left source column and weight of the column to its right.
+                float frac_x = (float)((col + 0.5) * scale_x - 0.5);
+                int left = (int)frac_x;
+                frac_x -= left;
+
+                if (left < 0)
+                {
+                    frac_x = 0;
+                    left = 0;
+                }
+                if (left > src_w - 2)
+                {
+                    // Keep column left + 1 inside the source image.
+                    frac_x = 0;
+                    left = src_w - 2;
+                }
+                float inv_x = 1 - frac_x;
+
+                int out = (dst_row_base + col) * channel;
+
+                int top_left = (src_top_base + left) * channel;
+                int bottom_left = (src_bottom_base + left) * channel;
+
+                for (int c = 0; c < channel; c++)
+                {
+                    // Taps in order: top-left, top-right, bottom-left, bottom-right.
+                    int blended = round(src_image[top_left + c] * inv_x * inv_y + src_image[top_left + channel + c] * frac_x * inv_y + src_image[bottom_left + c] * inv_x * frac_y + src_image[bottom_left + channel + c] * frac_x * frac_y);
+                    if (shift_left > 0)
+                    {
+                        dst_image[out + c] = blended << shift_left;
+                    }
+                    else
+                    {
+                        dst_image[out + c] = blended >> -shift_left;
+                    }
+                }
+            }
+        }
+    }
+    else if (type == IMAGE_RESIZE_MEAN)
+    {
+        // The 2x2 sum is divided by four through the output shift.
+        shift_left -= 2;
+
+        for (size_t row = y_start; row < y_end; row++)
+        {
+            int dst_row_base = row * dst_w;
+
+            float src_top_base = rintf(row * scale_y) * src_w;
+            float src_bottom_base = src_top_base + src_w;
+
+            for (size_t col = x_start; col < x_end; col++)
+            {
+                int out = (dst_row_base + col) * channel;
+
+                float src_col = rintf(col * scale_x);
+                int top_left = (src_top_base + src_col) * channel;
+                int bottom_left = (src_bottom_base + src_col) * channel;
+
+                for (size_t c = 0; c < channel; c++)
+                {
+                    int sum = (int)src_image[top_left + c] + (int)src_image[top_left + channel + c] + (int)src_image[bottom_left + c] + (int)src_image[bottom_left + channel + c];
+                    if (shift_left > 0)
+                    {
+                        dst_image[out + c] = sum << shift_left;
+                    }
+                    else
+                    {
+                        dst_image[out + c] = sum >> -shift_left;
+                    }
+                }
+            }
+        }
+    }
+    else if (type == IMAGE_RESIZE_NEAREST)
+    {
+        for (size_t row = y_start; row < y_end; row++)
+        {
+            int dst_row_base = row * dst_w;
+            float src_row_base = rintf(row * scale_y) * src_w;
+
+            for (size_t col = x_start; col < x_end; col++)
+            {
+                int out = (dst_row_base + col) * channel;
+                int nearest = (src_row_base + rintf(col * scale_x)) * channel;
+
+                for (size_t c = 0; c < channel; c++)
+                {
+                    if (shift_left > 0)
+                    {
+                        dst_image[out + c] = (T)src_image[nearest + c] << shift_left;
+                    }
+                    else
+                    {
+                        dst_image[out + c] = (T)src_image[nearest + c] >> -shift_left;
+                    }
+                }
+            }
+        }
+    }
+}
--- a/tools/sdk/include/esp-face/fd_forward.h
+++ b/tools/sdk/include/esp-face/fd_forward.h
@ -34,35 +34,40 @@ extern "C"

    typedef enum
    {
-        FAST = 0,
-        NORMAL = 1,
+        FAST = 0,            /*!< fast resize type */         
+        NORMAL = 1,          /*!< normal resize type */ 
    } mtmn_resize_type;

    typedef struct
    {
-        float score;          /// score threshold for filter candidates by score
-        float nms;            /// nms threshold for nms process
-        int candidate_number; /// candidate number limitation for each net
+        float score;          /*!< score threshold for filter candidates by score */
+        float nms;            /*!< nms threshold for nms process */
+        int candidate_number; /*!< candidate number limitation for each net */
    } threshold_config_t;

    typedef struct
    {
-        int w;                        /// net width
-        int h;                        /// net height
-        threshold_config_t threshold; /// threshold of net
+        int w;                        /*!< net width */
+        int h;                        /*!< net height */
+        threshold_config_t threshold; /*!< threshold of net */
    } net_config_t;

    typedef struct
    {
-        float min_face;                 /// The minimum size of a detectable face
-        float pyramid;                  /// The scale of the gradient scaling for the input images
-        int pyramid_times;              /// The pyramid resizing times
-        threshold_config_t p_threshold; /// The thresholds for P-Net. For details, see the definition of threshold_config_t
-        threshold_config_t r_threshold; /// The thresholds for R-Net. For details, see the definition of threshold_config_t
-        threshold_config_t o_threshold; /// The thresholds for O-Net. For details, see the definition of threshold_config_t
-        mtmn_resize_type type;          /// The image resize type. 'pyramid' will lose efficacy, when 'type'==FAST.
+        float min_face;                 /*!< The minimum size of a detectable face */
+        float pyramid;                  /*!< The scale of the gradient scaling for the input images */
+        int pyramid_times;              /*!< The pyramid resizing times */
+        threshold_config_t p_threshold; /*!< The thresholds for P-Net. For details, see the definition of threshold_config_t */
+        threshold_config_t r_threshold; /*!< The thresholds for R-Net. For details, see the definition of threshold_config_t */
+        threshold_config_t o_threshold; /*!< The thresholds for O-Net. For details, see the definition of threshold_config_t */
+        mtmn_resize_type type;          /*!< The image resize type. 'pyramid' will lose efficacy, when 'type'==FAST. */
    } mtmn_config_t;

+    /**
+     * @brief Get the initial MTMN model configuration
+     * 
+     * @return mtmn_config_t      MTMN configuration
+     */
    static inline mtmn_config_t mtmn_init_config()
    {
        mtmn_config_t mtmn_config;
--- a/tools/sdk/include/esp-face/fr_flash.h
+++ b/tools/sdk/include/esp-face/fr_flash.h
@ -11,8 +11,8 @@ extern "C"
 #define FR_FLASH_SUBTYPE   32
 #define FR_FLASH_PARTITION_NAME "fr"
 #define FR_FLASH_INFO_FLAG 12138
-
-	 /**
+        
+     /**
     * @brief Produce face id according to the input aligned face, and save it to dest_id and flash.
     * 
     * @param l                     Face id list
@ -24,6 +24,16 @@ extern "C"
    int8_t enroll_face_id_to_flash(face_id_list *l,
            dl_matrix3du_t *aligned_face);

+    /**
+     * @brief Produce face id according to the input aligned face, and save the id-name pairs to dest_id and flash.
+     * 
+     * @param l                     Face id list
+     * @param new_id                A face id that needs to be enrolled
+     * @param name                  name corresponding to face id
+     * @return -2                   Flash partition not found
+     * @return 0                    Enrollment finish
+     * @return >=1                  The left piece of aligned faces should be input
+     */
    int8_t enroll_face_id_to_flash_with_name(face_id_name_list *l,
            dl_matrix3d_t *new_id,
            char *name);
@ -34,7 +44,13 @@ extern "C"
     * @return int8_t               The number of IDs remaining in flash
     */
    int8_t read_face_id_from_flash(face_id_list *l);
-
+    
+    /**
+     * @brief Read the enrolled face IDs and their corresponding names from the flash.
+     * 
+     * @param l                     Face id list
+     * @return int8_t               The number of IDs remaining in flash
+     */
    int8_t read_face_id_from_flash_with_name(face_id_name_list *l);

    /**
@ -44,7 +60,21 @@ extern "C"
     * @return int8_t               The number of IDs remaining in flash
     */
    int8_t delete_face_id_in_flash(face_id_list *l);
-	int8_t delete_face_id_in_flash_with_name(face_id_name_list *l, char *name);
+
+    /**
+     * @brief Delete the enrolled face ID corresponding to the name in the flash.
+     * 
+     * @param l                     Face id list
+     * @param name                  The name that needs to be deleted
+     * @return int8_t               The number of IDs remaining in flash
+     */
+    int8_t delete_face_id_in_flash_with_name(face_id_name_list *l, char *name);
+
+    /**
+     * @brief Delete all the enrolled face ID and name pairs in the flash.
+     * 
+     * @param l                     Face id list
+     */
    void delete_face_all_in_flash_with_name(face_id_name_list *l);

 #if __cplusplus
--- a/tools/sdk/include/esp-face/fr_forward.h
+++ b/tools/sdk/include/esp-face/fr_forward.h
@ -29,15 +29,13 @@ extern "C"
 #define NOSE_EYE_RATIO_THRES_MIN 0.49f
 #define NOSE_EYE_RATIO_THRES_MAX 2.04f

-/**
- * @brief      HTTP Client events data
- */
+
 #define ENROLL_NAME_LEN 16
    typedef struct tag_face_id_node
    {
-        struct tag_face_id_node *next;
-        char id_name[ENROLL_NAME_LEN];
-        dl_matrix3d_t *id_vec;
+        struct tag_face_id_node *next;           /*!< next face id node */
+        char id_name[ENROLL_NAME_LEN];           /*!< name corresponding to the face id  */
+        dl_matrix3d_t *id_vec;                   /*!< face id */
    } face_id_node;

    typedef struct
@ -59,14 +57,21 @@ extern "C"
    } face_id_list;

    /**
-     * @brief Initialize face id list
+     * @brief Initialize face id list.
     * 
-     * @param l                 Face id list
-     * @param size              Size of list, one list contains one vector
-     * @param confirm_times     Enroll times for one id
-     * @return dl_matrix3du_t*          Size: 1xFACE_WIDTHxFACE_HEIGHTx3
+     * @param l                    Face id list
+     * @param size                 Size of list, one list contains one vector
+     * @param confirm_times        Enroll times for one id
     */
    void face_id_init(face_id_list *l, uint8_t size, uint8_t confirm_times);
+
+    /**
+     * @brief Initialize face id list with name.
+     * 
+     * @param l                    Face id list
+     * @param size                 Size of list, one list contains one vector
+     * @param confirm_times        Enroll times for one id
+     */
    void face_id_name_init(face_id_name_list *l, uint8_t size, uint8_t confirm_times);

    /**
@ -76,8 +81,9 @@ extern "C"
     */
    dl_matrix3du_t *aligned_face_alloc();

+    /**@{*/
    /**
-     * @brief Align detected face to average face according to landmark
+     * @brief Align detected face to average face according to landmark.
     * 
     * @param onet_boxes        Output of MTMN with box and landmark
     * @param src               Image matrix, rgb888 format
@ -88,10 +94,6 @@ extern "C"
    int8_t align_face_rot(box_array_t *onet_boxes,
                      dl_matrix3du_t *src,
                      dl_matrix3du_t *dest);
-
-    int8_t align_face2(fptp_t *landmark,
-                       dl_matrix3du_t *src,
-                       dl_matrix3du_t *dest);
    
    int8_t align_face_sim(box_array_t *onet_boxes,
                   dl_matrix3du_t *src,
@ -103,6 +105,7 @@ extern "C"
    {
        return align_face_sim(onet_boxes, src, dest);              
    }
+    /**@}*/

    /**
     * @brief Run the face recognition model to get the face feature
@ -115,26 +118,34 @@ extern "C"
    /**
     * @brief Add src_id to dest_id
     * 
-     * @param dest_id 
-     * @param src_id 
+     * @param dest_id       Face id after accumulation
+     * @param src_id        Face id to be added
     */
    void add_face_id(dl_matrix3d_t *dest_id,
                     dl_matrix3d_t *src_id);

    /**
     * @brief Match face with the id_list, and return matched_id.
-     * 
+     *
+     * @param l                     An ID list 
     * @param algined_face          An aligned face
-     * @param id_list               An ID list
     * @return int8_t               Matched face id
     */
    int8_t recognize_face(face_id_list *l, dl_matrix3du_t *algined_face);

+    /**
+     * @brief Match face id with the id_list, and return matched face id node.
+     * 
+     * @param l                     Face id list with names
+     * @param face_id               Face id to be matched against the list
+     * @return face_id_node*        Matched face id node
+     */
    face_id_node *recognize_face_with_name(face_id_name_list *l, dl_matrix3d_t *face_id);
+    
    /**
     * @brief Produce face id according to the input aligned face, and save it to dest_id.
     * 
-     * @param l                     face id list
+     * @param l                     Face id list
     * @param aligned_face          An aligned face
     * @param enroll_confirm_times  Confirm times for each face id enrollment
     * @return -1                   Wrong input enroll_confirm_times
@ -143,18 +154,40 @@ extern "C"
     */
    int8_t enroll_face(face_id_list *l, dl_matrix3du_t *aligned_face);

+    /**
+     * @brief Produce face id according to the input aligned face, and save the id-name pairs to dest_id
+     * 
+     * @param l                      Face id list with name 
+     * @param new_id                 A face id that needs to be enrolled
+     * @param name                   name corresponding to the face id  
+     * @return int8_t                The left piece of aligned faces should be input
+     */
    int8_t enroll_face_with_name(face_id_name_list *l,
                                 dl_matrix3d_t *new_id,
                                 char *name);

    /**
-     * @brief Alloc memory for aligned face.
+     * @brief Delete the enrolled face IDs
     * 
-     * @param l                     face id list
-     * @return uint8_t              left count
+     * @param l            Face id list
+     * @return uint8_t     The number of IDs remaining in face id list
     */
    uint8_t delete_face(face_id_list *l);
+
+    /**
+     * @brief Delete the enrolled face IDs and associated names
+     * 
+     * @param l             Face id list
+     * @param name          The name that needs to be deleted
+     * @return int8_t       The number of IDs remaining in face id list
+     */
    int8_t delete_face_with_name(face_id_name_list *l, char *name);
+    
+    /**
+     * @brief               Delete all the enrolled face ID and name pairs
+     * 
+     * @param l             Face id list with names
+     */
    void delete_face_all_with_name(face_id_name_list *l);
 #if __cplusplus
 }
--- a/tools/sdk/include/esp-face/frmn.h
+++ b/tools/sdk/include/esp-face/frmn.h
@ -15,9 +15,10 @@ extern "C"
     * @return dl_matrix3d_t* Face ID feature vector, size is 512
     */
    dl_matrix3d_t *frmn(dl_matrix3d_t *in);
-
+    
+    /**@{*/
    /**
-     * @brief Forward the face recognition process with frmn model. Calculate in quantization.
+     * @brief Forward the face recognition process with specified model. Calculate in quantization.
     *
     * @param in    Image matrix, rgb888 format, size is 56x56, normalized
     * @param mode  0: C implement; 1: handwrite xtensa instruction implement
@ -25,16 +26,8 @@ extern "C"
     */
    dl_matrix3dq_t *frmn_q(dl_matrix3dq_t *in, dl_conv_mode mode);

-    /**
-     * @brief Forward the face recognition process with frmn2p model. Calculate in quantization.
-     *
-     * @param in    Image matrix, rgb888 format, size is 56x56, normalized
-     * @param mode  0: C implement; 1: handwrite xtensa instruction implement
-     * @return      Face ID feature vector, size is 512
-     */
    dl_matrix3dq_t *frmn2p_q(dl_matrix3dq_t *in, dl_conv_mode mode);

-
    dl_matrix3dq_t *mfn56_42m_q(dl_matrix3dq_t *in, dl_conv_mode mode);

    dl_matrix3dq_t *mfn56_72m_q(dl_matrix3dq_t *in, dl_conv_mode mode);
@ -43,6 +36,8 @@ extern "C"

    dl_matrix3dq_t *mfn56_156m_q(dl_matrix3dq_t *in, dl_conv_mode mode);

+    /**@}*/
+
 #if __cplusplus
 }
 #endif
--- a/tools/sdk/include/esp-face/hd_model.h
+++ b/tools/sdk/include/esp-face/hd_model.h
@ -0,0 +1,66 @@
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+
+    typedef struct
+    {
+        int num;              /*!< The total number of the boxes */
+        dl_matrix3d_t *cls;   /*!< The class feature map corresponding to the box. size: (height, width, anchor_num, 1) */
+        dl_matrix3d_t *score; /*!< The confidence score feature map of the class corresponding to the box. size: (height, width, anchor_num, 1) */
+        dl_matrix3d_t *boxes; /*!< (x, y, w, h) of the boxes. x and y are the center coordinates. size:(height, width, anchor_num, 4) */
+    } detection_result_t;
+
+    /**
+     * @brief Forward the hand detection process with hd_nano1 model. Calculate in quantization.
+     * 
+     * @param in                      A normalized image matrix in rgb888 format, its width and height must be integer multiples of 16.
+     * @param mode                    0: C implement; 1: handwrite xtensa instruction implement
+     * @return detection_result_t**   Detection results
+     */
+    detection_result_t **hd_nano1_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the hand detection process with hd_lite1 model. Calculate in quantization.
+     * 
+     * @param in                      A normalized image matrix in rgb888 format, its width and height must be integer multiples of 32.
+     * @param mode                    0: C implement; 1: handwrite xtensa instruction implement.
+     * @return detection_result_t**   Detection results.
+     */
+    detection_result_t **hd_lite1_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Free the single detection result.
+     * 
+     * @param m     The single detection result.
+     */
+    void detection_result_free(detection_result_t *m);
+
+    /**
+     * @brief Free the detection result group from different feature map.
+     * 
+     * @param m       The detection result group
+     * @param length  The number of the detection results
+     */
+    void detection_results_free(detection_result_t **m, int length);
+
+    /**
+     * @brief Test the result of hand detection model.
+     * 
+     */
+    void hd_test();
+
+    /**
+     * @brief Test the forward time of hand detection model.
+     * 
+     */
+    void hd_time_test();
+
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/hp_model.h
+++ b/tools/sdk/include/esp-face/hp_model.h
@ -0,0 +1,43 @@
+#pragma once
+
+#if __cplusplus
+extern "C"
+{
+#endif
+
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"
+
+    /**
+     * @brief Forward the hand pose estimation process with hp_nano1_ls16 model. Calculate in quantization.
+     * 
+     * @param in                 A normalized image matrix in rgb888 format, its size is (1, 128, 128, 3).
+     * @param mode               0: C implement; 1: handwrite xtensa instruction implement
+     * @return dl_matrix3d_t*    The resulting hand joint point coordinates, the size is (1, 1, 21, 2)
+     */
+    dl_matrix3d_t *hp_nano1_ls16_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the hand pose estimation process with hp_lite1 model. Calculate in quantization.
+     * 
+     * @param in                 A normalized image matrix in rgb888 format, its size is (1, 128, 128, 3).
+     * @param mode               0: C implement; 1: handwrite xtensa instruction implement
+     * @return dl_matrix3d_t*    The resulting hand joint point coordinates, the size is (1, 1, 21, 2)
+     */
+    dl_matrix3d_t *hp_lite1_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Test the result of hand pose estimation model.
+     * 
+     */
+    void hp_test();
+
+    /**
+     * @brief Test the forward time of hand pose estimation model.
+     * 
+     */
+    void hp_time_test();
+
+#if __cplusplus
+}
+#endif
--- a/tools/sdk/include/esp-face/image_util.h
+++ b/tools/sdk/include/esp-face/image_util.h
@ -30,6 +30,8 @@ extern "C"
 #include <math.h>
 #include "mtmn.h"

+#define LANDMARKS_NUM (10)
+
 #define MAX_VALID_COUNT_PER_IMAGE (30)

 #define DL_IMAGE_MIN(A, B) ((A) < (B) ? (A) : (B))
@ -41,48 +43,64 @@ extern "C"

    typedef enum
    {
-        BINARY,
+        BINARY, /*!< binary */
    } en_threshold_mode;
+
    typedef struct
    {
-        fptp_t landmark_p[10];
+        fptp_t landmark_p[LANDMARKS_NUM]; /*!< landmark struct */
    } landmark_t;

    typedef struct
    {
-        fptp_t box_p[4];
+        fptp_t box_p[4]; /*!< box struct */
    } box_t;

    typedef struct tag_box_list
    {
-        fptp_t *score;
-        box_t *box;
-        landmark_t *landmark;
-        int len;
+        uint8_t *category;    /*!< The category of the corresponding box */
+        fptp_t *score;        /*!< The confidence score of the class corresponding to the box */
+        box_t *box;           /*!< Anchor boxes or predicted boxes*/
+        landmark_t *landmark; /*!< The landmarks corresponding to the box */
+        int len;              /*!< The num of the boxes */
    } box_array_t;

    typedef struct tag_image_box
    {
-        struct tag_image_box *next;
-        fptp_t score;
-        box_t box;
-        box_t offset;
-        landmark_t landmark;
+        struct tag_image_box *next; /*!< Next image_box_t */
+        uint8_t category;    /*!< The category of the box */
+        fptp_t score;        /*!< The confidence score of the class corresponding to the box */
+        box_t box;           /*!< Anchor boxes or predicted boxes */
+        box_t offset;        /*!< The predicted anchor-based offset */
+        landmark_t landmark; /*!< The landmarks corresponding to the box */
    } image_box_t;

    typedef struct tag_image_list
    {
-        image_box_t *head;
-        image_box_t *origin_head;
-        int len;
+        image_box_t *head;        /*!< The current head of the image_list */
+        image_box_t *origin_head; /*!< The original head of the image_list */
+        int len;                  /*!< Length of the image_list */
    } image_list_t;

+    /**
+     * @brief Get the width and height of the box.
+     * 
+     * @param box         Input box
+     * @param w           Resulting width of the box, computed as box_p[2] - box_p[0] + 1
+     * @param h           Resulting height of the box, computed as box_p[3] - box_p[1] + 1
+     */
    static inline void image_get_width_and_height(box_t *box, float *w, float *h)
    {
        *w = box->box_p[2] - box->box_p[0] + 1;
        *h = box->box_p[3] - box->box_p[1] + 1;
    }

+    /**
+     * @brief Get the area of the box.
+     * 
+     * @param box         Input box
+     * @param area        Resulting area of the box 
+     */
    static inline void image_get_area(box_t *box, float *area)
    {
        float w, h;
@ -90,6 +108,13 @@ extern "C"
        *area = w * h;
    }

+    /**
+     * @brief calibrate the boxes by offset
+     * 
+     * @param image_list         Input boxes
+     * @param image_height       Height of the original image
+     * @param image_width        Width of the original image
+     */
    static inline void image_calibrate_by_offset(image_list_t *image_list, int image_height, int image_width)
    {
        for (image_box_t *head = image_list->head; head; head = head->next)
@ -113,6 +138,11 @@ extern "C"
        }
    }

+    /**
+     * @brief calibrate the landmarks
+     * 
+     * @param image_list     Input landmarks
+     */
    static inline void image_landmark_calibrate(image_list_t *image_list)
    {
        for (image_box_t *head = image_list->head; head; head = head->next)
@ -136,6 +166,13 @@ extern "C"
        }
    }

+    /**
+     * @brief Convert a rectangular box into a square box
+     * 
+     * @param boxes    Input box 
+     * @param width    Width of the original image
+     * @param height   Height of the original image
+     */
    static inline void image_rect2sqr(box_array_t *boxes, int width, int height)
    {
        for (int i = 0; i < boxes->len; i++)
@ -169,33 +206,67 @@ extern "C"
        }
    }

+    /**@{*/
+    /**
+     * @brief Convert one RGB565 pixel to RGB888. The two bytes of the pixel are swapped before the channels are extracted.
+     * 
+     * @param in    Input RGB565 pixel
+     * @param dst   Resulting RGB888 pixel: dst[0] = red, dst[1] = green, dst[2] = blue
+     */
    static inline void rgb565_to_888(uint16_t in, uint8_t *dst)
-    {                                           /*{{{*/
-        dst[0] = (in & RGB565_MASK_BLUE) << 3;  // blue
+    { /*{{{*/
+        in = (in & 0xFF) << 8 | (in & 0xFF00) >> 8; // swap the high and low bytes
+        dst[2] = (in & RGB565_MASK_BLUE) << 3;  // blue
        dst[1] = (in & RGB565_MASK_GREEN) >> 3; // green
-        dst[2] = (in & RGB565_MASK_RED) >> 8;   // red
-    }                                           /*}}}*/
+        dst[0] = (in & RGB565_MASK_RED) >> 8;   // red

+        // dst[0] = (in & 0x1F00) >> 5;
+        // dst[1] = ((in & 0x7) << 5) | ((in & 0xE000) >> 11);
+        // dst[2] = in & 0xF8;
+    } /*}}}*/
+
+    static inline void rgb565_to_888_q16(uint16_t in, int16_t *dst)
+    { /*{{{*/
+        in = (in & 0xFF) << 8 | (in & 0xFF00) >> 8; // swap the high and low bytes before extracting channels
+        dst[2] = (in & RGB565_MASK_BLUE) << 3;  // blue
+        dst[1] = (in & RGB565_MASK_GREEN) >> 3; // green
+        dst[0] = (in & RGB565_MASK_RED) >> 8;   // red
+
+        // dst[0] = (in & 0x1F00) >> 5;
+        // dst[1] = ((in & 0x7) << 5) | ((in & 0xE000) >> 11);
+        // dst[2] = in & 0xF8;
+    } /*}}}*/
+    /**@}*/
+
+    /**
+     * @brief Convert one RGB888 pixel to RGB565. The two bytes of the result are swapped before it is stored.
+     * 
+     * @param in      Output: receives the resulting RGB565 pixel
+     * @param r       The red channel of the input RGB888 pixel
+     * @param g       The green channel of the input RGB888 pixel
+     * @param b       The blue channel of the input RGB888 pixel
+     */
    static inline void rgb888_to_565(uint16_t *in, uint8_t r, uint8_t g, uint8_t b)
    { /*{{{*/
        uint16_t rgb565 = 0;
        rgb565 = ((r >> 3) << 11);
        rgb565 |= ((g >> 2) << 5);
        rgb565 |= (b >> 3);
+        rgb565 = (rgb565 & 0xFF) << 8 | (rgb565 & 0xFF00) >> 8; // swap the high and low bytes
        *in = rgb565;
    } /*}}}*/

    /**
-     * @brief 
+     * @brief Filter out the resulting boxes whose confidence score is lower than the threshold and convert the boxes to the actual boxes on the original image.((x, y, w, h) -> (x1, y1, x2, y2))
     * 
-     * @param score 
-     * @param offset 
-     * @param landmark 
-     * @param width 
-     * @param height 
-     * @param anchor_number 
-     * @param anchors_size 
-     * @param score_threshold 
+     * @param score                    Confidence score of the boxes
+     * @param offset                   The predicted anchor-based offset
+     * @param landmark                 The landmarks corresponding to the box
+     * @param width                    Width of the original image
+     * @param height                   Height of the original image
+     * @param anchor_number            Anchor number of the detection output feature map 
+     * @param anchors_size             The anchor size
+     * @param score_threshold          Threshold of the confidence score
     * @param stride 
     * @param resized_height_scale 
     * @param resized_width_scale 
@ -215,32 +286,32 @@ extern "C"
                                        fptp_t resized_width_scale,
                                        bool do_regression);
    /**
-     * @brief 
+     * @brief Sort the resulting box lists by their confidence score.
     * 
-     * @param image_sorted_list 
-     * @param insert_list 
+     * @param image_sorted_list     The sorted box list.
+     * @param insert_list           The box list that have not been sorted.
     */
    void image_sort_insert_by_score(image_list_t *image_sorted_list, const image_list_t *insert_list);

    /**
-     * @brief 
+     * @brief Run NMS algorithm 
     * 
-     * @param image_list 
-     * @param nms_threshold 
-     * @param same_area 
+     * @param image_list         The input boxes list
+     * @param nms_threshold      NMS threshold
+     * @param same_area          The flag of boxes with same area
     */
    void image_nms_process(image_list_t *image_list, fptp_t nms_threshold, int same_area);

    /**
-     * @brief 
+     * @brief Resize an image to half size 
     * 
-     * @param dimage 
-     * @param dw 
-     * @param dh 
-     * @param dc 
-     * @param simage 
-     * @param sw 
-     * @param sc 
+     * @param dimage      The output image
+     * @param dw          Width of the output image
+     * @param dh          Height of the output image
+     * @param dc          Channel of the output image
+     * @param simage      Source image
+     * @param sw          Width of the source image
+     * @param sc          Channel of the source image
     */
    void image_zoom_in_twice(uint8_t *dimage,
                             int dw,
@ -251,82 +322,227 @@ extern "C"
                             int sc);

    /**
-     * @brief 
+     * @brief Resize the image in RGB888 format via bilinear interpolation
     * 
-     * @param dst_image 
-     * @param src_image 
-     * @param dst_w 
-     * @param dst_h 
-     * @param dst_c 
-     * @param src_w 
-     * @param src_h 
+     * @param dst_image    The output image
+     * @param src_image    Source image
+     * @param dst_w        Width of the output image
+     * @param dst_h        Height of the output image
+     * @param dst_c        Channel of the output image
+     * @param src_w        Width of the source image
+     * @param src_h        Height of the source image
     */
    void image_resize_linear(uint8_t *dst_image, uint8_t *src_image, int dst_w, int dst_h, int dst_c, int src_w, int src_h);

    /**
-     * @brief 
+     * @brief Crop, rotate and zoom the image in RGB888 format.
     * 
-     * @param corp_image 
-     * @param src_image 
-     * @param rotate_angle 
-     * @param ratio 
-     * @param center 
+     * @param corp_image       The output image
+     * @param src_image        Source image
+     * @param rotate_angle     Rotate angle
+     * @param ratio            scaling ratio
+     * @param center           Center of rotation
     */
    void image_cropper(uint8_t *corp_image, uint8_t *src_image, int dst_w, int dst_h, int dst_c, int src_w, int src_h, float rotate_angle, float ratio, float *center);

    /**
-     * @brief 
+     * @brief Convert the rgb565 image to the rgb888 image   
     * 
-     * @param m 
-     * @param bmp 
-     * @param count 
+     * @param m       The output rgb888 image
+     * @param bmp     The input rgb565 image
+     * @param count   Total pixels of the rgb565 image
     */
-    void transform_input_image(uint8_t *m, uint16_t *bmp, int count);
+    void image_rgb565_to_888(uint8_t *m, uint16_t *bmp, int count);

    /**
-     * @brief 
+     * @brief Convert the rgb888 image to the rgb565 image
     * 
-     * @param bmp 
-     * @param m 
-     * @param count 
+     * @param bmp     The output rgb565 image
+     * @param m       The input rgb888 image
+     * @param count   Total pixels of the rgb565 image
     */
-    void transform_output_image(uint16_t *bmp, uint8_t *m, int count);
-    void transform_output_image_adjustable(uint16_t *bmp, uint8_t *m, int src_w, int src_h, int dst_w, int dst_h);
+    void image_rgb888_to_565(uint16_t *bmp, uint8_t *m, int count);

    /**
-     * @brief 
+     * @brief draw rectangle on the rgb565 image
     * 
-     * @param buf 
-     * @param boxes 
-     * @param width 
+     * @param buf     Input image
+     * @param boxes   Rectangle Boxes
+     * @param width   Width of the input image
     */
    void draw_rectangle_rgb565(uint16_t *buf, box_array_t *boxes, int width);

    /**
-     * @brief 
+     * @brief draw rectangle on the rgb888 image
     * 
-     * @param buf 
-     * @param boxes 
-     * @param width 
+     * @param buf     Input image
+     * @param boxes   Rectangle Boxes
+     * @param width   Width of the input image
     */
    void draw_rectangle_rgb888(uint8_t *buf, box_array_t *boxes, int width);
+
+    /**
+     * @brief Get the pixel difference of two images
+     * 
+     * @param dst       The output pixel difference
+     * @param src1      Input image 1
+     * @param src2      Input image 2
+     * @param count     Total pixels of the input image
+     */
    void image_abs_diff(uint8_t *dst, uint8_t *src1, uint8_t *src2, int count);
+
+    /**
+     * @brief Binarize an image to 0 and value. 
+     * 
+     * @param dst           The output image
+     * @param src           Source image
+     * @param threshold     Threshold of binarization
+     * @param value         The value of binarization
+     * @param count         Total pixels of the input image
+     * @param mode          Threshold mode
+     */
    void image_threshold(uint8_t *dst, uint8_t *src, int threshold, int value, int count, en_threshold_mode mode);
+
+    /**
+     * @brief Erode the image
+     * 
+     * @param dst          The output image
+     * @param src          Source image
+     * @param src_w        Width of the source image
+     * @param src_h        Height of the source image
+     * @param src_c        Channel of the source image
+     */
    void image_erode(uint8_t *dst, uint8_t *src, int src_w, int src_h, int src_c);

    typedef float matrixType;
    typedef struct
    {
-        int w;
-        int h;
-        matrixType **array;
+        int w;              /*!< width */
+        int h;              /*!< height */
+        matrixType **array; /*!< array */
    } Matrix;

+    /**
+     * @brief Allocate a 2d matrix
+     * 
+     * @param h                Height of matrix
+     * @param w                Width of matrix
+     * @return Matrix*         2d matrix
+     */
    Matrix *matrix_alloc(int h, int w);
+
+    /**
+     * @brief Free a 2d matrix
+     * 
+     * @param m    2d matrix 
+     */
    void matrix_free(Matrix *m);
+
+    /**
+     * @brief Get the similarity matrix of similarity transformation
+     * 
+     * @param srcx          Source x coordinates
+     * @param srcy          Source y coordinates
+     * @param dstx          Destination x coordinates
+     * @param dsty          Destination y coordinates
+     * @param num           The number of the coordinates
+     * @return Matrix*      The resulting transformation matrix
+     */
    Matrix *get_similarity_matrix(float *srcx, float *srcy, float *dstx, float *dsty, int num);
+
+    /**
+     * @brief Get the affine transformation matrix
+     * 
+     * @param srcx          Source x coordinates
+     * @param srcy          Source y coordinates
+     * @param dstx          Destination x coordinates
+     * @param dsty          Destination y coordinates
+     * @return Matrix*      The resulting transformation matrix
+     */
+    Matrix *get_affine_transform(float *srcx, float *srcy, float *dstx, float *dsty);
+
+    /**
+     * @brief Applies an affine transformation to an image
+     * 
+     * @param img           Input image
+     * @param crop          Output image, of the same type as img; presumably preallocated at the desired output size
+     * @param M             Affine transformation matrix
+     */
    void warp_affine(dl_matrix3du_t *img, dl_matrix3du_t *crop, Matrix *M);

+    /**
+     * @brief Resize the image in RGB888 format via bilinear interpolation, and quantize the output image
+     * 
+     * @param dst_image            Quantized output image
+     * @param src_image            Input image 
+     * @param dst_w                Width of the output image 
+     * @param dst_h                Height of the output image 
+     * @param dst_c                Channel of the output image
+     * @param src_w                Width of the input image 
+     * @param src_h                Height of the input image
+     * @param shift                Shift parameter of quantization.
+     */
+    void image_resize_linear_q(qtp_t *dst_image, uint8_t *src_image, int dst_w, int dst_h, int dst_c, int src_w, int src_h, int shift);
+
+    /**
+     * @brief Preprocess the input image of object detection model. The process is like this: resize -> normalize -> quantize
+     * 
+     * @param image                 Input image, RGB888 format.
+     * @param input_w               Width of the input image.
+     * @param input_h               Height of the input image.
+     * @param target_size           Target size of the model input image.
+     * @param exponent              Exponent of the quantized model input image.
+     * @param process_mode          Process mode. 0: resize with padding to keep height == width. 1: resize without padding, height != width.  
+     * @return dl_matrix3dq_t*      The resulting preprocessed image.
+     */
+    dl_matrix3dq_t *image_resize_normalize_quantize(uint8_t *image, int input_w, int input_h, int target_size, int exponent, int process_mode);
+
+    /**
+     * @brief Resize the image in RGB565 format via mean neighbour interpolation, and quantize the output image
+     * 
+     * @param dimage            Quantized output image. 
+     * @param simage            Input image.  
+     * @param dw                Width of the allocated output image memory.
+     * @param dc                Channel of the allocated output image memory.
+     * @param sw                Width of the input image. 
+     * @param sh                Height of the input image. 
+     * @param tw                Target width of the output image.
+     * @param th                Target height of the output image.
+     * @param shift             Shift parameter of quantization.
+     */
+    void image_resize_shift_fast(qtp_t *dimage, uint16_t *simage, int dw, int dc, int sw, int sh, int tw, int th, int shift);
+
+    /**
+     * @brief Resize the image in RGB565 format via nearest neighbour interpolation, and quantize the output image
+     * 
+     * @param dimage            Quantized output image. 
+     * @param simage            Input image.  
+     * @param dw                Width of the allocated output image memory.
+     * @param dc                Channel of the allocated output image memory.
+     * @param sw                Width of the input image. 
+     * @param sh                Height of the input image. 
+     * @param tw                Target width of the output image.
+     * @param th                Target height of the output image.
+     * @param shift             Shift parameter of quantization.
+     */
+    void image_resize_nearest_shift(qtp_t *dimage, uint16_t *simage, int dw, int dc, int sw, int sh, int tw, int th, int shift);
+
+    /**
+     * @brief Crop the image in RGB565 format and resize it to target size, then quantize the output image
+     * 
+     * @param dimage            Quantized output image. 
+     * @param simage            Input image.
+     * @param dw                Target size of the output image.
+     * @param sw                Width of the input image. 
+     * @param sh                Height of the input image. 
+     * @param x1                The x coordinate of the upper left corner of the cropped area
+     * @param y1                The y coordinate of the upper left corner of the cropped area
+     * @param x2                The x coordinate of the lower right corner of the cropped area
+     * @param y2                The y coordinate of the lower right corner of the cropped area
+     * @param shift             Shift parameter of quantization.
+     */
+    void image_crop_shift_fast(qtp_t *dimage, uint16_t *simage, int dw, int sw, int sh, int x1, int y1, int x2, int y2, int shift);
+
 #ifdef __cplusplus
 }
 #endif