Update IDF to a8916daeb (#2992)

2025-10-18 08:45:27 +02:00 · 2019-07-17 10:09:43 +03:00
parent 3376ea1bd5
commit 9e32cec9a2
100 changed files with 797 additions and 239 deletions
--- a/tools/sdk/include/esp-face/dl_lib_matrix3d.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix3d.h
@@ -1,20 +1,19 @@
 #pragma once

 #include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
 typedef float fptp_t;
 typedef uint8_t uc_t;

 typedef enum
 {
-    DL_C_IMPL = 0,
-    DL_XTENSA_IMPL = 1
-} dl_conv_mode;
-
-typedef enum
-{
-    INPUT_UINT8 = 0,
-    INPUT_FLOAT = 1,
-} dl_op_type;
+    DL_SUCCESS = 0,
+    DL_FAIL = 1,
+} dl_error_type;

 typedef enum
 {
@@ -53,9 +52,7 @@ typedef struct
    int stride_x;
    int stride_y;
    dl_padding_type padding;
-    dl_conv_mode mode;
-    dl_op_type type;
-} dl_matrix3d_conv_config_t;
+} dl_matrix3d_mobilenet_config_t;

 /*
 * @brief Allocate a 3D matrix with float items, the access sequence is NHWC
@@ -93,7 +90,6 @@ void dl_matrix3d_free(dl_matrix3d_t *m);
 */
 void dl_matrix3du_free(dl_matrix3du_t *m);

-
 /*
 * @brief Dot product with a vector and matrix
 *
@@ -101,24 +97,7 @@ void dl_matrix3du_free(dl_matrix3du_t *m);
 * @param in    input vector
 * @param f     filter matrix
 */
-void dl_matrix3d_dot_product(dl_matrix3d_t *out, dl_matrix3d_t *in, dl_matrix3d_t *f);
-
-/**
- * @brief Do a relu (Rectifier Linear Unit) operation, update the input matrix3d
- *
- * @param in        Floating point input matrix3d
- * @param clip      If value is higher than this, it will be clipped to this value
- */
-void dl_matrix3d_relu(dl_matrix3d_t *m, fptp_t clip);
-
-/**
- * @brief Do a leaky relu (Rectifier Linear Unit) operation, update the input matrix3d
- *
- * @param in        Floating point input matrix3d
- * @param clip      If value is higher than this, it will be clipped to this value
- * @param alpha     If value is less than zero, it will be updated by multiplying this factor
- */
-void dl_matrix3d_leaky_relu(dl_matrix3d_t *m, fptp_t clip, fptp_t alpha);
+void dl_matrix3dff_dot_product(dl_matrix3d_t *out, dl_matrix3d_t *in, dl_matrix3d_t *f);

 /**
 * @brief Do a softmax operation on a matrix3d
@@ -127,18 +106,6 @@ void dl_matrix3d_leaky_relu(dl_matrix3d_t *m, fptp_t clip, fptp_t alpha);
 */
 void dl_matrix3d_softmax(dl_matrix3d_t *m);

-/**
- * @brief Do a general fully connected layer pass, dimension is (number, width, height, channel)
- *
- * @param in             Input matrix3d, size is (1, w, 1, 1)
- * @param filter         Weights of the neurons, size is (1, w, h, 1)
- * @param bias           Bias for the fc layer, size is (1, 1, 1, h)
- * @return               The result of fc layer, size is (1, 1, 1, h)
- */
-dl_matrix3d_t *dl_matrix3d_fc(dl_matrix3d_t *in,
-                              dl_matrix3d_t *filter,
-                              dl_matrix3d_t *bias);
-
 /**
 * @brief Copy a range of float items from an existing matrix to a preallocated matrix
 *
@@ -173,9 +140,6 @@ void dl_matrix3du_slice_copy(dl_matrix3du_t *dst,
                             int w,
                             int h);

-
-void dl_matrix3d_conv_1x1 (dl_matrix3d_t *out, dl_matrix3d_t *in, dl_matrix3d_t *f);
-
 /**
 * @brief Do a general CNN layer pass, dimension is (number, width, height, channel)
 *
@@ -197,11 +161,6 @@ dl_matrix3d_t *dl_matrix3d_conv(dl_matrix3d_t *in,
                                int padding,
                                int mode);

-void dl_matrix3d_conv_3x3_normal (dl_matrix3d_t *out,
-                                    dl_matrix3d_t *in,
-                                    dl_matrix3d_t *f,
-                                    int step_x,
-                                    int step_y);
 /**
 * @brief Do a general CNN layer pass, dimension is (number, width, height, channel)
 *
@@ -215,57 +174,6 @@ void dl_matrix3d_conv_3x3_normal (dl_matrix3d_t *out,
 *                       If ESP_PLATFORM is not defined, this value is not used. Default is 0
 * @return               The result of CNN layer
 */
-dl_matrix3d_t *dl_matrix3du_conv(dl_matrix3du_t *in,
-                                 dl_matrix3d_t *filter,
-                                 dl_matrix3d_t *bias,
-                                 int stride_x,
-                                 int stride_y,
-                                 int padding,
-                                 int mode);
-
-/**
- * @brief Do a depthwise CNN layer pass, dimension is (number, width, height, channel)
- *
- * @param in             Input matrix3d
- * @param filter         Weights of the neurons
- * @param stride_x       The step length of the convolution window in x(width) direction
- * @param stride_y       The step length of the convolution window in y(height) direction
- * @param padding        One of VALID or SAME
- * @param mode           Do convolution using C implement or xtensa implement, 0 or 1, with respect
- *                       If ESP_PLATFORM is not defined, this value is not used. Default is 0
- * @return               The result of depthwise CNN layer
- */
-dl_matrix3d_t *dl_matrix3d_depthwise_conv(dl_matrix3d_t *in,
-                                          dl_matrix3d_t *filter,
-                                          int stride_x,
-                                          int stride_y,
-                                          int padding,
-                                          int mode);
-
-void dl_matrix3d_depthwise_conv_3x3_normal(dl_matrix3d_t *out,
-                                            dl_matrix3d_t *in,
-                                            dl_matrix3d_t *f,
-                                            int step_x,
-                                            int step_y);
-/**
- * @brief Do a mobilenet block forward, dimension is (number, width, height, channel)
- *
- * @param in             Input matrix3d
- * @param filter         Weights of the neurons
- * @param stride_x       The step length of the convolution window in x(width) direction
- * @param stride_y       The step length of the convolution window in y(height) direction
- * @param padding        One of VALID or SAME
- * @param mode           Do convolution using C implement or xtensa implement, 0 or 1, with respect
- *                       If ESP_PLATFORM is not defined, this value is not used. Default is 0
- * @return               The result of depthwise CNN layer
- */
-dl_matrix3d_t *dl_matrix3d_mobilenet(void *in,
-                                     dl_matrix3d_t *dilate,
-                                     dl_matrix3d_t *depthwise,
-                                     dl_matrix3d_t *compress,
-                                     dl_matrix3d_t *bias,
-                                     dl_matrix3d_t *prelu,
-                                     dl_matrix3d_conv_config_t *config);

 /**
 * @brief Do a global average pooling layer pass, dimension is (number, width, height, channel)
@@ -297,13 +205,6 @@ void dl_matrix3d_batch_normalize(dl_matrix3d_t *m,
 */
 dl_matrix3d_t *dl_matrix3d_add(dl_matrix3d_t *in_1, dl_matrix3d_t *in_2);

-/**
- * @brief Do a standard relu operation, update the input matrix3d
- *
- * @param m        Floating point input matrix3d
- */
-void dl_matrix3d_relu_std(dl_matrix3d_t *m);
-
 /**
 * @brief Concatenate the channels of two matrix3ds into a new matrix3d
 *
@@ -372,7 +273,7 @@ dl_matrix3d_t *dl_matrix3d_concat_8(dl_matrix3d_t *in_1,
 *                              If ESP_PLATFORM is not defined, this value is not used. Default is 0
 * @return                      The result of a mobilefacenet block
 */
-dl_matrix3d_t *dl_matrix3d_mobilefaceblock(void *in,
+dl_matrix3d_t *dl_matrix3d_mobilefaceblock(dl_matrix3d_t *in,
                                           dl_matrix3d_t *pw,
                                           dl_matrix3d_t *pw_bn_scale,
                                           dl_matrix3d_t *pw_bn_offset,
@@ -410,7 +311,7 @@ dl_matrix3d_t *dl_matrix3d_mobilefaceblock(void *in,
 *                              If ESP_PLATFORM is not defined, this value is not used. Default is 0
 * @return                      The result of a mobilefacenet block
 */
-dl_matrix3d_t *dl_matrix3d_mobilefaceblock_split(void *in,
+dl_matrix3d_t *dl_matrix3d_mobilefaceblock_split(dl_matrix3d_t *in,
                                                 dl_matrix3d_t *pw_1,
                                                 dl_matrix3d_t *pw_2,
                                                 dl_matrix3d_t *pw_bn_scale,
@@ -427,23 +328,200 @@ dl_matrix3d_t *dl_matrix3d_mobilefaceblock_split(void *in,
                                                 int padding,
                                                 int mode,
                                                 int shortcut);
-/**
- * @brief Print the matrix3d items
- *
- * @param m              dl_matrix3d_t to be printed
- * @param message        name of matrix
- */
-void dl_matrix3d_print(dl_matrix3d_t *m, char *message);

-/**
- * @brief Print the matrix3du items
- *
- * @param m              dl_matrix3du_t to be printed
- * @param message        name of matrix
- */
-void dl_matrix3du_print(dl_matrix3du_t *m, char *message);
-
-
-void dl_matrix3d_init_bias (dl_matrix3d_t *out, dl_matrix3d_t *bias);
+void dl_matrix3d_init_bias(dl_matrix3d_t *out, dl_matrix3d_t *bias);

 void dl_matrix3d_multiply(dl_matrix3d_t *out, dl_matrix3d_t *in1, dl_matrix3d_t *in2);
+
+//
+// Activation
+//
+
+/**
+ * @brief Do a standard relu operation, update the input matrix3d
+ *
+ * @param m        Floating point input matrix3d
+ */
+void dl_matrix3d_relu(dl_matrix3d_t *m);
+
+/**
+ * @brief Do a relu (Rectified Linear Unit) operation with an upper clip, update the input matrix3d
+ *
+ * @param m         Floating point input matrix3d
+ * @param clip      If value is higher than this, it will be clipped to this value
+ */
+void dl_matrix3d_relu_clip(dl_matrix3d_t *m, fptp_t clip);
+
+/**
+ * @brief Do a PReLU (Parametric Rectified Linear Unit) operation, update the input matrix3d
+ *
+ * @param in        Floating point input matrix3d
+ * @param alpha     Floating point matrix3d of factors; a value less than zero is multiplied by its factor
+ */
+void dl_matrix3d_p_relu(dl_matrix3d_t *in, dl_matrix3d_t *alpha);
+
+/**
+ * @brief Do a leaky relu (Rectified Linear Unit) operation, update the input matrix3d
+ *
+ * @param m         Floating point input matrix3d
+ * @param alpha     If value is less than zero, it will be updated by multiplying this factor
+ */
+void dl_matrix3d_leaky_relu(dl_matrix3d_t *m, fptp_t alpha);
+
+//
+// Conv 1x1
+//
+void dl_matrix3dff_conv_1x1(dl_matrix3d_t *out,
+                            dl_matrix3d_t *in,
+                            dl_matrix3d_t *filter);
+
+void dl_matrix3dff_conv_1x1_with_bias(dl_matrix3d_t *out,
+                                      dl_matrix3d_t *in,
+                                      dl_matrix3d_t *filter,
+                                      dl_matrix3d_t *bias);
+
+void dl_matrix3duf_conv_1x1(dl_matrix3d_t *out,
+                            dl_matrix3du_t *in,
+                            dl_matrix3d_t *filter);
+
+void dl_matrix3duf_conv_1x1_with_bias(dl_matrix3d_t *out,
+                                      dl_matrix3du_t *in,
+                                      dl_matrix3d_t *filter,
+                                      dl_matrix3d_t *bias);
+
+//
+// Conv 3x3
+//
+void dl_matrix3dff_conv_3x3_op(dl_matrix3d_t *out,
+                               dl_matrix3d_t *in,
+                               dl_matrix3d_t *f,
+                               int step_x,
+                               int step_y);
+
+dl_matrix3d_t *dl_matrix3dff_conv_3x3(dl_matrix3d_t *in,
+                                      dl_matrix3d_t *filter,
+                                      dl_matrix3d_t *bias,
+                                      int stride_x,
+                                      int stride_y,
+                                      dl_padding_type padding);
+
+//
+// Conv Common
+//
+
+dl_matrix3d_t *dl_matrix3duf_conv_common(dl_matrix3du_t *in,
+                                         dl_matrix3d_t *filter,
+                                         dl_matrix3d_t *bias,
+                                         int stride_x,
+                                         int stride_y,
+                                         dl_padding_type padding);
+
+//
+// Depthwise 3x3
+//
+
+dl_matrix3d_t *dl_matrix3dff_depthwise_conv_3x3(dl_matrix3d_t *in,
+                                                dl_matrix3d_t *filter,
+                                                int stride_x,
+                                                int stride_y,
+                                                int padding);
+
+dl_matrix3d_t *dl_matrix3duf_depthwise_conv_3x3(dl_matrix3du_t *in,
+                                                dl_matrix3d_t *filter,
+                                                int stride_x,
+                                                int stride_y,
+                                                int padding);
+
+void dl_matrix3dff_depthwise_conv_3x3_op(dl_matrix3d_t *out,
+                                         dl_matrix3d_t *in,
+                                         dl_matrix3d_t *f,
+                                         int step_x,
+                                         int step_y);
+
+//
+// Depthwise Common
+//
+
+/**
+ * @brief Do a depthwise CNN layer pass, dimension is (number, width, height, channel)
+ *
+ * @param in             Input matrix3d
+ * @param filter         Weights of the neurons
+ * @param stride_x       The step length of the convolution window in x(width) direction
+ * @param stride_y       The step length of the convolution window in y(height) direction
+ * @param padding        Padding type as a dl_padding_type value, one of VALID or SAME
+ * @note                 There is no mode argument: this signature offers no choice between
+ *                       the C and xtensa implementations
+ * @return               The result of depthwise CNN layer
+ */
+dl_matrix3d_t *dl_matrix3dff_depthwise_conv_common(dl_matrix3d_t *in,
+                                                   dl_matrix3d_t *filter,
+                                                   int stride_x,
+                                                   int stride_y,
+                                                   dl_padding_type padding);
+
+//
+// FC
+//
+/**
+ * @brief Do a general fully connected layer pass, dimension is (number, width, height, channel)
+ *
+ * @param out            Output matrix3d that receives the result of fc layer,
+ *                       size is (1, 1, 1, h)
+ * @param in             Input matrix3d, size is (1, w, 1, 1)
+ * @param filter         Weights of the neurons, size is (1, w, h, 1)
+ */
+void dl_matrix3dff_fc(dl_matrix3d_t *out,
+                      dl_matrix3d_t *in,
+                      dl_matrix3d_t *filter);
+
+void dl_matrix3dff_fc_with_bias(dl_matrix3d_t *out,
+                                dl_matrix3d_t *in,
+                                dl_matrix3d_t *filter,
+                                dl_matrix3d_t *bias);
+
+//
+// Mobilenet
+//
+
+/**
+ * @brief Do a mobilenet block forward, dimension is (number, width, height, channel)
+ *
+ * @param in                               Input matrix3d
+ * @param dilate_filter,dilate_prelu       Filter and PReLU matrices, presumably for the dilate stage (TODO confirm)
+ * @param depthwise_filter,depthwise_prelu Filter and PReLU matrices, presumably for the depthwise stage (TODO confirm)
+ * @param compress_filter                  Filter matrix, presumably for the compress stage (TODO confirm)
+ * @param bias                             Bias matrix3d (TODO confirm which stage it is applied to)
+ * @param config                           dl_matrix3d_mobilenet_config_t passed by value; its fields include
+ *                                         stride_x, stride_y and padding
+ * @return                                 The result of the mobilenet block
+ */
+dl_matrix3d_t *dl_matrix3dff_mobilenet(dl_matrix3d_t *in,
+                                       dl_matrix3d_t *dilate_filter,
+                                       dl_matrix3d_t *dilate_prelu,
+                                       dl_matrix3d_t *depthwise_filter,
+                                       dl_matrix3d_t *depthwise_prelu,
+                                       dl_matrix3d_t *compress_filter,
+                                       dl_matrix3d_t *bias,
+                                       dl_matrix3d_mobilenet_config_t config);
+
+/**
+ * @brief Do a mobilenet block forward, dimension is (number, width, height, channel)
+ *
+ * @param in                               Input matrix3du
+ * @param dilate_filter,dilate_prelu       Filter and PReLU matrices, presumably for the dilate stage (TODO confirm)
+ * @param depthwise_filter,depthwise_prelu Filter and PReLU matrices, presumably for the depthwise stage (TODO confirm)
+ * @param compress_filter                  Filter matrix, presumably for the compress stage (TODO confirm)
+ * @param bias                             Bias matrix3d (TODO confirm which stage it is applied to)
+ * @param config                           dl_matrix3d_mobilenet_config_t passed by value; its fields include
+ *                                         stride_x, stride_y and padding
+ * @return                                 The result of the mobilenet block
+ */
+dl_matrix3d_t *dl_matrix3duf_mobilenet(dl_matrix3du_t *in,
+                                       dl_matrix3d_t *dilate_filter,
+                                       dl_matrix3d_t *dilate_prelu,
+                                       dl_matrix3d_t *depthwise_filter,
+                                       dl_matrix3d_t *depthwise_prelu,
+                                       dl_matrix3d_t *compress_filter,
+                                       dl_matrix3d_t *bias,
+                                       dl_matrix3d_mobilenet_config_t config);
--- a/tools/sdk/include/esp-face/dl_lib_matrix3dq.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix3dq.h
@@ -10,22 +10,48 @@ typedef int16_t qtp_t;
 typedef struct
 {
    /******* fix start *******/
-    int w;  // Width
-    int h;  // Height
-    int c;  // Channel
-    int n;  // Number, to record filter's out_channels. input and output must be 1
+    int w; // Width
+    int h; // Height
+    int c; // Channel
+    int n; // Number, to record filter's out_channels. input and output must be 1
    int stride;
    int exponent;
    qtp_t *item;
    /******* fix end *******/
 } dl_matrix3dq_t;

+#ifndef DL_QTP_SHIFT
 #define DL_QTP_SHIFT 15
-#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
-//#define DL_ITMQ(m, x, y) m->itemq[(y)+(x)*m->stride]
+#define DL_ITMQ(m, x, y) m->itemq[(y) + (x)*m->stride] // NOTE(review): m is unparenthesized and must have an itemq member (dl_matrix3dq_t has item) - confirm intended type
+#define DL_QTP_RANGE ((1 << DL_QTP_SHIFT) - 1)
+#define DL_QTP_MAX 32767
+#define DL_QTP_MIN -32768
+
 #define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null

 #define DL_SHIFT_AUTO 32
+#endif
+
+typedef enum
+{
+    DL_C_IMPL = 0,
+    DL_XTENSA_IMPL = 1
+} dl_conv_mode;
+
+typedef struct
+{
+    int stride_x;
+    int stride_y;
+    dl_padding_type padding;
+    dl_conv_mode mode;
+    int dilate_exponent;
+    int depthwise_exponent;
+    int compress_exponent;
+} dl_matrix3dq_mobilenet_config_t;
+
+//
+// Utility
+//

 /*
 * @brief Allocate a 3D matrix
@@ -49,9 +75,6 @@ void dl_matrix3dq_free(dl_matrix3dq_t *m);
 * @param m     Matrix to zero
 */

- dl_matrix3d_t *dl_matrix3d_from_matrixq(dl_matrix3dq_t *m);
- dl_matrix3dq_t *dl_matrixq_from_matrix3d_qmf(dl_matrix3d_t *m,int exponent);
- dl_matrix3dq_t *dl_matrixq_from_matrix3d(dl_matrix3d_t *m);
 /**
 * @brief Copy a range of items from an existing matrix to a preallocated matrix
 *
@@ -62,8 +85,143 @@ void dl_matrix3dq_free(dl_matrix3dq_t *m);
 * @param h     Height of the resulting matrix
 * @return The resulting slice matrix
 */
-void dl_matrix3dq_slice_copy (dl_matrix3dq_t *dst, dl_matrix3dq_t *src, int x, int y, int w, int h);
+void dl_matrix3dq_slice_copy(dl_matrix3dq_t *dst, dl_matrix3dq_t *src, int x, int y, int w, int h);

+dl_matrix3d_t *dl_matrix3d_from_matrixq(dl_matrix3dq_t *m);
+
+dl_matrix3dq_t *dl_matrixq_from_matrix3d_qmf(dl_matrix3d_t *m, int exponent);
+
+dl_matrix3dq_t *dl_matrixq_from_matrix3d(dl_matrix3d_t *m);
+
+qtp_t dl_matrix3dq_quant_range_exceeded_checking(int64_t value, char *location);
+
+void dl_matrix3dq_shift_exponent(dl_matrix3dq_t *out, dl_matrix3dq_t *in, int exponent);
+
+void dl_matrix3dq_batch_normalize(dl_matrix3dq_t *m, dl_matrix3dq_t *scale, dl_matrix3dq_t *offset);
+
+dl_matrix3dq_t *dl_matrix3dq_add(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, int exponent);
+
+//
+// Activation
+//
+void dl_matrix3dq_relu(dl_matrix3dq_t *in);
+
+void dl_matrix3dq_relu_clip(dl_matrix3dq_t *in, fptp_t clip);
+
+void dl_matrix3dq_leaky_relu(dl_matrix3dq_t *in, fptp_t alpha, fptp_t clip);
+
+void dl_matrix3dq_p_relu(dl_matrix3dq_t *in, dl_matrix3dq_t *alpha);
+
+//
+// Concat
+//
+dl_matrix3dq_t *dl_matrix3dq_concat(dl_matrix3dq_t *in_1,
+                                    dl_matrix3dq_t *in_2);
+
+dl_matrix3dq_t *dl_matrix3dq_concat_4(dl_matrix3dq_t *in_1,
+                                      dl_matrix3dq_t *in_2,
+                                      dl_matrix3dq_t *in_3,
+                                      dl_matrix3dq_t *in_4);
+
+dl_matrix3dq_t *dl_matrix3dq_concat_8(dl_matrix3dq_t *in_1,
+                                      dl_matrix3dq_t *in_2,
+                                      dl_matrix3dq_t *in_3,
+                                      dl_matrix3dq_t *in_4,
+                                      dl_matrix3dq_t *in_5,
+                                      dl_matrix3dq_t *in_6,
+                                      dl_matrix3dq_t *in_7,
+                                      dl_matrix3dq_t *in_8);
+
+//
+// Conv 1x1
+//
+void dl_matrix3dqq_conv_1x1(dl_matrix3dq_t *out,
+                            dl_matrix3dq_t *in,
+                            dl_matrix3dq_t *filter,
+                            dl_conv_mode mode);
+
+void dl_matrix3dqq_conv_1x1_with_relu(dl_matrix3dq_t *out,
+                                      dl_matrix3dq_t *in,
+                                      dl_matrix3dq_t *filter,
+                                      dl_conv_mode mode);
+
+void dl_matrix3dqq_conv_1x1_with_bias(dl_matrix3dq_t *out,
+                                      dl_matrix3dq_t *in,
+                                      dl_matrix3dq_t *filter,
+                                      dl_matrix3dq_t *bias,
+                                      dl_conv_mode mode,
+                                      char *name);
+
+void dl_matrix3dqq_conv_1x1_with_prelu(dl_matrix3dq_t *out,
+                                       dl_matrix3dq_t *in,
+                                       dl_matrix3dq_t *filter,
+                                       dl_matrix3dq_t *prelu,
+                                       dl_conv_mode mode);
+
+void dl_matrix3dqq_conv_1x1_with_bias_relu(dl_matrix3dq_t *out,
+                                           dl_matrix3dq_t *in,
+                                           dl_matrix3dq_t *filter,
+                                           dl_matrix3dq_t *bias,
+                                           dl_conv_mode mode);
+
+void dl_matrix3duq_conv_1x1(dl_matrix3dq_t *out,
+                            dl_matrix3du_t *in,
+                            dl_matrix3dq_t *filter,
+                            dl_conv_mode mode);
+
+void dl_matrix3duq_conv_1x1_with_bias(dl_matrix3dq_t *out,
+                                      dl_matrix3du_t *in,
+                                      dl_matrix3dq_t *filter,
+                                      dl_matrix3dq_t *bias,
+                                      dl_conv_mode mode);
+
+//
+// Conv 3x3
+//
+void dl_matrix3dqq_conv_3x3_op(dl_matrix3dq_t *out,
+                               dl_matrix3dq_t *in,
+                               dl_matrix3dq_t *f,
+                               int stride_x,
+                               int stride_y);
+
+dl_matrix3dq_t *dl_matrix3dqq_conv_3x3(dl_matrix3dq_t *in,
+                                       dl_matrix3dq_t *filter,
+                                       int stride_x,
+                                       int stride_y,
+                                       dl_padding_type padding,
+                                       int exponent);
+
+dl_matrix3dq_t *dl_matrix3dqq_conv_3x3_with_bias(dl_matrix3dq_t *in,
+                                                 dl_matrix3dq_t *f,
+                                                 dl_matrix3dq_t *bias,
+                                                 int stride_x,
+                                                 int stride_y,
+                                                 dl_padding_type padding,
+                                                 int exponent,
+                                                 int relu);
+
+dl_matrix3dq_t *dl_matrix3duq_conv_3x3_with_bias(dl_matrix3du_t *in,
+                                                 dl_matrix3dq_t *filter,
+                                                 dl_matrix3dq_t *bias,
+                                                 int stride_x,
+                                                 int stride_y,
+                                                 dl_padding_type padding,
+                                                 int exponent,
+                                                 char *name);
+
+dl_matrix3dq_t *dl_matrix3duq_conv_3x3_with_bias_prelu(dl_matrix3du_t *in,
+                                                       dl_matrix3dq_t *filter,
+                                                       dl_matrix3dq_t *bias,
+                                                       dl_matrix3dq_t *prelu,
+                                                       int stride_x,
+                                                       int stride_y,
+                                                       dl_padding_type padding,
+                                                       int exponent,
+                                                       char *name);
+
+//
+// Conv common
+//

 /**
 * @brief Do a general CNN layer pass, dimension is (number, width, height, channel)
@@ -78,67 +236,213 @@ void dl_matrix3dq_slice_copy (dl_matrix3dq_t *dst, dl_matrix3dq_t *src, int x, i
 *                       If ESP_PLATFORM is not defined, this value is not used.
 * @return               The result of CNN layer.
 */
-dl_matrix3dq_t *dl_matrix3dq_fc (dl_matrix3dq_t *in, dl_matrix3dq_t *filter, dl_matrix3dq_t *bias, int exponent,int mode);
+dl_matrix3dq_t *dl_matrix3dqq_conv_common(dl_matrix3dq_t *in,
+                                          dl_matrix3dq_t *filter,
+                                          dl_matrix3dq_t *bias,
+                                          int stride_x,
+                                          int stride_y,
+                                          dl_padding_type padding,
+                                          int exponent,
+                                          dl_conv_mode mode);

-dl_matrix3dq_t *dl_matrix3dq_conv (dl_matrix3dq_t *in, dl_matrix3dq_t *filter, dl_matrix3dq_t *bias,
-                                    int stride_x, int stride_y, int padding, int exponent, int mode);
-dl_matrix3dq_t *dl_matrix3dq_conv_normal (dl_matrix3dq_t *in, dl_matrix3dq_t *filter, dl_matrix3dq_t *bias,
-                                    int stride_x, int stride_y, int padding, int exponent, int mode);
+dl_matrix3dq_t *dl_matrix3duq_conv_common(dl_matrix3du_t *in,
+                                          dl_matrix3dq_t *filter,
+                                          dl_matrix3dq_t *bias,
+                                          int stride_x,
+                                          int stride_y,
+                                          dl_padding_type padding,
+                                          int exponent,
+                                          dl_conv_mode mode);

-void dl_matrix3dq_conv_1x1 (dl_matrix3dq_t *out, dl_matrix3dq_t *in, dl_matrix3dq_t *f, dl_conv_mode mode);
+//
+// Depthwise 3x3
+//
+dl_matrix3dq_t *dl_matrix3duq_depthwise_conv_3x3(dl_matrix3du_t *in,
+                                                 dl_matrix3dq_t *filter,
+                                                 int stride_x,
+                                                 int stride_y,
+                                                 dl_padding_type padding,
+                                                 int exponent);

-void dl_matrix3dq_conv_3x3_normal (dl_matrix3dq_t *out,
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3(dl_matrix3dq_t *in,
+                                                 dl_matrix3dq_t *filter,
+                                                 int stride_x,
+                                                 int stride_y,
+                                                 dl_padding_type padding,
+                                                 int exponent);
+
+#if CONFIG_DEVELOPING_CODE
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_2(dl_matrix3dq_t *in,
+                                                   dl_matrix3dq_t *filter,
+                                                   int stride_x,
+                                                   int stride_y,
+                                                   dl_padding_type padding,
+                                                   int exponent);
+
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_3(dl_matrix3dq_t *in,
+                                                   dl_matrix3dq_t *filter,
+                                                   int stride_x,
+                                                   int stride_y,
+                                                   dl_padding_type padding,
+                                                   int exponent);
+#endif
+
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_with_bias(dl_matrix3dq_t *in,
+                                                           dl_matrix3dq_t *f,
+                                                           dl_matrix3dq_t *bias,
+                                                           int stride_x,
+                                                           int stride_y,
+                                                           dl_padding_type padding,
+                                                           int exponent,
+                                                           int relu);
+
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3_with_prelu(dl_matrix3dq_t *in,
+                                                            dl_matrix3dq_t *filter,
+                                                            dl_matrix3dq_t *prelu,
+                                                            int stride_x,
+                                                            int stride_y,
+                                                            dl_padding_type padding,
+                                                            int exponent);
+
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_3x3s1_with_bias(dl_matrix3dq_t *in,
+                                                             dl_matrix3dq_t *f,
+                                                             dl_matrix3dq_t *bias,
+                                                             dl_padding_type padding,
+                                                             int exponent,
+                                                             int relu);
+
+//
+// Depthwise Common
+//
+#if CONFIG_DEVELOPING_CODE
+dl_matrix3dq_t *dl_matrix3dqq_depthwise_conv_common(dl_matrix3dq_t *in,
+                                                    dl_matrix3dq_t *filter,
+                                                    int stride_x,
+                                                    int stride_y,
+                                                    dl_padding_type padding,
+                                                    int exponent,
+                                                    dl_conv_mode mode);
+
+dl_matrix3dq_t *dl_matrix3duq_depthwise_conv_common(dl_matrix3du_t *in,
+                                                    dl_matrix3dq_t *filter,
+                                                    int stride_x,
+                                                    int stride_y,
+                                                    dl_padding_type padding,
+                                                    int exponent,
+                                                    dl_conv_mode mode);
+#endif
+
+//
+// Dot Product
+//
+
+void dl_matrix3dqq_dot_product(dl_matrix3dq_t *out,
+                               dl_matrix3dq_t *in,
+                               dl_matrix3dq_t *filter,
+                               dl_conv_mode mode);
+
+//
+// FC
+//
+
+void dl_matrix3dqq_fc(dl_matrix3dq_t *out,
+                      dl_matrix3dq_t *in,
+                      dl_matrix3dq_t *filter,
+                      dl_conv_mode mode);
+
+void dl_matrix3dqq_fc_with_bias(dl_matrix3dq_t *out,
+                                dl_matrix3dq_t *in,
+                                dl_matrix3dq_t *filter,
+                                dl_matrix3dq_t *bias,
+                                dl_conv_mode mode,
+                                char *name);
+
+//
+// Mobilefaceblock
+//
+
+dl_matrix3dq_t *dl_matrix3dqq_mobilefaceblock_split(dl_matrix3dq_t *in,
+                                                    dl_matrix3dq_t *pw_1,
+                                                    dl_matrix3dq_t *pw_2,
+                                                    dl_matrix3dq_t *pw_bias,
+                                                    dl_matrix3dq_t *dw,
+                                                    dl_matrix3dq_t *dw_bias,
+                                                    dl_matrix3dq_t *pw_linear_1,
+                                                    dl_matrix3dq_t *pw_linear_2,
+                                                    dl_matrix3dq_t *pw_linear_bias,
+                                                    int pw_exponent,
+                                                    int dw_exponent,
+                                                    int pw_linear_exponent,
+                                                    int stride_x,
+                                                    int stride_y,
+                                                    dl_padding_type padding,
+                                                    dl_conv_mode mode,
+                                                    int shortcut);
+
+dl_matrix3dq_t *dl_matrix3dqq_mobilefaceblock(dl_matrix3dq_t *in,
+                                              dl_matrix3dq_t *pw,
+                                              dl_matrix3dq_t *pw_bias,
+                                              dl_matrix3dq_t *dw,
+                                              dl_matrix3dq_t *dw_bias,
+                                              dl_matrix3dq_t *pw_linear,
+                                              dl_matrix3dq_t *pw_linear_bias,
+                                              int pw_exponent,
+                                              int dw_exponent,
+                                              int pw_linear_exponent,
+                                              int stride_x,
+                                              int stride_y,
+                                              dl_padding_type padding,
+                                              dl_conv_mode mode,
+                                              int shortcut);
+
+//
+// Mobilenet
+//
+
+dl_matrix3dq_t *dl_matrix3dqq_mobilenet(dl_matrix3dq_t *in,
+                                        dl_matrix3dq_t *dilate,
+                                        dl_matrix3dq_t *dilate_prelu,
+                                        dl_matrix3dq_t *depthwise,
+                                        dl_matrix3dq_t *depth_prelu,
+                                        dl_matrix3dq_t *compress,
+                                        dl_matrix3dq_t *bias,
+                                        dl_matrix3dq_mobilenet_config_t config,
+                                        char *name);
+
+dl_matrix3dq_t *dl_matrix3duq_mobilenet(dl_matrix3du_t *in,
+                                        dl_matrix3dq_t *dilate,
+                                        dl_matrix3dq_t *dilate_prelu,
+                                        dl_matrix3dq_t *depthwise,
+                                        dl_matrix3dq_t *depth_prelu,
+                                        dl_matrix3dq_t *compress,
+                                        dl_matrix3dq_t *bias,
+                                        dl_matrix3dq_mobilenet_config_t config,
+                                        char *name);
+
+//
+// Padding
+//
+
+dl_error_type dl_matrix3dqq_padding(dl_matrix3dq_t **padded_in,
+                                    dl_matrix3dq_t **out,
                                    dl_matrix3dq_t *in,
-                                    dl_matrix3dq_t *f,
-                                    int step_x,
-                                    int step_y);
-dl_matrix3dq_t *dl_matrix3dq_conv_3x3_with_bn (dl_matrix3dq_t *in,
-                                    dl_matrix3dq_t *f,
-                                    dl_matrix3dq_t *scale,
-                                    dl_matrix3dq_t *offset,
-                                    int step_x,
-                                    int step_y,
+                                    int out_c,
+                                    int stride_x,
+                                    int stride_y,
                                    int padding,
-                                    int exponent,
-                                    int relu);
-/**
- * @brief Print the matrix3d items
- *
- * @param m              dl_matrix3d_t to be printed
- * @param message        name of matrix
- */
-void dl_matrix3dq_print (dl_matrix3dq_t *m, char *message);
+                                    int exponent);

-dl_matrix3dq_t *dl_matrix3dq_depthwise_conv (dl_matrix3dq_t *in, dl_matrix3dq_t *filter,
-                                    int stride_x, int stride_y, int padding, int exponent, int mode);
-dl_matrix3dq_t *dl_matrix3dq_depthwise_conv_3x3_with_bn(dl_matrix3dq_t *in,
-                                            dl_matrix3dq_t *f,
-                                            dl_matrix3dq_t *scale,
-                                            dl_matrix3dq_t *offset,
-                                            int step_x,
-                                            int step_y,
-                                            int padding,
-                                            int exponent,
-                                            int relu);
+dl_error_type dl_matrix3duq_padding(dl_matrix3du_t **padded_in,
+                                    dl_matrix3dq_t **out,
+                                    dl_matrix3du_t *in,
+                                    int out_c,
+                                    int stride_x,
+                                    int stride_y,
+                                    int padding,
+                                    int exponent);

-void dl_matrix3dq_relu (dl_matrix3dq_t *m, fptp_t clip);
+//
+// Pooling
+//

-
-
-dl_matrix3dq_t *dl_matrix3dq_global_pool (dl_matrix3dq_t *in);
-void dl_matrix3dq_batch_normalize (dl_matrix3dq_t *m, dl_matrix3dq_t *scale, dl_matrix3dq_t *offset);
-dl_matrix3dq_t *dl_matrix3dq_add (dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, int exponent);
-void dl_matrix3dq_relu_std (dl_matrix3dq_t *m);
-dl_matrix3dq_t *dl_matrix3dq_mobilefaceblock (void *in, dl_matrix3dq_t *pw, dl_matrix3dq_t *pw_bn_scale,dl_matrix3dq_t *pw_bn_offset,
-                                        dl_matrix3dq_t *dw, dl_matrix3dq_t *dw_bn_scale,dl_matrix3dq_t *dw_bn_offset,
-                                        dl_matrix3dq_t *pw_linear, dl_matrix3dq_t *pw_linear_bn_scale,dl_matrix3dq_t *pw_linear_bn_offset,
-                                        int pw_exponent,int dw_exponent,int pw_linear_exponent,int stride_x, int stride_y, int padding, int mode, int shortcut);
-
-dl_matrix3dq_t *dl_matrix3dq_concat(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2);
-dl_matrix3dq_t *dl_matrix3dq_concat_4(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, dl_matrix3dq_t *in_3, dl_matrix3dq_t *in_4);
-dl_matrix3dq_t *dl_matrix3dq_concat_8(dl_matrix3dq_t *in_1, dl_matrix3dq_t *in_2, dl_matrix3dq_t *in_3, dl_matrix3dq_t *in_4, dl_matrix3dq_t *in_5, dl_matrix3dq_t *in_6, dl_matrix3dq_t *in_7, dl_matrix3dq_t *in_8);
-
-dl_matrix3dq_t *dl_matrix3dq_mobilefaceblock_split (void *in, dl_matrix3dq_t *pw_1, dl_matrix3dq_t *pw_2, dl_matrix3dq_t *pw_bn_scale,dl_matrix3dq_t *pw_bn_offset,
-                                        dl_matrix3dq_t *dw, dl_matrix3dq_t *dw_bn_scale,dl_matrix3dq_t *dw_bn_offset,
-                                        dl_matrix3dq_t *pw_linear_1, dl_matrix3dq_t *pw_linear_2, dl_matrix3dq_t *pw_linear_bn_scale,dl_matrix3dq_t *pw_linear_bn_offset,
-                                        int pw_exponent,int dw_exponent,int pw_linear_exponent,int stride_x, int stride_y, int padding, int mode, int shortcut);
+dl_matrix3dq_t *dl_matrix3dq_global_pool(dl_matrix3dq_t *in);
--- a/tools/sdk/include/esp-face/fd_forward.h
+++ b/tools/sdk/include/esp-face/fd_forward.h
@@ -40,13 +40,27 @@ extern "C"

    typedef struct
    {
-        float min_face;                 /// the minimum size of face can be detected
-        float pyramid;                  /// the pyramid scale
-        int pyramid_times;              /// the pyramid resizing times
-        threshold_config_t p_threshold; /// score, nms and candidate threshold of pnet
-        threshold_config_t r_threshold; /// score, nms and candidate threshold of rnet
-        threshold_config_t o_threshold; /// score, nms and candidate threshold of onet
-        mtmn_resize_type type;          /// image resize type. 'pyramid' will lose efficacy, when 'type'==FAST.
+        float score;          /// score threshold for filter candidates by score
+        float nms;            /// nms threshold for nms process
+        int candidate_number; /// candidate number limitation for each net
+    } threshold_config_t;
+
+    typedef struct
+    {
+        int w;                        /// net width
+        int h;                        /// net height
+        threshold_config_t threshold; /// threshold of net
+    } net_config_t;
+
+    typedef struct
+    {
+        float min_face;                 /// The minimum size of a detectable face
+        float pyramid;                  /// The scale of the gradient scaling for the input images
+        int pyramid_times;              /// The pyramid resizing times
+        threshold_config_t p_threshold; /// The thresholds for P-Net. For details, see the definition of threshold_config_t
+        threshold_config_t r_threshold; /// The thresholds for R-Net. For details, see the definition of threshold_config_t
+        threshold_config_t o_threshold; /// The thresholds for O-Net. For details, see the definition of threshold_config_t
+        mtmn_resize_type type;          /// The image resize type. 'pyramid' has no effect when 'type' == FAST.
    } mtmn_config_t;

    static inline mtmn_config_t mtmn_init_config()
--- a/tools/sdk/include/esp-face/fr_forward.h
+++ b/tools/sdk/include/esp-face/fr_forward.h
@@ -135,6 +135,7 @@ extern "C"
    uint8_t delete_face(face_id_list *l);
    int8_t delete_face_with_name(face_id_name_list *l, char *name);
    void delete_face_all_with_name(face_id_name_list *l);
+    dl_matrix3d_t *get_face_id(dl_matrix3du_t *aligned_face);
 #if __cplusplus
 }
 #endif
--- a/tools/sdk/include/esp-face/frmn.h
+++ b/tools/sdk/include/esp-face/frmn.h
@@ -24,6 +24,10 @@ extern "C"
     */
    dl_matrix3dq_t *frmn_q(dl_matrix3dq_t *in, dl_conv_mode mode);

+    dl_matrix3dq_t *frmn2_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+    dl_matrix3dq_t *frmn2p_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+    dl_matrix3dq_t *frmn2c_q(dl_matrix3dq_t *in, dl_conv_mode mode);
+
 #if __cplusplus
 }
 #endif
--- a/tools/sdk/include/esp-face/mtmn.h
+++ b/tools/sdk/include/esp-face/mtmn.h
@@ -28,20 +28,7 @@ extern "C"
 {
 #endif
 #include "dl_lib_matrix3d.h"
-
-    typedef struct
-    {
-        float score;          /// score threshold for filter candidates by score
-        float nms;            /// nms threshold for nms process
-        int candidate_number; /// candidate number limitation for each net
-    } threshold_config_t;
-
-    typedef struct
-    {
-        int w;                        /// net width
-        int h;                        /// net height
-        threshold_config_t threshold; /// threshold of net
-    } net_config_t;
+#include "dl_lib_matrix3dq.h"

    typedef struct
    {
@@ -51,30 +38,89 @@ extern "C"
    } mtmn_net_t;

    /**
-     * @brief Forward the pnet process, coarse detection
+     * @brief Free a mtmn_net_t
+     *
+     * @param p         A mtmn_net_t pointer
+     */
+    void mtmn_net_t_free(mtmn_net_t *p);
+
+    /**
+     * @brief Forward the pnet process (lite variant), coarse detection. Calculated in float.
     *
     * @param in        Image matrix, rgb888 format, size is 320x240
     * @return          Scores for every pixel, and box offset with respect.
     */
-    mtmn_net_t *pnet(dl_matrix3du_t *in);
+    mtmn_net_t *pnet_lite_f(dl_matrix3du_t *in);

    /**
-     * @brief Forward the rnet process, fine determine the boxes from pnet
+     * @brief Forward the rnet process (lite variant), refining the boxes from pnet. Calculated in float.
     *
     * @param in        Image matrix, rgb888 format
     * @param threshold Score threshold to detect human face
     * @return          Scores for every box, and box offset with respect.
     */
-    mtmn_net_t *rnet_with_score_verify(dl_matrix3du_t *in, float threshold);
+    mtmn_net_t *rnet_lite_f_with_score_verify(dl_matrix3du_t *in, float threshold);

    /**
-     * @brief Forward the onet process, fine determine the boxes from rnet
+     * @brief Forward the onet process (lite variant), refining the boxes from rnet. Calculated in float.
     *
     * @param in        Image matrix, rgb888 format
     * @param threshold Score threshold to detect human face
     * @return          Scores for every box, box offset, and landmark with respect.
     */
-    mtmn_net_t *onet_with_score_verify(dl_matrix3du_t *in, float threshold);
+    mtmn_net_t *onet_lite_f_with_score_verify(dl_matrix3du_t *in, float threshold);
+
+    /**
+     * @brief Forward the pnet process (lite variant), coarse detection. Calculated with quantization.
+     *
+     * @param in        Image matrix, rgb888 format, size is 320x240
+     * @return          Scores for every pixel, and the corresponding box offsets.
+     */
+    mtmn_net_t *pnet_lite_q(dl_matrix3du_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the rnet process (lite variant), refining the boxes from pnet. Calculated with quantization.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @return          Scores for every box, and the corresponding box offsets.
+     */
+    mtmn_net_t *rnet_lite_q_with_score_verify(dl_matrix3du_t *in, float threshold, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the onet process (lite variant), refining the boxes from rnet. Calculated with quantization.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @return          Scores for every box, and the corresponding box offsets and landmarks.
+     */
+    mtmn_net_t *onet_lite_q_with_score_verify(dl_matrix3du_t *in, float threshold, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the pnet process (heavy variant), coarse detection. Calculated with quantization.
+     *
+     * @param in        Image matrix, rgb888 format, size is 320x240
+     * @return          Scores for every pixel, and the corresponding box offsets.
+     */
+    mtmn_net_t *pnet_heavy_q(dl_matrix3du_t *in, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the rnet process (heavy variant), refining the boxes from pnet. Calculated with quantization.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @return          Scores for every box, and the corresponding box offsets.
+     */
+    mtmn_net_t *rnet_heavy_q_with_score_verify(dl_matrix3du_t *in, float threshold, dl_conv_mode mode);
+
+    /**
+     * @brief Forward the onet process (heavy variant), refining the boxes from rnet. Calculated with quantization.
+     *
+     * @param in        Image matrix, rgb888 format
+     * @param threshold Score threshold to detect human face
+     * @return          Scores for every box, and the corresponding box offsets and landmarks.
+     */
+    mtmn_net_t *onet_heavy_q_with_score_verify(dl_matrix3du_t *in, float threshold, dl_conv_mode mode);

 #ifdef __cplusplus
 }