Update IDF to v3.2 977854975 (#2771)

* Update IDF to v3.2 977854975 * Update app_httpd.cpp
2025-07-27 09:17:16 +02:00 · 2019-05-12 18:52:23 +03:00
parent aff2e42ac6
commit 0acf19af8f
105 changed files with 518 additions and 1050 deletions
--- a/tools/sdk/include/esp-face/dl_lib.h
+++ b/tools/sdk/include/esp-face/dl_lib.h
@ -1,336 +0,0 @@
-#ifndef DL_LIB_H
-#define DL_LIB_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "dl_lib_matrix.h"
-#include "dl_lib_matrixq.h"
-#include "dl_lib_matrix3d.h"
-#include "dl_lib_matrix3dq.h"
-
-    typedef int padding_state;
-    /**
-     * @brief Does a fast version of the exp() operation on a floating point number.
-     *
-     * As described in https://codingforspeed.com/using-faster-exponential-approximation/
-     * Should be good up to an input of 5 or so with a steps value of 8.
-     *
-     * @param x Floating point input
-     * @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
-     * @return Exp()'ed output
-     */
-    fptp_t fast_exp(double x, int steps);
-
-    /**
-     * @brief Does a softmax operation on a matrix.
-     *
-     * @param in        Input matrix
-     * @param out       Output matrix. Can be the same as the input matrix; if so,
-                                             output results overwrite the input.
-    */
-    void dl_softmax(const dl_matrix2d_t *in,
-                    dl_matrix2d_t *out);
-
-    /**
-     * @brief Does a softmax operation on a quantized matrix.
-     *
-     * @param in        Input matrix
-     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
-     */
-    void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
-
-    /**
-     * @brief Does a sigmoid operation on a floating point number
-     *
-     * @param in Floating point input
-     * @return Sigmoid output
-     */
-    fptp_t dl_sigmoid_op(fptp_t in);
-
-    /**
-     * @brief Does a sigmoid operation on a matrix.
-     *
-     * @param in        Input matrix
-     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
-     */
-    void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
-
-    /**
-     * @brief Does a tanh operation on a floating point number
-     *
-     * @param v         Floating point input number
-     * @return Tanh value
-     */
-    fptp_t dl_tanh_op(fptp_t v);
-
-    /**
-     * @brief Does a tanh operation on a matrix.
-     *
-     * @param in        Input matrix
-     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
-     */
-    void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
-
-    /**
-     * @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
-     *
-     * @param in        Floating point input
-     * @param clip      If value is higher than this, it will be clipped to this value
-     * @return Relu output
-     */
-    fptp_t dl_relu_op(fptp_t in, fptp_t clip);
-
-    /**
-     * @brief Does a ReLu operation on a matrix.
-     *
-     * @param in        Input matrix
-     * @param clip      If values are higher than this, they will be clipped to this value
-     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
-     */
-    void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
-
-    /**
-     * @brief Fully connected layer operation
-     *
-     * @param in        Input vector
-     * @param weight    Weights of the neurons
-     * @param bias      Biases for the neurons. Can be NULL if a bias of 0 is required.
-     * @param out       Output matrix. Outputs are placed here. Must be an initialized matrix of size weight->w by in->h.
-     */
-    void dl_fully_connect_layer(const dl_matrix2d_t *in,
-                                const dl_matrix2d_t *weight,
-                                const dl_matrix2d_t *bias,
-                                dl_matrix2d_t *out);
-
-    /**
-     * @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
-     * The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
-     * this matrix only needs to be calculated once. This function does that.
-     *
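-     * @param variance  Variance matrix
-     * @param epsilon   Epsilon value
-     * @param out       Matrix that receives the calculated sqrtvari values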
-     */
-    void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance,
-                                        fptp_t epsilon,
-                                        dl_matrix2d_t *out);
-
-    /**
-     * @brief Batch-normalize a matrix
-     *
-     * @param m         The matrix to normalize
-     * @param offset    Offset matrix
-     * @param scale     Scale matrix
-     * @param mean      Mean matrix
-     * @param sqrtvari  Matrix precalculated using dl_batch_normalize_get_sqrtvar
-     * @return
-     */
-    void dl_batch_normalize(dl_matrix2d_t *m,
-                            const dl_matrix2d_t *offset,
-                            const dl_matrix2d_t *scale,
-                            const dl_matrix2d_t *mean,
-                            const dl_matrix2d_t *sqrtvari);
-
-    /**
-     * @brief Do a basic LSTM layer pass.
-     *
-     * @warning Returns state_h pointer, so do not free result.
-
-    * @param in        Input vector
-    * @param state_c   Internal state of the LSTM network
-    * @param state_h   Internal state (previous output values) of the LSTM network
-    * @param weight    Weights for the neurons
-    * @param bias      Bias for the neurons. Can be NULL if no bias is required
-    * @return          Output values of the neurons
-    */
-    dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in,
-                                       dl_matrix2d_t *state_c,
-                                       dl_matrix2d_t *state_h,
-                                       const dl_matrix2d_t *weight,
-                                       const dl_matrix2d_t *bias);
-
-    /**
-     * @brief Do a basic LSTM layer pass, partial quantized version.
-     * This LSTM function accepts 16-bit fixed-point weights and a 32-bit floating-point bias.
-     *
-     * @warning Returns state_h pointer, so do not free result.
-
-    * @param in		Input vector
-    * @param state_c	Internal state of the LSTM network
-    * @param state_h	Internal state (previous output values) of the LSTM network
-    * @param weight	Weights for the neurons, need to be quantised
-    * @param bias		Bias for the neurons. Can be NULL if no bias is required
-    * @return			Output values of the neurons
-    */
-    dl_matrix2d_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in,
-                                                         dl_matrix2d_t *state_c,
-                                                         dl_matrix2d_t *state_h,
-                                                         const dl_matrix2dq_t *weight,
-                                                         const dl_matrix2d_t *bias);
-
-    /**
-     * @brief Do a fully-connected layer pass, fully-quantized version.
-     *
-     * @param in        Input vector
-     * @param weight    Weights of the neurons
-     * @param bias      Bias values of the neurons. Can be NULL if no bias is needed.
-     * @param out       Output matrix. The output values of the neurons are placed here.
-     * @param shift     Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
-     */
-    void dl_fully_connect_layer_q(const dl_matrix2dq_t *in,
-                                  const dl_matrix2dq_t *weight,
-                                  const dl_matrix2dq_t *bias,
-                                  dl_matrix2dq_t *out,
-                                  int shift);
-
-    /**
-     * @brief Do a basic LSTM layer pass, fully-quantized version
-     *
-     * @warning Returns state_h pointer, so do not free result.
-
-    * @param in        Input vector
-    * @param state_c   Internal state of the LSTM network
-    * @param state_h   Internal state (previous output values) of the LSTM network
-    * @param weight    Weights for the neurons
-    * @param bias      Bias for the neurons. Can be NULL if no bias is required
-    * @param shift     Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
-    * @return          Output values of the neurons
-    */
-    dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in,
-                                          dl_matrix2dq_t *state_c,
-                                          dl_matrix2dq_t *state_h,
-                                          const dl_matrix2dq_t *weight,
-                                          const dl_matrix2dq_t *bias,
-                                          int shift);
-
-    /**
-     * @brief Batch-normalize a matrix, fully-quantized version
-     *
-     * @param m         The matrix to normalize
-     * @param offset    Offset matrix
-     * @param scale     Scale matrix
-     * @param mean      Mean matrix
-     * @param sqrtvari  Matrix precalculated using dl_batch_normalize_get_sqrtvar
-     * @param shift     Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
-     * @return
-     */
-    void dl_batch_normalize_q(dl_matrix2dq_t *m,
-                              const dl_matrix2dq_t *offset,
-                              const dl_matrix2dq_t *scale,
-                              const dl_matrix2dq_t *mean,
-                              const dl_matrix2dq_t *sqrtvari,
-                              int shift);
-
-    /**
-     * @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
-     * This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
-     * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
-     *
-     * @param in        Fixed-point input
-     * @param clip      If value is higher than this, it will be clipped to this value
-     * @return Relu output
-     */
-    qtp_t dl_relu_q_op(qtp_t in,
-                       qtp_t clip);
-
-    /**
-     * @brief Does a ReLu operation on a matrix, quantized version
-     *
-     * @param in        Input matrix
-     * @param clip      If values are higher than this, they will be clipped to this value
-     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
-     */
-    void dl_relu_q(const dl_matrix2dq_t *in,
-                   fptp_t clip,
-                   dl_matrix2dq_t *out);
-
-    /**
-     * @brief Does a sigmoid operation on a fixed-point number.
-     * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
-     * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
-     *
-     * @param in Fixed-point input
-     * @return Sigmoid output
-     */
-    int dl_sigmoid_op_q(const int in);
-
-    /**
-     * @brief Does a sigmoid operation on a matrix, quantized version
-     *
-     * @param in        Input matrix
-     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
-     */
-    void dl_sigmoid_q(const dl_matrix2dq_t *in,
-                      dl_matrix2dq_t *out);
-
-    /**
-     * @brief Does a tanh operation on a matrix, quantized version
-     *
-     * @param in        Input matrix
-     * @param out       Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
-     */
-    void dl_tanh_q(const dl_matrix2dq_t *in,
-                   dl_matrix2dq_t *out);
-
-    /**
-     * @brief Do a basic CNN layer pass.
-     *
-     * @warning This only supports a single-channel input image, and the output is a single-row matrix.
-                That is to say, the height of the output is 1, and the width of the output is out_channels*out_image_width*out_image_height
-    *
-    * @param in             Input single channel image
-    * @param weight         Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
-    * @param bias           Bias for the CNN layer.
-    * @param filter_height  The height of convolution kernel
-    * @param filter_width   The width of convolution kernel
-    * @param out_channels   The number of output channels of convolution kernel
-    * @param stride_x       The step length of the convolution window in x(width) direction
-    * @param stride_y       The step length of the convolution window in y(height) direction
-    * @param pad            One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
-    * @param out            The result of CNN layer, out->h=1.
-    * @return               The result of CNN layer.
-    */
-    dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in,
-                                       const dl_matrix2d_t *weight,
-                                       const dl_matrix2d_t *bias,
-                                       int filter_width,
-                                       int filter_height,
-                                       const int out_channels,
-                                       const int stride_x,
-                                       const int stride_y,
-                                       padding_state pad,
-                                       const dl_matrix2d_t *out);
-
-    /**
-     * @brief Do a basic CNN layer pass, quantised version.
-     *
-     * @warning This only supports a single-channel input image, and the output is a single-row matrix.
-                That is to say, the height of the output is 1, and the width of the output is out_channels*out_image_width*out_image_height
-    *
-    * @param in             Input single channel image
-    * @param weight         Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
-    * @param bias           Bias of the neurons.
-    * @param filter_height  The height of convolution kernel
-    * @param filter_width   The width of convolution kernel
-    * @param out_channels   The number of output channels of convolution kernel
-    * @param stride_x       The step length of the convolution window in x(width) direction
-    * @param stride_y       The step length of the convolution window in y(height) direction
-    * @param pad            One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
-    * @param out            The result of CNN layer, out->h=1
-    * @return               The result of CNN layer
-    */
-    dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in,
-                                                        const dl_matrix2dq_t *weight,
-                                                        const dl_matrix2d_t *bias,
-                                                        int filter_width,
-                                                        int filter_height,
-                                                        const int out_channels,
-                                                        const int stride_x,
-                                                        const int stride_y,
-                                                        padding_state pad,
-                                                        const dl_matrix2d_t *out);
-
-#ifdef __cplusplus
-}
-#endif
-#endif
--- a/tools/sdk/include/esp-face/dl_lib_coefgetter_if.h
+++ b/tools/sdk/include/esp-face/dl_lib_coefgetter_if.h
@ -1,47 +0,0 @@
-#ifndef DL_LIB_COEFGETTER_IF_H
-#define DL_LIB_COEFGETTER_IF_H
-
-#include "dl_lib_matrix.h"
-#include "dl_lib_matrixq.h"
-#include "dl_lib_matrix3d.h"
-#include "dl_lib_matrix3dq.h"
-
-//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
-//dl_batch_normalize_get_sqrtvar first.
-#define COEF_GETTER_HINT_BNVAR (1<<0)
-
-/*
-This struct describes the basic information of model data:
-word_num: the number of wake words or speech commands
-word_list: the name list of wake words or speech commands
-thresh_list: the threshold list of wake words or speech commands
-info_str: the string used to reflect the version and information of model data
-          which consists of the architecture of network, the version of model data, wake words and their threshold
-*/
-typedef struct {
-    int word_num;
-    char **word_list;
-    int *win_list;
-    float *thresh_list;
-    char *info_str;
-} model_info_t;
-
-/*
-This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
-For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the
-coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
-to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
-is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the
-memory for the returned matrices, when applicable.
-*/
-typedef struct {
-    const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
-    const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
-    const dl_matrix3d_t* (*getter_3d)(const char *name, void *arg, int hint);
-    const dl_matrix3dq_t* (*getter_3dq)(const char *name, void *arg, int hint);
-    void (*free_f)(const dl_matrix2d_t *m);
-    void (*free_q)(const dl_matrix2dq_t *m);
-    const model_info_t* (*getter_info)(void *arg);
-} model_coeff_getter_t;
-
-#endif
--- a/tools/sdk/include/esp-face/dl_lib_matrix.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix.h
@ -1,216 +0,0 @@
-#ifndef DL_LIB_MATRIX_H
-#define DL_LIB_MATRIX_H
-
-typedef float fptp_t;
-
-
-//Flags for matrices
-#define DL_MF_FOREIGNDATA (1<<0)  /*< Matrix *item data actually points to another matrix and should not be freed */
-
-//'Normal' float matrix
-typedef struct {
-    int w;          /*< Width */
-    int h;          /*< Height */
-    int stride;     /*< Row stride, essentially how many items to skip to get to the same position in the next row */
-    int flags;      /*< Flags. OR of DL_MF_* values */
-    fptp_t *item;   /*< Pointer to item array */
-} dl_matrix2d_t;
-
-//Macro to quickly access the raw items in a matrix. Note: m is not parenthesized in the expansion, so pass a plain pointer variable.
-#define DL_ITM(m, x, y) m->item[(x)+(y)*m->stride]
-
-
-//#define DL_ITM3D(m, n, x, y, z) (m)->item[(n) * (m)->stride * (m)->c + (z) * (m)->stride + (y) * (m)->w + (x)]
-
-/**
- * @brief Allocate a matrix
- *
- * @param w     Width of the matrix
- * @param h     Height of the matrix
- * @return The matrix, or NULL if out of memory
- */
-dl_matrix2d_t *dl_matrix_alloc(int w, int h);
-
-
-/**
- * @brief Free a matrix
- * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
- *
- * @param m     Matrix to free
- */
-void dl_matrix_free(dl_matrix2d_t *m);
-
-/**
- * @brief Zero out the matrix
- * Sets all entries in the matrix to 0.
- *
- * @param m     Matrix to zero
- */
-void dl_matrix_zero(dl_matrix2d_t *m);
-
-/**
- * @brief Generate a new matrix using a range of items from an existing matrix.
- * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
- * to the existing data. Changing the data in the resulting matrix, as a result, will also change
- * the data in the existing matrix that has been sliced.
- *
- * @param src   Matrix to slice
- * @param x     X-offset of the origin of the returned matrix within the sliced matrix
- * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
- * @param w     Width of the resulting matrix
- * @param h     Height of the resulting matrix
- * @param in    Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
- * @return The resulting slice matrix, or NULL if out of memory
- */
-dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
-
-/**
- * @brief Select a range of items from an existing matrix and flatten them into one dimension.
- *
- * @warning The results are flattened in row-major order.
- *
- * @param src   Matrix to select the items from
- * @param x     X-offset of the origin of the returned matrix within the sliced matrix
- * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
- * @param w     Width of the resulting matrix
- * @param h     Height of the resulting matrix
- * @param in    Old matrix to re-use. Passing NULL will allocate a new matrix.
- * @return  The resulting flattened matrix, or NULL if out of memory
- */
-dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
-
-/**
- * @brief Generate a matrix from existing floating-point data
- *
- * @param w     Width of resulting matrix
- * @param h     Height of resulting matrix
- * @param data  Data to populate matrix with
- * @return A newly allocated matrix populated with the given input data, or NULL if out of memory.
- */
-dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
-
-
-/**
- * @brief Multiply a pair of matrices item-by-item: res=a*b
- *
- * @param a     First multiplicand
- * @param b     Second multiplicand
- * @param res   Multiplicated data. Can be equal to a or b to overwrite that.
- */
-void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
-
-/**
- * @brief Do a dotproduct of two matrices : res=a.b
- *
- * @param a     First multiplicand
- * @param b     Second multiplicand
- * @param res   Dotproduct data. *Must* be a *different* matrix from a or b!
- */
-void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
-
-/**
- * @brief Add a pair of matrices item-by-item: out=a+b
- *
- * @param a     First matrix
- * @param b     Second matrix
- * @param out   Added data. Can be equal to a or b to overwrite that.
- */
-void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
-
-
-/**
- * @brief Divide a pair of matrices item-by-item: out=a/b
- *
- * @param a     First matrix
- * @param b     Second matrix
- * @param out   Divided data. Can be equal to a or b to overwrite that.
- */
-void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
-
-/**
- * @brief Subtract a matrix from another, item-by-item: out=a-b
- *
- * @param a     First matrix
- * @param b     Second matrix
- * @param out   Subtracted data. Can be equal to a or b to overwrite that.
- */
-void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
-
-/**
- * @brief Add a constant to every item of the matrix
- *
- * @param subj  Matrix to add the constant to
- * @param add   The constant
- */
-void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
-
-
-/**
- * @brief Concatenate the rows of two matrices into a new matrix
- *
- * @param a     First matrix
- * @param b     Second matrix
- * @return A newly allocated matrix whose values are a|b
- */
-dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
-
-
-/**
- * @brief Print the contents of a matrix to stdout. Used for debugging.
- *
- * @param a     The matrix to print.
- */
-void dl_printmatrix(const dl_matrix2d_t *a);
-
-/**
- * @brief Return the average square error given a correct and a test matrix.
- *
- * ...Well, more or less. If anything, it gives an indication of the error between
- * the two. Check the code for the exact implementation.
- *
- * @param a     First of the two matrices to compare
- * @param b     Second of the two matrices to compare
- * @return value indicating the relative difference between matrices
- */
-float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
-
-
-
-/**
- * @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
- *
- * @param a     First of the two matrices to compare
- * @param b     Second of the two matrices to compare
- * @return true if the two matrices are shaped the same, false otherwise.
- */
-int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
-
-
-/**
- * @brief Get a specific item from the matrix
- *
- * Please use these for external matrix access instead of DL_ITM
- *
- * @param m     Matrix to access
- * @param x     Column address
- * @param y     Row address
- * @return Value in that position
- */
-inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) { 
-    return DL_ITM(m, x, y);
-}
-
-/**
- * @brief Set a specific item in the matrix to the given value
- *
- * Please use these for external matrix access instead of DL_ITM
- *
- * @param m     Matrix to access
- * @param x     Column address
- * @param y     Row address
- * @param val   Value to write to that position
- */
-inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) { 
-    DL_ITM(m, x, y)=val;
-}
-
-#endif
-
--- a/tools/sdk/include/esp-face/dl_lib_matrix3d.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix3d.h
@ -1,5 +1,6 @@
 #pragma once

+#include <stdint.h>
 typedef float fptp_t;
 typedef uint8_t uc_t;

@ -92,6 +93,16 @@ void dl_matrix3d_free(dl_matrix3d_t *m);
 */
 void dl_matrix3du_free(dl_matrix3du_t *m);

+
+/**
+ * @brief Dot product of a vector and a matrix
+ *
+ * @param out   Space to put the result
+ * @param in    input vector
+ * @param f     filter matrix
+ */
+void dl_matrix3d_dot_product(dl_matrix3d_t *out, dl_matrix3d_t *in, dl_matrix3d_t *f);
+
 /**
 * @brief Do a relu (Rectifier Linear Unit) operation, update the input matrix3d
 *
@ -162,6 +173,9 @@ void dl_matrix3du_slice_copy(dl_matrix3du_t *dst,
                             int w,
                             int h);

+
+void dl_matrix3d_conv_1x1 (dl_matrix3d_t *out, dl_matrix3d_t *in, dl_matrix3d_t *f);
+
 /**
 * @brief Do a general CNN layer pass, dimension is (number, width, height, channel)
 *
@ -183,6 +197,11 @@ dl_matrix3d_t *dl_matrix3d_conv(dl_matrix3d_t *in,
                                int padding,
                                int mode);

+void dl_matrix3d_conv_3x3_normal (dl_matrix3d_t *out,
+                                    dl_matrix3d_t *in,
+                                    dl_matrix3d_t *f,
+                                    int step_x,
+                                    int step_y);
 /**
 * @brief Do a general CNN layer pass, dimension is (number, width, height, channel)
 *
@ -223,6 +242,11 @@ dl_matrix3d_t *dl_matrix3d_depthwise_conv(dl_matrix3d_t *in,
                                          int padding,
                                          int mode);

+void dl_matrix3d_depthwise_conv_3x3_normal(dl_matrix3d_t *out,
+                                            dl_matrix3d_t *in,
+                                            dl_matrix3d_t *f,
+                                            int step_x,
+                                            int step_y);
 /**
 * @brief Do a mobilenet block forward, dimension is (number, width, height, channel)
 *
@ -418,3 +442,8 @@ void dl_matrix3d_print(dl_matrix3d_t *m, char *message);
 * @param message        name of matrix
 */
 void dl_matrix3du_print(dl_matrix3du_t *m, char *message);
+
+
+void dl_matrix3d_init_bias (dl_matrix3d_t *out, dl_matrix3d_t *bias);
+
+void dl_matrix3d_multiply(dl_matrix3d_t *out, dl_matrix3d_t *in1, dl_matrix3d_t *in2);
--- a/tools/sdk/include/esp-face/dl_lib_matrix3dq.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrix3dq.h
@ -85,6 +85,22 @@ dl_matrix3dq_t *dl_matrix3dq_conv (dl_matrix3dq_t *in, dl_matrix3dq_t *filter, d
 dl_matrix3dq_t *dl_matrix3dq_conv_normal (dl_matrix3dq_t *in, dl_matrix3dq_t *filter, dl_matrix3dq_t *bias,
                                    int stride_x, int stride_y, int padding, int exponent, int mode);

+void dl_matrix3dq_conv_1x1 (dl_matrix3dq_t *out, dl_matrix3dq_t *in, dl_matrix3dq_t *f, dl_conv_mode mode);
+
+void dl_matrix3dq_conv_3x3_normal (dl_matrix3dq_t *out,
+                                    dl_matrix3dq_t *in,
+                                    dl_matrix3dq_t *f,
+                                    int step_x,
+                                    int step_y);
+dl_matrix3dq_t *dl_matrix3dq_conv_3x3_with_bn (dl_matrix3dq_t *in,
+                                    dl_matrix3dq_t *f,
+                                    dl_matrix3dq_t *scale,
+                                    dl_matrix3dq_t *offset,
+                                    int step_x,
+                                    int step_y,
+                                    int padding,
+                                    int exponent,
+                                    int relu);
 /**
 * @brief Print the matrix3d items
 *
@ -95,6 +111,15 @@ void dl_matrix3dq_print (dl_matrix3dq_t *m, char *message);

 dl_matrix3dq_t *dl_matrix3dq_depthwise_conv (dl_matrix3dq_t *in, dl_matrix3dq_t *filter,
                                    int stride_x, int stride_y, int padding, int exponent, int mode);
+dl_matrix3dq_t *dl_matrix3dq_depthwise_conv_3x3_with_bn(dl_matrix3dq_t *in,
+                                            dl_matrix3dq_t *f,
+                                            dl_matrix3dq_t *scale,
+                                            dl_matrix3dq_t *offset,
+                                            int step_x,
+                                            int step_y,
+                                            int padding,
+                                            int exponent,
+                                            int relu);

 void dl_matrix3dq_relu (dl_matrix3dq_t *m, fptp_t clip);

--- a/tools/sdk/include/esp-face/dl_lib_matrixq.h
+++ b/tools/sdk/include/esp-face/dl_lib_matrixq.h
@ -1,359 +0,0 @@
-#ifndef DL_LIB_MATRIXQ_H
-#define DL_LIB_MATRIXQ_H
-
-#include <stdint.h>
-#include "dl_lib_matrix.h"
-
-typedef int16_t qtp_t;
-
-//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted 
-//for easy use as a multiplicand without stressing out the flash cache too much.
-typedef struct {
-    int w;
-    int h;
-    int stride; //Normally equals h, not w!
-    int flags;
-    int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
-    qtp_t *itemq;
-} dl_matrix2dq_t;
-
-#define DL_QTP_SHIFT 15
-#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
-#define DL_ITMQ(m, x, y) m->itemq[(y)+(x)*m->stride]
-#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
-
-#define DL_SHIFT_AUTO 32
-
-/**
- * @info About quantized matrices and shift values
- *
- * Grab a coffee (or tea, or hot water)  and sit down when you read this for the first 
- * time. Quantized matrices can speed up your operations, but come with some quirks, and
- * it's good to understand how they work before using them.
- *
- * The data in the quantized matrix type is stored similarily to floating-point types:
- * when storing a real value, the value is stored as a mantissa (base number) and an
- * exponent. The 'real' value that can be re-derived from those two numbers is something
- * similar to mantissa*2^exponent. Up to this point, there's not that much difference from 
- * the standard floating point implementations like e.g. IEEE-754.
- *
- * The difference with respect to quantized matrices is that for a quantized matrix, it is 
- * assumed all values stored have more-or-less the same order of magnitude. This allows the
- * matrix to only store all the mantissas, while the exponents are shared; there is only one 
- * exponent for the entire matrix. This makes it quicker to handle matrix operations - the
- * logic to fix the exponents only needs to happen once, while the rest can be done in simple
- * integer arithmetic. It also nets us some memory savings - while normally a floating point
- * number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the 
- * memory requirements.
- *
- * While most of the details of handling the intricacies of the quantized matrixes are done
- * transparently by the code in dl_lib_matrixq.c, some implementation details leak out, 
- * specifically in places where addition/subtraction/division happens.
- *
- * The problem is that the routines do not know what the size of the resulting operation is. For
- * instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
- * to overflow the mantissa of the result if the exponent is the same. However, if by default we
- * assume the mantissas needs to be scaled back, we may lose precision.
- *
- * In order to counter this, all operations that have this issue have a ``shift`` argument. If 
- * the argument is zero, the routine will be conservative, that is, increase the exponent of 
- * the result to such an extent it's mathematically impossible a value in the result will exceed
- * the maximum value that can be stored. However, when this argument is larger than zero, the
- * algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
- * but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
- * If this happens, the value will be clipped to the largest (or, for negative values, smallest)
- * value possible. (Neural networks usually are okay with this happening for a limited amount
- * of matrix indices).
- *
- * For deciding on these shift values, it is recommended to start with a shift value of one, then
- * use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value. 
- * If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
- * shift values of 0 or 1 make sense; these routines will error out if you try to do something
- * else.
- *
- * For neural networks and other noise-tolerant applications, note that even when 
- * dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
- * to slightly improved precision. Feel free to experiment.
- **/
-
-
-/**
- * @brief Allocate a matrix
- *
- * @param w     Width of the matrix
- * @param h     Height of the matrix
- * @return The matrix, or NULL if out of memory
- */
-dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
-
-/**
- * @brief Convert a floating-point matrix to a quantized matrix
- *
- * @param m     Floating-point matrix to convert
- * @param out   Quantized matrix to re-use. If NULL, allocate a new one.
- * @Return The quantized version of the floating-point matrix
- */
-dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
-
-
-/**
- * TODO: DESCRIBE THIS FUNCTION
- */
-dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
-
-
-/**
- * @brief Convert a quantized matrix to a floating-point one.
- *
- * @param m     Floating-point matrix to convert
- * @param out   Quantized matrix to re-use. If NULL, allocate a new one.
- * @Return The quantized version of the floating-point matrix
- **/
-dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
-
-
-/**
- * @brief Free a quantized matrix
- * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
- *
- * @param m     Matrix to free
- */
-void dl_matrixq_free(dl_matrix2dq_t *m);
-
-/**
- * @brief Zero out the matrix
- * Sets all entries in the matrix to 0.
- *
- * @param m     Matrix to zero
- */
-void dl_matrixq_zero(dl_matrix2dq_t *m);
-
-
-/**
- * @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
- *
- * @param a     First multiplicand
- * @param b     Second multiplicand
- * @param res   Dotproduct data. *Must* be a *different* matrix from a or b!
- * @param shift Shift ratio
- */
-void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
-
-/**
- * @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
- *
- * @param a     First multiplicand
- * @param b     Second multiplicand
- * @param res   Dotproduct data. *Must* be a *different* matrix from a or b!
- */
-void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
-
-/**
- * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
- *
- * Result is a fixed-point matrix. 
- *
- * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
- * much slower than dl_matrixq_dot .
- *
- * @param a     First multiplicand
- * @param b     Second multiplicand
- * @param res   Dotproduct data. *Must* be a *different* matrix from a or b!
- * @param shift Shift ratio
- */
-void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
-
-/**
- * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. 
- *
- * Result is a floating-point matrix. 
- *
- * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
- * much slower than dl_matrixq_dot_matrix_out.
- *
- * @param a     First multiplicand
- * @param b     Second multiplicand
- * @param res   Dotproduct data. *Must* be a *different* matrix from a or b!
- */
-void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
-
-/**
- * @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
- *
- * @param a     First multiplicand; float matrix
- * @param b     Second multiplicand; quantized matrix
- * @param res   Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
- */
-void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
-
-
-/**
- * @brief Print the contents of a quantized matrix to stdout. Used for debugging.
- *
- * @param a     The matrix to print.
- */
-void dl_printmatrixq(const dl_matrix2dq_t *a);
-
-
-/**
- * @brief Add a pair of quantizedmatrices item-by-item: res=a-b
- *
- * @param a     First matrix
- * @param b     Second matrix
- * @param res   Added data. Can be equal to a or b to overwrite that.
- * @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
- */
-void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
-
-/**
- * @brief Generate a new matrix using a range of items from an existing matrix.
- * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
- * to the existing data. Changing the data in the resulting matrix, as a result, will also change
- * the data in the existing matrix that has been sliced.
- *
- * @Warning In contrast to the floating point equivalent of this function, the fixed-point version
- * of this has the issue that as soon as the output exponent of one of the slices changes, the data
- * in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
- * use this function, either treat the slices as read-only, or assume the sliced matrix contains
- * garbage after modifying the data in one of the slices.
- *
- * @param x     X-offset of the origin of the returned matrix within the sliced matrix
- * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
- * @param w     Width of the resulting matrix
- * @param h     Height of the resulting matrix
- * @param in    Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
- * @return The resulting slice matrix, or NULL if out of memory
- */
-dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
-
-/**
- * @brief select a range of items from an existing matrix and flatten them into one dimension.
- *
- * @Warning The results are flattened in row-major order.
- *   
- * @param x     X-offset of the origin of the returned matrix within the sliced matrix
- * @param y     Y-offset of the origin of the returned matrix within the sliced matrix
- * @param w     Width of the resulting matrix
- * @param h     Height of the resulting matrix
- * @param in    Old matrix to re-use. Passing NULL will allocate a new matrix.
- * @return The resulting flatten matrix, or NULL if out of memory
- */
-dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
-
-/**
- * @brief Subtract a quantized matrix from another, item-by-item: res=a-b
- *
- * @param a     First matrix
- * @param b     Second matrix
- * @param res   Subtracted data. Can be equal to a or b to overwrite that.
- * @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
- */
-void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
-
-/**
- * @brief Multiply a pair of quantized matrices item-by-item: res=a*b
- *
- * @param a     First multiplicand
- * @param b     Second multiplicand
- * @param res   Multiplicated data. Can be equal to a or b to overwrite that matrix.
- */
-void dl_matrixq_mul(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res);
-
-/**
- * @brief Divide a pair of quantized matrices item-by-item: res=a/b
- *
- * @param a     First matrix
- * @param b     Second matrix
- * @param res   Divided data. Can be equal to a or b to overwrite that.
- */
-void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
-
-/**
- * @brief Check if two quantized matrices have the same shape, that is, the same amount of 
- * rows and columns
- *
- * @param a     First of the two matrices to compare
- * @param b     Second of the two matrices to compare
- * @return true if the two matrices are shaped the same, false otherwise.
- */
-int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
-
-/**
- * @brief Concatenate the rows of two quantized matrices into a new matrix
- *
- * @param a     First matrix
- * @param b     Second matrix
- * @return A newly allocated quantized matrix with as values a|b
- */
-dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
-
-/**
- * @brief Add a constant to every item of the quantized matrix
- *
- * @param subj  Matrix to add the constant to
- * @param add   The constant
- */
-void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
-
-/**
- * @brief Check the sanity of a quantized matrix
- *
- * Due to the nature of quantized matrices, depending on the calculations a quantized
- * matrix is the result of and the shift values chosen in those calculations, a quantized
- * matrix may have an exponent and mantissas that lead to a loss of precision, either because
- * most significant mantissa bits are unused, or because a fair amount of mantissas are 
- * clipped. This function checks if this is the case and will report a message to stdout
- * if significant loss of precision is detected.
- *
- * @param m     The quantized matrix to check
- * @param name  A string to be displayed in the message if the sanity check fails
- * @return True if matrix is sane, false otherwise
- **/
-
-int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
-
-/**
- * @brief re-adjust the exponent of the matrix to fit the mantissa better
- *
- * This function will shift up all the data in the mantissas so there are no
- * most-significant bits that are unused in all mantissas. It will also adjust
- * the exponent to keep the actua values in the matrix the same.
- *
- * Some operations done on a matrix, especially operations that re-use the
- * result of earlier operations done in the same way, can lead to the loss of
- * data because the exponent of the quantized matrix is never re-adjusted. You
- * can do that implicitely by calling this function.
- *
- * @param m     The matrix to re-adjust
-**/
-void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
-
-
-
-/**
- * @brief Get the floating-point value of a specific item from the quantized matrix
- *
- * @param m     Matrix to access
- * @param x     Column address
- * @param y     Row address
- * @return Value in that position
- */
-fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
-
-/**
- * @brief Set a specific item in the quantized matrix to the given 
- * floating-point value
- *
- * @warning If the given value is more than the exponent in the quantized matrix
- * allows for, all mantissas in the matrix will be shifted down to make the value
- * 'fit'. If, however, the exponent is such that the value would result in a
- * quantized mantissa of 0, nothing is done.
- *
- * @param m     Matrix to access
- * @param x     Column address
- * @param y     Row address
- * @param val   Value to write to that position
- */
-void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
-
-#endif
--- a/tools/sdk/include/esp-face/fd_forward.h
+++ b/tools/sdk/include/esp-face/fd_forward.h
@ -29,22 +29,41 @@ extern "C"
 #endif

 #include "image_util.h"
-#include "dl_lib.h"
+#include "dl_lib_matrix3d.h"
 #include "mtmn.h"

+    typedef enum /// image resize type, stored in mtmn_config_t.type
+    {
+        FAST = 0, /// value assigned by mtmn_init_config(); the 'pyramid' scale has no effect in this mode
+        NORMAL = 1, /// NOTE(review): presumably the mode in which the 'pyramid' scale is honoured -- confirm
+    } mtmn_resize_type;
+
+    typedef struct
+    {
+        float min_face;                 /// minimum size of a face that can be detected
+        float pyramid;                  /// the pyramid scale
+        int pyramid_times;              /// number of times the pyramid resizing is applied
+        threshold_config_t p_threshold; /// score, nms and candidate threshold of pnet
+        threshold_config_t r_threshold; /// score, nms and candidate threshold of rnet
+        threshold_config_t o_threshold; /// score, nms and candidate threshold of onet
+        mtmn_resize_type type;          /// image resize type; 'pyramid' has no effect when 'type' == FAST
+    } mtmn_config_t;
+
    static inline mtmn_config_t mtmn_init_config()
    {
        mtmn_config_t mtmn_config;
+        mtmn_config.type = FAST;
        mtmn_config.min_face = 80;
-        mtmn_config.pyramid = 0.7;
+        mtmn_config.pyramid = 0.707;
+        mtmn_config.pyramid_times = 4;
        mtmn_config.p_threshold.score = 0.6;
        mtmn_config.p_threshold.nms = 0.7;
-        mtmn_config.p_threshold.candidate_number = 100;
-        mtmn_config.r_threshold.score = 0.6;
+        mtmn_config.p_threshold.candidate_number = 20;
+        mtmn_config.r_threshold.score = 0.7;
        mtmn_config.r_threshold.nms = 0.7;
-        mtmn_config.r_threshold.candidate_number = 4;
-        mtmn_config.o_threshold.score = 0.6;
-        mtmn_config.o_threshold.nms = 0.4;
+        mtmn_config.r_threshold.candidate_number = 10;
+        mtmn_config.o_threshold.score = 0.7;
+        mtmn_config.o_threshold.nms = 0.7;
        mtmn_config.o_threshold.candidate_number = 1;

        return mtmn_config;
--- a/tools/sdk/include/esp-face/fr_forward.h
+++ b/tools/sdk/include/esp-face/fr_forward.h
@ -6,7 +6,7 @@ extern "C"
 #endif

 #include "image_util.h"
-#include "dl_lib.h"
+#include "dl_lib_matrix3d.h"
 #include "frmn.h"

 #define FACE_WIDTH 56
@ -38,23 +38,22 @@ extern "C"

    typedef struct
    {
-        face_id_node *head;               /*!< head pointer of the id list */
-        face_id_node *tail;               /*!< tail pointer of the id list */
-        uint8_t count;              /*!< number of enrolled ids */
-        uint8_t confirm_times;      /*!< images needed for one enrolling */
+        face_id_node *head;    /*!< head pointer of the id list */
+        face_id_node *tail;    /*!< tail pointer of the id list */
+        uint8_t count;         /*!< number of enrolled ids */
+        uint8_t confirm_times; /*!< images needed for one enrolling */
    } face_id_name_list;
-	
+
    typedef struct
    {
-        uint8_t head;               /*!< head index of the id list */
-        uint8_t tail;               /*!< tail index of the id list */
-        uint8_t count;              /*!< number of enrolled ids */
-        uint8_t size;               /*!< max len of id list */
-        uint8_t confirm_times;      /*!< images needed for one enrolling */
-        dl_matrix3d_t **id_list;    /*!< stores face id vectors */
+        uint8_t head;            /*!< head index of the id list */
+        uint8_t tail;            /*!< tail index of the id list */
+        uint8_t count;           /*!< number of enrolled ids */
+        uint8_t size;            /*!< max len of id list */
+        uint8_t confirm_times;   /*!< images needed for one enrolling */
+        dl_matrix3d_t **id_list; /*!< stores face id vectors */
    } face_id_list;

-
    /**
     * @brief Initialize face id list
     * 
@ -86,6 +85,10 @@ extern "C"
                      dl_matrix3du_t *src,
                      dl_matrix3du_t *dest);

+    int8_t align_face2(fptp_t *landmark,
+                       dl_matrix3du_t *src,
+                       dl_matrix3du_t *dest);
+
    dl_matrix3d_t *get_face_id(dl_matrix3du_t *aligned_face);

    /**
@ -104,11 +107,9 @@ extern "C"
     * @param id_list               An ID list
     * @return int8_t               Matched face id
     */
-    int8_t recognize_face(face_id_list *l,
-                            dl_matrix3du_t *algined_face);
-							
-    face_id_node *recognize_face_with_name(face_id_name_list *l,
-                            dl_matrix3d_t *face_id);
+    int8_t recognize_face(face_id_list *l, dl_matrix3du_t *algined_face);
+
+    face_id_node *recognize_face_with_name(face_id_name_list *l, dl_matrix3d_t *face_id);
    /**
     * @brief Produce face id according to the input aligned face, and save it to dest_id.
     * 
@ -119,12 +120,11 @@ extern "C"
     * @return 0                    Enrollment finish
     * @return >=1                  The left piece of aligned faces should be input
     */
-    int8_t enroll_face(face_id_list *l, 
-                    dl_matrix3du_t *aligned_face);
-					
-    int8_t enroll_face_with_name(face_id_name_list *l, 
-                    dl_matrix3d_t *new_id,
-                    char *name);
+    int8_t enroll_face(face_id_list *l, dl_matrix3du_t *aligned_face);
+
+    int8_t enroll_face_with_name(face_id_name_list *l,
+                                 dl_matrix3d_t *new_id,
+                                 char *name);

    /**
     * @brief Alloc memory for aligned face.
@ -133,7 +133,7 @@ extern "C"
     * @return uint8_t              left count
     */
    uint8_t delete_face(face_id_list *l);
-	int8_t delete_face_with_name(face_id_name_list *l, char *name);
+    int8_t delete_face_with_name(face_id_name_list *l, char *name);
    void delete_face_all_with_name(face_id_name_list *l);
 #if __cplusplus
 }
--- a/tools/sdk/include/esp-face/frmn.h
+++ b/tools/sdk/include/esp-face/frmn.h
@ -5,7 +5,8 @@ extern "C"
 {
 #endif

-#include "dl_lib.h"
+#include "dl_lib_matrix3d.h"
+#include "dl_lib_matrix3dq.h"

    /**
     * @brief 
--- a/tools/sdk/include/esp-face/image_util.h
+++ b/tools/sdk/include/esp-face/image_util.h
@ -27,6 +27,7 @@ extern "C"
 {
 #endif
 #include <stdint.h>
+#include <math.h>
 #include "mtmn.h"

 #define MAX_VALID_COUNT_PER_IMAGE (30)
@ -57,6 +58,7 @@ extern "C"

    typedef struct tag_box_list
    {
+        fptp_t *score;
        box_t *box;
        landmark_t *landmark;
        int len;
@ -142,12 +144,19 @@ extern "C"
        for (int i = 0; i < boxes->len; i++)
        {
            box_t *box = &(boxes->box[i]);
-            float w, h;
-            image_get_width_and_height(box, &w, &h);
-            float l = DL_IMAGE_MAX(w, h);

-            box->box_p[0] = DL_IMAGE_MAX(0, box->box_p[0] + 0.5 * (w - l));
-            box->box_p[1] = DL_IMAGE_MAX(0, box->box_p[1] + 0.5 * (h - l));
+            int x1 = round(box->box_p[0]);
+            int y1 = round(box->box_p[1]);
+            int x2 = round(box->box_p[2]);
+            int y2 = round(box->box_p[3]);
+
+            int w = x2 - x1 + 1;
+            int h = y2 - y1 + 1;
+            int l = DL_IMAGE_MAX(w, h);
+
+            box->box_p[0] = round(DL_IMAGE_MAX(0, x1) + 0.5 * (w - l));
+            box->box_p[1] = round(DL_IMAGE_MAX(0, y1) + 0.5 * (h - l));
+
            box->box_p[2] = box->box_p[0] + l - 1;
            if (box->box_p[2] > width)
            {
@ -215,6 +224,25 @@ extern "C"
     */
    void image_nms_process(image_list_t *image_list, fptp_t nms_threshold, int same_area);

+    /**
+     * @brief NOTE(review): undocumented; judging by the name this rescales simage into dimage by a factor of two -- confirm direction and behaviour against the implementation
+     *
+     * @param dimage presumably the destination image buffer (TODO confirm)
+     * @param dw     presumably the destination width (TODO confirm)
+     * @param dh     presumably the destination height (TODO confirm)
+     * @param dc     presumably the destination channel count (TODO confirm)
+     * @param simage presumably the source image buffer (TODO confirm)
+     * @param sw     presumably the source width; note that no source height is passed (TODO confirm)
+     * @param sc     presumably the source channel count (TODO confirm)
+     */
+    void image_zoom_in_twice(uint8_t *dimage,
+                             int dw,
+                             int dh,
+                             int dc,
+                             uint8_t *simage,
+                             int sw,
+                             int sc);
+
    /**
     * @brief 
     * 
--- a/tools/sdk/include/esp-face/mtmn.h
+++ b/tools/sdk/include/esp-face/mtmn.h
@ -27,14 +27,7 @@
 extern "C"
 {
 #endif
-#include "dl_lib.h"
-
-    typedef enum
-    {
-        PNET = 0, /// P-Net
-        RNET = 1, /// R-Net
-        ONET = 2, /// O-Net
-    } net_type_en;
+#include "dl_lib_matrix3d.h"

    typedef struct
    {
@ -45,22 +38,11 @@ extern "C"

    typedef struct
    {
-        net_type_en net_type;         /// net type
-        char *file_name;              /// net name
        int w;                        /// net width
        int h;                        /// net height
        threshold_config_t threshold; /// threshold of net
    } net_config_t;

-    typedef struct
-    {
-        float min_face;                 /// the minimum size of face can be detected
-        float pyramid;                  /// the pyramid scale
-        threshold_config_t p_threshold; /// score, nms and candidate threshold of pnet
-        threshold_config_t r_threshold; /// score, nms and candidate threshold of rnet
-        threshold_config_t o_threshold; /// score, nms and candidate threshold of onet
-    } mtmn_config_t;
-
    typedef struct
    {
        dl_matrix3d_t *category;