IDF master b86fe0c66c

This commit is contained in:
me-no-dev
2021-10-13 18:21:12 +00:00
parent 2fb2ef54ce
commit 34c81be93b
538 changed files with 17119 additions and 4346 deletions

View File

@ -10,7 +10,7 @@
#define DL_LOG_LAYER_LATENCY 0 /*<! - 1: print the latency of each parts of layer */
/*<! - 0: mute */
#if CONFIG_SPIRAM_SUPPORT || CONFIG_ESP32_SPIRAM_SUPPORT || CONFIG_ESP32S3_SPIRAM_SUPPORT
#if CONFIG_SPIRAM_SUPPORT || CONFIG_ESP32_SPIRAM_SUPPORT || CONFIG_ESP32S2_SPIRAM_SUPPORT || CONFIG_ESP32S3_SPIRAM_SUPPORT
#define DL_SPIRAM_SUPPORT 1
#else
#define DL_SPIRAM_SUPPORT 0
@ -83,8 +83,17 @@ namespace dl
typedef enum
{
PADDING_VALID, /*<! no padding >*/
PADDING_SAME, /*<! SAME in TensorFlow style >*/
PADDING_SAME_MXNET /*<! SAME in MXNET style >*/
PADDING_NOT_SET,
PADDING_VALID, /*<! no padding >*/
PADDING_SAME_BEGIN, /*<! SAME in MXNET style >*/
PADDING_SAME_END, /*<! SAME in TensorFlow style >*/
} padding_type_t;
} // namespace dl
typedef enum
{
CONSTANT,
EDGE,
REFLECT,
SYMMETRIC,
} padding_mode_t;
} // namespace dl

View File

@ -370,11 +370,70 @@ namespace dl
*/
uint32_t get_moving_point_number(uint8_t *f1, uint8_t *f2, const uint32_t height, const uint32_t width, const uint32_t stride, const uint32_t threshold = 5);
/**
* @brief Apply an affine transformation to an image.
*
* @tparam T
* @param input the input image.
* @param output the output image.
* @param M_inv the inverse transformation matrix.
*/
template <typename T>
void warp_affine(dl::Tensor<T> *input, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv);
/**
* @brief Apply an affine transformation to an image.
*
* @tparam T
* @param input the pointer of the input image.
* @param shape the shape of the input image.
* @param output the output image.
* @param M_inv the inverse transformation matrix.
*/
template <typename T>
void warp_affine(uint16_t *input, std::vector<int> shape, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv);
/**
* @brief Get the otsu thresh object.
*
* @param image the gray image.
* @return uint8_t the otsu thresh.
*/
uint8_t get_otsu_thresh(Tensor<uint8_t> &image);
/**
* @brief Convert RGB image to gray image
*
* @param image input image
* @param bgr true: the image is in BGR format
* false: the image is in RGB format
* @return Tensor<uint8_t>* output image in gray format
*/
Tensor<uint8_t> *rgb2gray(Tensor<uint8_t> &image, bool bgr = false);
/**
* @brief Convert RGB image to LAB image
*
* @param image input image
* @param bgr true: the image is in BGR format
* false: the image is in RGB format
* @param fast true: use the fast algorithm but the accuracy will be reduced
* false: do not use the fast algorithm
* @return Tensor<uint8_t>* output image in LAB format
*/
Tensor<uint8_t> *rgb2lab(Tensor<uint8_t> &image, bool bgr = false, bool fast = true);
/**
* @brief Convert RGB image to HSV image
*
* @param image input image
* @param bgr true: the image is in BGR format
* false: the image is in RGB format
* @param fast true: use the fast algorithm but the accuracy will be reduced
* false: do not use the fast algorithm
* @return Tensor<uint8_t>* output image in HSV format
*/
Tensor<uint8_t> *rgb2hsv(Tensor<uint8_t> &image, bool bgr = false, bool fast = true);
} // namespace image
} // namespace dl

View File

@ -25,7 +25,8 @@ namespace dl
const int output_exponent; /*<! exponent of output >*/
Tensor<feature_t> *output; /*<! output ptr of add2d >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a seperate memeory >*/
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of add2d >*/
public:
/**
@ -35,19 +36,21 @@ namespace dl
* @param activation activation of add2d, if you don't specify anything, no activation is applied
* @param name name of add2d
* @param inplace true: the output will store to input0
* false: the output will store to a seperate memeory
* false: the output will store to a separate memory
*/
Add2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = NULL, bool inplace = false) : Layer(name), activation(activation), output_exponent(output_exponent), output(NULL)
{
this->inplace = inplace;
}
Add2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = "Add2D", bool inplace = false) : Layer(name),
activation(activation),
output_exponent(output_exponent),
output(NULL),
inplace(inplace),
output_shape({}) {}
/**
* @brief Destroy the Add2D object
*/
~Add2D()
{
if((!this->inplace) && (this->output != NULL))
if ((!this->inplace) && (this->output != NULL))
{
delete this->output;
}
@ -59,10 +62,12 @@ namespace dl
*
* @param input0 as one input
* @param input1 as another input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
{
assert(input0.is_same_shape(input1));
this->output_shape = input0.shape;
if (!this->inplace)
{
@ -78,6 +83,11 @@ namespace dl
{
this->output = &input0;
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -105,7 +115,11 @@ namespace dl
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(this->output_exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
@ -116,6 +130,10 @@ namespace dl
else
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
nn::add2d(*this->output, input0, input1, this->activation, assign_core, this->output_exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "add2d");
}

View File

@ -24,23 +24,26 @@ namespace dl
std::vector<int> filter_shape; /*<! filter shape in [filter_height, filter_width] >*/
const int stride_y; /*<! stride in height >*/
const int stride_x; /*<! stride in width >*/
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET >*/
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN >*/
std::vector<int> padding; /*<! padding size needed in [top, bottom, left, right] of this operation >*/
Tensor<feature_t> *output; /*<! output ptr of AvgPool2D >*/
Tensor<feature_t> *output; /*<! output ptr of AvgPool2D >*/
std::vector<int> output_shape; /*<! output shape of AvgPool2D >*/
public:
/**
* @brief Construct a new AvgPool2D object.
*
* @param output_exponent exponent of output
* @param filter_shape filter shape in [filter_height, filter_width]
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN or PADDING_NOT_SET,
* - PADDING_VALID means no padding
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input,
* - PADDING_SAME results padding in TensorFlow style
* - PADDING_SAME_MXNET results padding in MXNET style
* - PADDING_SAME_END results padding in TensorFlow style
* - PADDING_SAME_BEGIN results padding in MXNET style
* - PADDING_NOT_SET means padding with the specific "padding" value below.
* @param padding if padding_type is PADDING_NOT_SET, this value will be used as padding size.
* the shape must be 4, the value of each position is: [padding top, padding bottom, padding left, padding right]
* @param stride_y stride in height
* @param stride_x stride in width
* @param name name of layer
@ -48,16 +51,23 @@ namespace dl
AvgPool2D(const int output_exponent,
const std::vector<int> filter_shape,
const padding_type_t padding_type = PADDING_VALID,
std::vector<int> padding = {},
const int stride_y = 1,
const int stride_x = 1,
const char *name = NULL) : Layer(name),
output_exponent(output_exponent),
filter_shape(filter_shape),
stride_y(stride_y),
stride_x(stride_x),
padding_type(padding_type)
const char *name = "AvgPool2D") : Layer(name),
output_exponent(output_exponent),
filter_shape(filter_shape),
padding_type(padding_type),
padding(padding),
stride_y(stride_y),
stride_x(stride_x),
output_shape({})
{
this->output = new Tensor<feature_t>;
if (this->padding_type == PADDING_NOT_SET)
{
assert(this->padding.size() == 4);
}
}
/**
@ -66,7 +76,7 @@ namespace dl
*/
~AvgPool2D()
{
if(this->output != NULL)
if (this->output != NULL)
{
delete this->output;
}
@ -76,20 +86,31 @@ namespace dl
* @brief Update output shape and padding.
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
assert(input.shape[0] > 0);
assert(input.shape[1] > 0);
std::vector<int> output_shape = nn::get_output_shape(input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
this->output->set_shape(output_shape);
assert(input.shape.size() == 3);
this->output_shape = nn::get_output_shape(input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type, false, this->padding);
this->output->set_shape(this->output_shape);
this->output->set_exponent(this->output_exponent);
this->padding = nn::get_pad_size(output_shape, input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
input.set_padding_size(this->padding);
this->output->free_element();
}
if (this->padding_type != PADDING_NOT_SET)
{
this->padding = nn::get_pad_size(this->output_shape, input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
}
this->output->free_element();
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
* @brief Get the output
@ -108,7 +129,6 @@ namespace dl
* @param autoload_enable one of true or false,
* - true: load input and output from PSRAM to CACHE automatically
* - false: do not
* @param assign_core not effective yet
* @return AvgPool2D result
*/
Tensor<feature_t> &call(Tensor<feature_t> &input, uint8_t autoload_enable = 0)
@ -116,7 +136,11 @@ namespace dl
DL_LOG_LAYER_LATENCY_INIT();
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(this->output_exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");

View File

@ -1,6 +1,7 @@
#pragma once
#include "dl_tool.hpp"
#include "dl_tool_cache.hpp"
#include <iostream>
namespace dl
{

View File

@ -0,0 +1,139 @@
#pragma once
#include <assert.h>
#include <vector>
#include "dl_constant.hpp"
#include "dl_variable.hpp"
#include "dl_tool.hpp"
#include "dl_layer_base.hpp"
#include "dl_nn_concat.hpp"
namespace dl
{
namespace layer
{
/**
 * @brief Concat(input1, input2, input3, ...): join Tensors along one axis.
 *
 * @tparam feature_t support all kinds of integer and float data type
 */
template <typename feature_t>
class Concat : Layer
{
private:
    int output_exponent;           /*<! exponent of output >*/
    int axis;                      /*<! The axis along which the Tensor will be concatenated. >*/
    Tensor<feature_t> *output;     /*<! output ptr of Concat >*/
    std::vector<int> output_shape; /*<! output shape of Concat >*/

public:
    /**
     * @brief Construct a new Concat object.
     *
     * @param axis The axis along which the Tensor will be concatenated.
     * @param name name of layer
     */
    Concat(int axis, const char *name = "Concat") : Layer(name), axis(axis), output_shape({})
    {
        this->output = new Tensor<feature_t>;
    }

    /**
     * @brief Destroy the Concat object
     */
    ~Concat()
    {
        if (this->output != NULL)
        {
            delete this->output;
        }
    }

    /**
     * @brief Collect inputs' channel and memory offset, called in Model.build().
     *
     * @param args        pointers of concatenated Tensor
     * @param print_shape whether to print the output shape.
     */
    void build(std::vector<Tensor<feature_t> *> args, bool print_shape = false)
    {
        assert(args.size() > 1);

        const int rank = args[0]->shape.size();
        // Support negative indexing, e.g. -1 addresses the last dimension.
        if (this->axis < 0)
        {
            this->axis += rank;
        }
        assert((this->axis < rank) && (this->axis > -1));

        // Sum the lengths along the concat axis while verifying that every
        // other dimension (and the quantization exponent) agrees pairwise.
        int concat_axis_len = args[0]->shape[this->axis];
        for (int i = 1; i < args.size(); i++)
        {
            assert(rank == args[i]->shape.size());
            assert(args[i]->exponent == args[i - 1]->exponent);
            concat_axis_len += args[i]->shape[this->axis];
            for (int j = 0; j < rank; j++)
            {
                if (j != this->axis)
                {
                    assert(args[i]->shape[j] == args[i - 1]->shape[j]);
                }
            }
        }

        this->output_exponent = args[0]->exponent;
        this->output_shape = args[0]->shape;
        this->output_shape[this->axis] = concat_axis_len;

        this->output->set_shape(this->output_shape);
        this->output->set_exponent(this->output_exponent);
        this->output->free_element();

        if (print_shape)
        {
            std::cout << this->name << " | ";
            this->output->print_shape();
        }
    }

    /**
     * @brief Call Concat operation
     *
     * @param inputs      the pointers of inputs
     * @param free_inputs true: free the inputs after call
     *                    false: do not free inputs
     * @return Tensor<feature_t>& concat result
     */
    Tensor<feature_t> &call(std::vector<Tensor<feature_t> *> inputs, bool free_inputs = false)
    {
        DL_LOG_LAYER_LATENCY_INIT();

        DL_LOG_LAYER_LATENCY_START();
        // Restore the shape computed in build() in case the output Tensor
        // was reshaped by an intermediate operation.
        if (this->output->shape != this->output_shape)
        {
            this->output->set_shape(this->output_shape);
        }
        this->output->malloc_element();
        this->output->set_exponent(this->output_exponent);
        DL_LOG_LAYER_LATENCY_END(this->name, "apply");

        DL_LOG_LAYER_LATENCY_START();
        nn::concat(*this->output, inputs, this->axis, free_inputs);
        DL_LOG_LAYER_LATENCY_END(this->name, "concat");

        return *this->output;
    }

    /**
     * @brief Get the output
     *
     * @return Tensor<feature_t>& Concat result
     */
    Tensor<feature_t> &get_output()
    {
        return *this->output;
    }
};
} // namespace layer
} // namespace dl

View File

@ -13,8 +13,11 @@ namespace dl
* @tparam feature_t supports int16_t and int8_t,
* - int16_t: stands for operation in int16_t quantize
* - int8_t: stands for operation in int8_t quantize
* @tparam bias_t supports int16_t and int8_t, must specify when using int8 per-channel quantization
* - int16_t: for int16 quantization and int8 per-channel quantization
* - int8_t: for int8 per-tensor quantization
*/
template <typename feature_t>
template <typename feature_t, typename bias_t = feature_t>
class Conv2D : public Layer
{
private:
@ -22,14 +25,14 @@ namespace dl
const Filter<feature_t> *filter; /*<! filter of Conv2D >*/
const int stride_y; /*<! stride in height >*/
const int stride_x; /*<! stride in width >*/
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET >*/
const Bias<feature_t> *bias; /*<! bias of Conv2D, if you don't specify anything, no bias is added >*/
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN >*/
const Bias<bias_t> *bias; /*<! bias of Conv2D, if you don't specify anything, no bias is added >*/
const Activation<feature_t> *activation; /*<! activation of Conv2D, if you don't specify anything, no activation is applied >*/
std::vector<int> padding; /*<! padding size needed in [top, bottom, left, right] of this operation >*/
Tensor<feature_t> *output; /*<! output ptr of Conv2D >*/
Tensor<feature_t> *output; /*<! output ptr of Conv2D >*/
std::vector<int> output_shape; /*<! output shape of Conv2D >*/
public:
/**
* @brief Construct a new Conv2D object.
*
@ -37,33 +40,43 @@ namespace dl
* @param filter filter of Conv2D
* @param bias bias of Conv2D, if you don't specify anything, no bias is added
* @param activation activation of Conv2D, if you don't specify anything, no activation is applied
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN or PADDING_NOT_SET,
* - PADDING_VALID means no padding
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input,
* - PADDING_SAME results padding in TensorFlow style
* - PADDING_SAME_MXNET results padding in MXNET style
* - PADDING_SAME_END results padding in TensorFlow style
* - PADDING_SAME_BEGIN results padding in MXNET style
* - PADDING_NOT_SET means padding with the specific "padding" value below.
* @param padding if padding_type is PADDING_NOT_SET, this value will be used as padding size.
* the shape must be 4, the value of each position is: [padding top, padding bottom, padding left, padding right]
* @param stride_y stride in height
* @param stride_x stride in width
* @param name name of layer
*/
Conv2D(const int output_exponent,
const Filter<feature_t> *filter,
const Bias<feature_t> *bias = NULL,
const Bias<bias_t> *bias = NULL,
const Activation<feature_t> *activation = NULL,
const padding_type_t padding_type = PADDING_VALID,
std::vector<int> padding = {},
const int stride_y = 1,
const int stride_x = 1,
const char *name = NULL) : Layer(name),
output_exponent(output_exponent),
filter(filter),
stride_y(stride_y),
stride_x(stride_x),
padding_type(padding_type),
bias(bias),
activation(activation)
const char *name = "Conv2D") : Layer(name),
output_exponent(output_exponent),
filter(filter),
stride_y(stride_y),
stride_x(stride_x),
padding_type(padding_type),
bias(bias),
activation(activation),
padding(padding),
output_shape({})
{
this->output = new Tensor<feature_t>;
if (this->padding_type == PADDING_NOT_SET)
{
assert(this->padding.size() == 4);
}
}
/**
@ -82,19 +95,30 @@ namespace dl
* @brief Update output padding and input padding.
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
assert(input.shape[0] > 0);
assert(input.shape[1] > 0);
assert(input.shape.size() == 3);
assert(this->filter->shape.size() == 4);
assert(input.shape[2] == this->filter->shape[2]);
std::vector<int> output_shape = nn::get_output_shape(input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type, true);
this->output->set_shape(output_shape);
this->output_shape = nn::get_output_shape(input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type, true, this->padding);
this->output->set_shape(this->output_shape);
this->output->set_exponent(this->output_exponent);
this->output->free_element();
if (this->padding_type != PADDING_NOT_SET)
{
this->padding = nn::get_pad_size(this->output_shape, input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
}
this->padding = nn::get_pad_size(output_shape, input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
input.set_padding_size(this->padding);
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -122,7 +146,11 @@ namespace dl
DL_LOG_LAYER_LATENCY_INIT();
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(this->output_exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
@ -153,5 +181,6 @@ namespace dl
dl::tool::cache::preload_func((uint32_t)(this->filter->element), size);
}
};
} // namespace layer
} // namespace dl

View File

@ -13,8 +13,11 @@ namespace dl
* @tparam feature_t supports int16_t and int8_t,
* - int16_t: stands for operation in int16_t quantize
* - int8_t: stands for operation in int8_t quantize
* @tparam bias_t supports int16_t and int8_t, must specify when using int8 per-channel quantization
* - int16_t: for int16 quantization and int8 per-channel quantization
* - int8_t: for int8 per-tensor quantization
*/
template <typename feature_t>
template <typename feature_t, typename bias_t = feature_t>
class DepthwiseConv2D : public Layer
{
private:
@ -22,14 +25,14 @@ namespace dl
const Filter<feature_t> *filter; /*<! filter of DepthwiseConv2D >*/
const int stride_y; /*<! stride in height >*/
const int stride_x; /*<! stride in width >*/
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET >*/
const Bias<feature_t> *bias; /*<! bias of DepthwiseConv2D, if you don't specify anything, no bias is added >*/
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN >*/
const Bias<bias_t> *bias; /*<! bias of DepthwiseConv2D, if you don't specify anything, no bias is added >*/
const Activation<feature_t> *activation; /*<! activation of DepthwiseConv2D, if you don't specify anything, no activation is applied >*/
std::vector<int> padding; /*<! padding size needed in [top, bottom, left, right] of this operation >*/
Tensor<feature_t> *output; /*<! output ptr of DepthwiseConv2D >*/
std::vector<int> output_shape; /*<! output shape of DepthwiseConv2D >*/
public:
/**
* @brief Construct a new DepthwiseConv2D object.
*
@ -37,40 +40,50 @@ namespace dl
* @param filter filter of DepthwiseConv2D
* @param bias bias of DepthwiseConv2D, if you don't specify anything, no bias is added
* @param activation activation of DepthwiseConv2D, if you don't specify anything, no activation is applied
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN or PADDING_NOT_SET,
* - PADDING_VALID means no padding
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input
* - PADDING_SAME results padding in TensorFlow style
* - PADDING_SAME_MXNET results padding in MXNET style
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input,
* - PADDING_SAME_END results padding in TensorFlow style
* - PADDING_SAME_BEGIN results padding in MXNET style
* - PADDING_NOT_SET means padding with the specific "padding" value below.
* @param padding if padding_type is PADDING_NOT_SET, this value will be used as padding size.
* the shape must be 4, the value of each position is: [padding top, padding bottom, padding left, padding right]
* @param stride_y - stride in height
* @param stride_x - stride in width
* @param name name of layer
*/
DepthwiseConv2D(const int output_exponent,
const Filter<feature_t> *filter,
const Bias<feature_t> *bias = NULL,
const Bias<bias_t> *bias = NULL,
const Activation<feature_t> *activation = NULL,
const padding_type_t padding_type = PADDING_VALID,
std::vector<int> padding = {},
const int stride_y = 1,
const int stride_x = 1,
const char *name = NULL) : Layer(name),
output_exponent(output_exponent),
filter(filter),
stride_y(stride_y),
stride_x(stride_x),
padding_type(padding_type),
bias(bias),
activation(activation)
const char *name = "DepthwiseConv2D") : Layer(name),
output_exponent(output_exponent),
filter(filter),
stride_y(stride_y),
stride_x(stride_x),
padding_type(padding_type),
bias(bias),
activation(activation),
padding(padding),
output_shape({})
{
this->output = new Tensor<feature_t>;
if (this->padding_type == PADDING_NOT_SET)
{
assert(this->padding.size() == 4);
}
}
/**
* @brief Destroy the DepthwiseConv2D object.
*
*/
~DepthwiseConv2D()
~DepthwiseConv2D()
{
if (this->output != NULL)
{
@ -82,19 +95,31 @@ namespace dl
* @brief Update output shape and padding.
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
assert(input.shape[0] > 0);
assert(input.shape[1] > 0);
assert(input.shape.size() == 3);
assert(this->filter->shape.size() == 4);
assert(input.shape[2] == this->filter->shape[2]);
std::vector<int> output_shape = nn::get_output_shape(input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
this->output->set_shape(output_shape);
this->output_shape = nn::get_output_shape(input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type, false, this->padding);
this->output->set_shape(this->output_shape);
this->output->set_exponent(this->output_exponent);
this->padding = nn::get_pad_size(output_shape, input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
input.set_padding_size(this->padding);
if (this->padding_type != PADDING_NOT_SET)
{
this->padding = nn::get_pad_size(this->output_shape, input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
}
this->output->free_element();
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -122,7 +147,12 @@ namespace dl
DL_LOG_LAYER_LATENCY_INIT();
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(this->output_exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");

View File

@ -0,0 +1,128 @@
#pragma once
#include "dl_constant.hpp"
#include "dl_variable.hpp"
#include "dl_tool.hpp"
#include "dl_layer_base.hpp"
namespace dl
{
namespace layer
{
/**
* @brief
*
* @tparam feature_t
*/
template <typename feature_t>
class ExpandDims : public Layer
{
private:
std::vector<int> output_shape; /*<! output shape of ExpandDims >*/
std::vector<int> axis; /*<! position where the new axis is placed. >*/
Tensor<feature_t> *output; /*<! output ptr of ExpandDims >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
public:
int output_exponent;
/**
* @brief Construct a new ExpandDims object
*
* @param axis position where the new axis is placed.
* @param name name of layer
* @param inplace true: the output will store to input
* false: the output will store to a separate memory
*/
ExpandDims(std::vector<int> axis, const char *name = "ExpandDims", bool inplace = false) : Layer(name),
axis(axis), inplace(inplace), output_shape({})
{
}
/**
* @brief Destroy the ExpandDims object
*
*/
~ExpandDims()
{
if ((!this->inplace) && (this->output != NULL))
{
delete this->output;
}
}
/**
* @brief Update output shape.
*
* @param input as an input.
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input, bool print_shape = false)
{
this->output_exponent = input.exponent;
if (!this->inplace)
{
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(this->output_exponent);
this->output->set_shape(this->output_shape);
this->output->expand_dims(this->axis);
this->output->free_element();
}
else
{
this->output = &input;
this->output->set_shape(this->output_shape);
this->output->expand_dims(this->axis);
}
this->output_shape = this->output->shape;
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
* @brief Get the output
*
* @return Tensor<feature_t>& ExpandDims result
*/
Tensor<feature_t> &get_output()
{
return *this->output;
}
/**
* @brief call ExpandDims opeartion
*
* @param input
* @return Tensor<feature_t>& ExpandDims result
*/
Tensor<feature_t> &call(Tensor<feature_t> &input)
{
DL_LOG_LAYER_LATENCY_INIT();
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->set_exponent(input.exponent);
this->output->set_shape(this->output_shape);
this->output->copy_element(input, true);
DL_LOG_LAYER_LATENCY_END(this->name, "ExpandDims");
}
else
{
DL_LOG_LAYER_LATENCY_START();
this->output->set_shape(this->output_shape);
DL_LOG_LAYER_LATENCY_END(this->name, "ExpandDims");
}
return *this->output;
}
};
} // namespace layer
} // namespace dl

View File

@ -0,0 +1,120 @@
#pragma once
#include "dl_constant.hpp"
#include "dl_variable.hpp"
#include "dl_tool.hpp"
#include "dl_layer_base.hpp"
namespace dl
{
namespace layer
{
/**
* @brief
*
* @tparam feature_t
*/
template <typename feature_t>
class Flatten : public Layer
{
private:
int output_exponent; /*<! exponent of output >*/
Tensor<feature_t> *output; /*<! output ptr of Flatten >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of Flatten >*/
public:
/**
* @brief Construct a new Flatten object
*
* @param name name of layer
* @param inplace true: the output will store to input0
* false: the output will store to a separate memory
*/
Flatten(const char *name = "Flatten", bool inplace = false) : Layer(name), inplace(inplace), output_shape({})
{}
/**
* @brief Destroy the Flatten object
*
*/
~Flatten()
{
if ((!this->inplace) && (this->output != NULL))
{
delete this->output;
}
}
/**
* @brief Update output shape.
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input, bool print_shape = false)
{
this->output_exponent = input.exponent;
this->output_shape = {input.get_size()};
if (!this->inplace)
{
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(this->output_exponent);
this->output->set_shape(this->output_shape);
this->output->free_element();
}
else
{
this->output = &input;
this->output->set_shape(this->output_shape);
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
* @brief Get the output
*
* @return Tensor<feature_t>& Flatten result
*/
Tensor<feature_t> &get_output()
{
return *this->output;
}
/**
* @brief Call Flatten operation.
*
* @param input as an input
* @return Tensor<feature_t>& Flatten result
*/
Tensor<feature_t> &call(Tensor<feature_t> &input)
{
DL_LOG_LAYER_LATENCY_INIT();
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->set_exponent(input.exponent);
this->output->flatten();
this->output->copy_element(input, true);
DL_LOG_LAYER_LATENCY_END(this->name, "flatten");
}
else
{
DL_LOG_LAYER_LATENCY_START();
this->output->flatten();
DL_LOG_LAYER_LATENCY_END(this->name, "flatten");
}
return *this->output;
}
};
} // namespace layer
} // namespace dl

View File

@ -0,0 +1,167 @@
#pragma once
#include "dl_nn_fully_connected.hpp"
#include "dl_layer_base.hpp"
namespace dl
{
namespace layer
{
/**
* @brief Activation(FullyConnected(input, filter) + bias).
*
* @tparam feature_t supports int16_t and int8_t,
* - int16_t: stands for operation in int16_t quantize
* - int8_t: stands for operation in int8_t quantize
* @tparam bias_t supports int16_t and int8_t, must specify when using int8 per-channel quantization
* - int16_t: for int16 quantization and int8 per-channel quantization
* - int8_t: for int8 per-tensor quantization
*/
template <typename feature_t, typename bias_t = feature_t>
class FullyConnected : public Layer
{
private:
    const int output_exponent; /*<! exponent of output >*/
    const bool flatten;        /*<! true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
                                    false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim] >*/
    const Filter<feature_t> *filter;         /*<! filter of FullyConnected >*/
    const Bias<bias_t> *bias;                /*<! bias of FullyConnected, if you don't specify anything, no bias is added >*/
    const Activation<feature_t> *activation; /*<! activation of FullyConnected, if you don't specify anything, no activation is applied >*/
    Tensor<feature_t> *output;               /*<! output ptr of FullyConnected >*/
    std::vector<int> output_shape;           /*<! output shape of FullyConnected >*/

public:
    /**
     * @brief Construct a new FullyConnected object.
     *
     * @param output_exponent exponent of output
     * @param filter          filter of FullyConnected
     * @param bias            bias of FullyConnected, if you don't specify anything, no bias is added
     * @param activation      activation of FullyConnected, if you don't specify anything, no activation is applied
     * @param flatten         true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
     *                        false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
     * @param name            name of layer
     */
    FullyConnected(const int output_exponent,
                   const Filter<feature_t> *filter,
                   const Bias<bias_t> *bias = NULL,
                   const Activation<feature_t> *activation = NULL,
                   const bool flatten = true,
                   const char *name = "FullyConnected") : Layer(name),
                                                          output_exponent(output_exponent),
                                                          flatten(flatten),
                                                          filter(filter),
                                                          bias(bias),
                                                          activation(activation),
                                                          output_shape({})
    {
        // the output tensor is owned by this layer and released in the destructor
        this->output = new Tensor<feature_t>;
    }

    /**
     * @brief Destroy the FullyConnected object.
     *
     */
    ~FullyConnected()
    {
        if (this->output != NULL)
        {
            delete this->output;
        }
    }

    /**
     * @brief Update output padding and input padding.
     *
     * @param input       as an input
     * @param print_shape whether to print the output shape.
     */
    void build(Tensor<feature_t> &input, bool print_shape = false)
    {
        // the filter is always stored as [1, 1, input_dim, output_dim]
        assert(this->filter->shape.size() == 4);
        assert(this->filter->shape[0] == 1);
        assert(this->filter->shape[1] == 1);
        if (this->flatten)
        {
            // flatten mode: the whole input is consumed as one vector
            assert(input.get_size() == this->filter->shape[2]);
            this->output_shape = {this->filter->shape[3]};
        }
        else
        {
            // per-last-axis mode: only the innermost dimension is transformed
            assert(input.shape.back() == this->filter->shape[2]);
            this->output_shape = input.shape;
            this->output_shape[this->output_shape.size() - 1] = this->filter->shape[3];
        }
        this->output->set_shape(this->output_shape);
        this->output->set_exponent(this->output_exponent);
        // element buffer is allocated lazily in call(), not here
        this->output->free_element();

        if (print_shape)
        {
            std::cout << this->name << " | ";
            this->output->print_shape();
        }
    }

    /**
     * @brief Get the output
     *
     * @return Tensor<feature_t>& FullyConnected result
     */
    Tensor<feature_t> &get_output()
    {
        return *this->output;
    }

    /**
     * @brief Call FullyConnected operation
     *
     * @param input           as an input.
     * @param autoload_enable one of true or false,
     *                        - true: load input and output from PSRAM to CACHE automatically
     *                        - false: do not
     * @param assign_core     not effective yet
     * @return FullyConnected result
     */
    Tensor<feature_t> &call(Tensor<feature_t> &input, bool autoload_enable = false, const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE)
    {
        DL_LOG_LAYER_LATENCY_INIT();

        DL_LOG_LAYER_LATENCY_START();
        // re-sync the shape in case the tensor was altered since build()
        if (this->output->shape != this->output_shape)
        {
            this->output->set_shape(this->output_shape);
        }
        this->output->malloc_element();
        this->output->set_exponent(this->output_exponent);
        DL_LOG_LAYER_LATENCY_END(this->name, "apply");

        if (autoload_enable)
        {
            dl::tool::cache::autoload_func((uint32_t)(this->output->element), this->output->get_size() * sizeof(feature_t),
                                           (uint32_t)(input.element), input.get_size() * sizeof(feature_t));
        }

        DL_LOG_LAYER_LATENCY_START();
        nn::fully_connected(*this->output, input, *(this->filter), this->bias, this->activation, this->flatten, assign_core);
        DL_LOG_LAYER_LATENCY_END(this->name, "fully_connected");

        return *this->output;
    }

    /**
     * @brief Preload the filter to Cache.
     *        NOTE: Call this layer's preload() before previous layer's call() such that filter could be loaded while previous layer is doing calculation.
     */
    void preload()
    {
        // total byte count = product of all filter dimensions * element size
        size_t size = sizeof(feature_t);
        int shape_size = this->filter->shape.size();
        for (int i = 0; i < shape_size; ++i)
        {
            size *= filter->shape[i];
        }
        dl::tool::cache::preload_func((uint32_t)(this->filter->element), size);
    }
};
} // namespace layer
} // namespace dl

View File

@ -20,8 +20,9 @@ namespace dl
class GlobalAveragePool2D : public Layer
{
private:
const int output_exponent; /*<! exponent of output >*/
Tensor<feature_t> *output; /*<! output ptr of GlobalAveragePool2D >*/
const int output_exponent; /*<! exponent of output >*/
std::vector<int> output_shape; /*<! output shape of GlobalAveragePool2D >*/
Tensor<feature_t> *output; /*<! output ptr of GlobalAveragePool2D >*/
public:
/**
* @brief Construct a new GlobalAveragePool2D object.
@ -29,8 +30,9 @@ namespace dl
* @param output_exponent exponent of output
* @param name name of layer
*/
GlobalAveragePool2D(const int output_exponent, const char *name = NULL) : Layer(name),
output_exponent(output_exponent)
GlobalAveragePool2D(const int output_exponent, const char *name = "GlobalAveragePool2D") : Layer(name),
output_exponent(output_exponent),
output_shape({})
{
this->output = new Tensor<feature_t>;
@ -52,17 +54,26 @@ namespace dl
* @brief Update output shape.
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
assert(input.shape[0] > 0);
assert(input.shape[1] > 0);
assert(input.shape.size() == 3);
std::vector<int> output_shape(input.shape.size(), 1);
output_shape[2] = input.shape[2];
this->output->set_shape(output_shape);
this->output_shape = output_shape;
this->output->set_shape(this->output_shape);
this->output->set_exponent(this->output_exponent);
this->output->free_element();
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -90,7 +101,11 @@ namespace dl
DL_LOG_LAYER_LATENCY_INIT();
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(this->output_exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");

View File

@ -20,15 +20,15 @@ namespace dl
class GlobalMaxPool2D : public Layer
{
private:
Tensor<feature_t> *output; /*<! output ptr of GlobalMaxPool2D >*/
Tensor<feature_t> *output; /*<! output ptr of GlobalMaxPool2D >*/
std::vector<int> output_shape; /*<! output shape of GlobalMaxPool2D >*/
public:
/**
* @brief Construct a new GlobalMaxPool2D object.
*
* @param name name of layer
*/
GlobalMaxPool2D(const char *name = NULL) : Layer(name)
GlobalMaxPool2D(const char *name = "GlobalMaxPool2D") : Layer(name), output_shape({})
{
this->output = new Tensor<feature_t>;
}
@ -49,17 +49,26 @@ namespace dl
* @brief Update output shape and exponent.
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
assert(input.shape[0] > 0);
assert(input.shape[1] > 0);
assert(input.shape.size() == 3);
this->output->set_exponent(input.exponent);
std::vector<int> output_shape(input.shape.size(), 1);
output_shape[2] = input.shape[2];
this->output->set_shape(output_shape);
this->output_shape = output_shape;
this->output->set_shape(this->output_shape);
this->output->free_element();
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -87,7 +96,11 @@ namespace dl
DL_LOG_LAYER_LATENCY_INIT();
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(input.exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");

View File

@ -2,7 +2,7 @@
#include "dl_constant.hpp"
#include "dl_variable.hpp"
#include "dl_nn_LeakyReLU.hpp"
#include "dl_nn_leakyrelu.hpp"
#include "dl_layer_base.hpp"
namespace dl
@ -20,13 +20,13 @@ namespace dl
class LeakyReLU : public Layer
{
private:
feature_t activation_alpha; /*<! quantized alpha >*/
int activation_exponent; /*<! exponent of quantized alpha >*/
Tensor<feature_t> *output; /*<! output ptr of leakyrelu>*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a seperate memeory >*/
feature_t activation_alpha; /*<! quantized alpha >*/
int activation_exponent; /*<! exponent of quantized alpha >*/
Tensor<feature_t> *output; /*<! output ptr of leakyrelu>*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of leakyrelu >*/
public:
/**
* @brief Construct a new LeakyReLU object
*
@ -34,9 +34,9 @@ namespace dl
* @param activation_exponent exponent of quantized alpha
* @param name name of leakyrelu
* @param inplace true: the output will store to input0
* false: the output will store to a seperate memeory
* false: the output will store to a separate memory
*/
LeakyReLU(const int activation_alpha, const int activation_exponent, const char *name = NULL, bool inplace = false) : Layer(name), output(NULL)
LeakyReLU(const int activation_alpha, const int activation_exponent, const char *name = "LeakyReLU", bool inplace = false) : Layer(name), output(NULL), output_shape({})
{
this->activation_alpha = activation_alpha;
this->activation_exponent = activation_exponent;
@ -47,7 +47,7 @@ namespace dl
* @brief Destroy the LeakyReLU object
*
*/
~LeakyReLU()
~LeakyReLU()
{
if ((!this->inplace) && (this->output != NULL))
{
@ -59,24 +59,32 @@ namespace dl
* @brief Update output shape and exponent
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
if(!this->inplace)
this->output_shape = input.shape;
if (!this->inplace)
{
if(this->output != NULL)
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_shape(input.shape);
}
this->output->set_shape(this->output_shape);
this->output->set_exponent(input.exponent);
this->output->free_element();
}
else
{
this->output = &input;
this->output->set_shape(this->output_shape);
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -100,10 +108,14 @@ namespace dl
{
DL_LOG_LAYER_LATENCY_INIT();
if(!this->inplace)
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(input.exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
@ -114,6 +126,10 @@ namespace dl
else
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
nn::leakyrelu<true>(*this->output, input, this->activation_alpha, this->activation_exponent, assign_core);
DL_LOG_LAYER_LATENCY_END(this->name, "leakyrelu");
}

View File

@ -22,28 +22,28 @@ namespace dl
class Max2D : public Layer
{
private:
Tensor<feature_t> *output; /*<! output ptr of max2d >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a seperate memeory >*/
Tensor<feature_t> *output; /*<! output ptr of max2d >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of max2d >*/
public:
/**
* @brief Construct a new Max2D object.
*
* @param name name of max2d
* @param inplace true: the output will store to input0
* false: the output will store to a seperate memeory
* false: the output will store to a separate memory
*/
Max2D(const char *name = NULL, bool inplace = false) : Layer(name), output(NULL)
Max2D(const char *name = "Max2D", bool inplace = false) : Layer(name),
output(NULL), inplace(inplace), output_shape({})
{
this->inplace = inplace;
}
/**
* @brief Destroy the Max2D object
*
*/
~Max2D()
~Max2D()
{
if ((!this->inplace) && (this->output != NULL))
{
@ -58,24 +58,34 @@ namespace dl
*
* @param input0 as one input
* @param input1 as another input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
{
assert(input0.is_same_shape(input1));
assert(input0.exponent == input1.exponent);
this->output_shape = input0.shape;
if(!this->inplace)
if (!this->inplace)
{
if(this->output != NULL)
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(this->output_exponent);
this->output->set_shape(input0.shape);
this->output->set_shape(this->output_shape);
this->output->free_element();
}
else
{
this->output = &input0;
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -100,10 +110,14 @@ namespace dl
{
DL_LOG_LAYER_LATENCY_INIT();
if(!this->inplace)
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(input0.exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
@ -114,6 +128,10 @@ namespace dl
else
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
nn::max2d<true>(*this->output, input0, input1, assign_core);
DL_LOG_LAYER_LATENCY_END(this->name, "max2d");
}

View File

@ -23,44 +23,54 @@ namespace dl
std::vector<int> filter_shape; /*<! filter shape in [filter_height, filter_width] >*/
const int stride_y; /*<! stride in height >*/
const int stride_x; /*<! stride in width >*/
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET >*/
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN >*/
std::vector<int> padding; /*<! padding size needed in [top, bottom, left, right] of this operation >*/
Tensor<feature_t> *output; /*<! output ptr of MaxPool2D >*/
std::vector<int> output_shape; /*<! output shape of MaxPool2D >*/
public:
/**
* @brief Construct a new MaxPool2D object.
*
* @param filter_shape filter shape in [filter_height, filter_width]
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN or PADDING_NOT_SET,
* - PADDING_VALID means no padding
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input,
* - PADDING_SAME results padding in TensorFlow style
* - PADDING_SAME_MXNET results padding in MXNET style
* - PADDING_SAME_END results padding in TensorFlow style
* - PADDING_SAME_BEGIN results padding in MXNET style
* - PADDING_NOT_SET means padding with the specific "padding" value below.
* @param padding if padding_type is PADDING_NOT_SET, this value will be used as padding size.
* the shape must be 4, the value of each position is: [padding top, padding bottom, padding left, padding right]
* @param stride_y stride in height
* @param stride_x stride in width
* @param name name of layer
*/
MaxPool2D(const std::vector<int> filter_shape,
const padding_type_t padding_type = PADDING_VALID,
std::vector<int> padding = {},
const int stride_y = 1,
const int stride_x = 1,
const char *name = NULL) : Layer(name),
filter_shape(filter_shape),
stride_y(stride_y),
stride_x(stride_x),
padding_type(padding_type)
const char *name = "MaxPool2D") : Layer(name),
filter_shape(filter_shape),
padding_type(padding_type),
padding(padding),
stride_y(stride_y),
stride_x(stride_x),
output_shape({})
{
this->output = new Tensor<feature_t>;
if (this->padding_type == PADDING_NOT_SET)
{
assert(this->padding.size() == 4);
}
}
/**
* @brief Destroy the MaxPool2D object.
*
*/
~MaxPool2D()
~MaxPool2D()
{
if (this->output != NULL)
{
@ -72,18 +82,29 @@ namespace dl
* @brief Update output shape and padding.
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
assert(input.shape[0] > 0);
assert(input.shape[1] > 0);
this->output->set_exponent(input.exponent);
std::vector<int> output_shape = nn::get_output_shape(input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
this->output->set_shape(output_shape);
assert(input.shape.size() == 3);
this->padding = nn::get_pad_size(output_shape, input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
input.set_padding_size(this->padding);
this->output->set_exponent(input.exponent);
this->output_shape = nn::get_output_shape(input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type, false, this->padding);
this->output->set_shape(this->output_shape);
if (this->padding_type != PADDING_NOT_SET)
{
this->padding = nn::get_pad_size(this->output_shape, input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
}
this->output->free_element();
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -111,7 +132,11 @@ namespace dl
DL_LOG_LAYER_LATENCY_INIT();
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(input.exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");

View File

@ -22,28 +22,28 @@ namespace dl
class Min2D : public Layer
{
private:
Tensor<feature_t> *output; /*<! output of ptr min2d>*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a seperate memeory >*/
public:
Tensor<feature_t> *output; /*<! output of ptr min2d>*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of min2d >*/
public:
/**
* @brief Construct a new Min2D object
*
* @param name name of min2d
* @param inplace true: the output will store to input0
* false: the output will store to a seperate memeory
* false: the output will store to a separate memory
*/
Min2D(const char *name = NULL, bool inplace = false) : Layer(name), output(NULL)
{
this->inplace = inplace;
}
Min2D(const char *name = "Min2D", bool inplace = false) : Layer(name),
output(NULL),
inplace(inplace),
output_shape({}) {}
/**
* @brief Destroy the Min2D object
*
*/
~Min2D()
~Min2D()
{
if ((!this->inplace) && (this->output != NULL))
{
@ -58,25 +58,34 @@ namespace dl
*
* @param input0 as one input
* @param input1 as another input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
{
assert(input0.is_same_shape(input1));
assert(input0.exponent == input1.exponent);
this->output_shape = input0.shape;
if(!this->inplace)
if (!this->inplace)
{
if(this->output != NULL)
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_shape(input0.shape);
this->output->set_shape(this->output_shape);
this->output->set_exponent(input0.exponent);
this->output->free_element();
}
else
{
this->output = &input0;
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -101,10 +110,14 @@ namespace dl
{
DL_LOG_LAYER_LATENCY_INIT();
if(!this->inplace)
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(input0.exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
@ -115,6 +128,10 @@ namespace dl
else
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
nn::min2d<true>(*this->output, input0, input1, assign_core);
DL_LOG_LAYER_LATENCY_END(this->name, "min2d");
}

View File

@ -21,14 +21,13 @@ namespace dl
class Mul2D : public Layer
{
private:
const int output_exponent; /*<! exponent of output >*/
const int output_exponent; /*<! exponent of output >*/
const Activation<feature_t> *activation; /*<! activation of Mul2D, if you don't specify anything, no activation is applied >*/
Tensor<feature_t> *output; /*<! output ptr of Mul2D >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a seperate memeory >*/
Tensor<feature_t> *output; /*<! output ptr of Mul2D >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of Mul2D >*/
public:
const int output_exponent; /*<! exponent of output >*/
/**
* @brief Construct a new Mul2D object.
*
@ -36,18 +35,24 @@ namespace dl
* @param activation activation of Mul2D, if you don't specify anything, no activation is applied
* @param name name of layer
* @param inplace true: the output will store to input0
* false: the output will store to a seperate memeory
* false: the output will store to a separate memory
*/
Mul2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = NULL, bool inplace = false) : Layer(name),
output_exponent(output_exponent),activation(activation), output(NULL)
Mul2D(const int output_exponent,
const Activation<feature_t> *activation = NULL,
const char *name = "Mul2D",
bool inplace = false) : Layer(name),
output_exponent(output_exponent),
activation(activation),
output(NULL),
inplace(inplace),
output_shape({})
{
this->inplace = inplace;
}
/**
* @brief Destroy the Multiply2D object.
*/
~Mul2D()
~Mul2D()
{
if ((!this->inplace) && (this->output != NULL))
{
@ -61,24 +66,34 @@ namespace dl
*
* @param input0 as one input
* @param input1 as another input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
{
assert(input0.is_same_shape(input1));
this->output_shape = input0.shape;
if (!this->inplace)
{
if(this->output != NULL)
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(this->output_exponent);
this->output->set_shape(input0.shape);
this->output->set_shape(this->output_shape);
this->output->free_element();
}
else
{
this->output = &input0;
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -106,7 +121,11 @@ namespace dl
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(this->output_exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
@ -117,6 +136,10 @@ namespace dl
else
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
nn::mul2d<true>(*this->output, input0, input1, this->activation, assign_core);
DL_LOG_LAYER_LATENCY_END(this->name, "mul2d");
}

View File

@ -24,9 +24,9 @@ namespace dl
int activation_exponent; /*<! exponent of quantized alpha elements >*/
Tensor<feature_t> *output; /*<! output ptr of prelu >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a seperate memeory >*/
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of prelu >*/
public:
/**
* @brief Construct a new PReLU object
*
@ -34,20 +34,25 @@ namespace dl
* @param activation_exponent exponent of quantized alpha elements
* @param name name of prelu
* @param inplace true: the output will store to input0
* false: the output will store to a seperate memeory
* false: the output will store to a separate memory
*/
PReLU(const feature_t *activation_element, const int activation_exponent = 0, const char *name = NULL, bool inplace = false) : Layer(name), output(NULL)
PReLU(const feature_t *activation_element,
const int activation_exponent = 0,
const char *name = "PReLU",
bool inplace = false) : Layer(name),
activation_element(activation_element),
activation_exponent(activation_exponent),
output(NULL),
inplace(inplace),
output_shape({})
{
this->activation_element = activation_element;
this->activation_exponent = activation_exponent;
this->inplace = inplace;
}
/**
* @brief Destroy the PReLU object
*
*/
~PReLU()
~PReLU()
{
if ((!this->inplace) && (this->output != NULL))
{
@ -59,23 +64,31 @@ namespace dl
* @brief Update output shape and exponent
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
if(!this->inplace)
this->output_shape = input.shape;
if (!this->inplace)
{
if(this->output != NULL)
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(input.exponent);
this->output->set_shape(input.shape);
this->output->set_shape(this->output_shape);
this->output->free_element();
}
else
{
this->output = &input;
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -99,11 +112,15 @@ namespace dl
{
DL_LOG_LAYER_LATENCY_INIT();
if(!this->inplace)
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->set_exponent(input.exponent);
this->output->apply_element();
this->output->malloc_element();
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
DL_LOG_LAYER_LATENCY_START();
@ -113,6 +130,10 @@ namespace dl
else
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
nn::prelu(*this->output, input, this->activation_element, this->activation_exponent, assign_core);
DL_LOG_LAYER_LATENCY_END(this->name, "leakyrelu");
}

View File

@ -21,29 +21,28 @@ namespace dl
class ReLU : public Layer
{
private:
Tensor<feature_t> *output; /*<! output ptr of relu >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a seperate memeory >*/
Tensor<feature_t> *output; /*<! output ptr of relu >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of relu >*/
public:
/**
* @brief Construct a new ReLU object
*
* @param name name of relu
* @param inplace true: the output will store to input0
* false: the output will store to a seperate memeory
* false: the output will store to a separate memory
*/
ReLU(const char *name = NULL, bool inplace = false) : Layer(name), output(NULL)
ReLU(const char *name = "ReLU", bool inplace = false) : Layer(name),
output(NULL), inplace(inplace), output_shape({})
{
this->inplace = inplace;
}
/**
* @brief Destroy the ReLU object
*
*/
~ReLU()
~ReLU()
{
if ((!this->inplace) && (this->output != NULL))
{
@ -55,23 +54,31 @@ namespace dl
* @brief Update output shape and exponent
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input)
void build(Tensor<feature_t> &input, bool print_shape = false)
{
if(!this->inplace)
this->output_shape = input.shape;
if (!this->inplace)
{
if(this->output != NULL)
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(input.exponent);
this->output->set_shape(input.shape);
this->output->set_shape(this->output_shape);
this->output->free_element();
}
else
{
this->output = &input;
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -95,10 +102,14 @@ namespace dl
{
DL_LOG_LAYER_LATENCY_INIT();
if(!this->inplace)
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output->malloc_element();
this->output->set_exponent(input.exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
@ -109,6 +120,10 @@ namespace dl
else
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
nn::relu(*this->output, input, assign_core);
DL_LOG_LAYER_LATENCY_END(this->name, "relu");
}

View File

@ -0,0 +1,124 @@
#pragma once
#include "dl_constant.hpp"
#include "dl_variable.hpp"
#include "dl_tool.hpp"
#include "dl_layer_base.hpp"
namespace dl
{
namespace layer
{
/**
* @brief Reshape(input)
*
* @tparam feature_t supports int16_t and int8_t,
* - int16_t: stands for operation in int16_t quantize
* - int8_t: stands for operation in int8_t quantize
*/
template <typename feature_t>
class Reshape : public Layer
{
private:
int output_exponent; /*<! exponent of output >*/
Tensor<feature_t> *output; /*<! output ptr of Reshape >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of Reshape >*/
public:
/**
* @brief Construct a new Reshape object
*
* @param shape the target shape
* @param name name of Reshape layer
* @param inplace true: the output will store to input0
* false: the output will store to a separate memory
*/
Reshape(std::vector<int> shape, const char *name = "Reshape", bool inplace = false) : Layer(name),
output_shape(shape), inplace(inplace)
{
}
/**
* @brief Destroy the Reshape object
*
*/
~Reshape()
{
if ((!this->inplace) && (this->output != NULL))
{
delete this->output;
}
}
/**
* @brief Update output shape and exponent
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input, bool print_shape = false)
{
this->output_exponent = input.exponent;
if (!this->inplace)
{
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(this->output_exponent);
this->output->set_shape(this->output_shape);
this->output->free_element();
}
else
{
this->output = &input;
this->output->set_shape(this->output_shape);
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
* @brief Get the output
*
* @return Tensor<feature_t>& Reshape result
*/
Tensor<feature_t> &get_output()
{
return *this->output;
}
/**
* @brief Call Reshape operation.
*
* @param input as an input
* @return Tensor<feature_t>& Reshape result
*/
Tensor<feature_t> &call(Tensor<feature_t> &input)
{
DL_LOG_LAYER_LATENCY_INIT();
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->set_exponent(input.exponent);
this->output->reshape(this->output_shape);
this->output->copy_element(input, true);
DL_LOG_LAYER_LATENCY_END(this->name, "reshape");
}
else
{
DL_LOG_LAYER_LATENCY_START();
this->output->reshape(this->output_shape);
DL_LOG_LAYER_LATENCY_END(this->name, "reshape");
}
return *this->output;
}
};
} // namespace layer
} // namespace dl

View File

@ -0,0 +1,127 @@
#pragma once
#include "dl_constant.hpp"
#include "dl_variable.hpp"
#include "dl_tool.hpp"
#include "dl_layer_base.hpp"
namespace dl
{
namespace layer
{
/**
* @brief
*
* @tparam feature_t
*/
template <typename feature_t>
class Squeeze : public Layer
{
private:
int output_exponent; /*<! exponent of output >*/
Tensor<feature_t> *output; /*<! output ptr of Squeeze >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
int axis; /*<! the dim to to be remove. make sure the length of the dim is equal to 1.
if axis == INT32_MAX, all the dims with length==1 will be removed. >*/
std::vector<int> output_shape; /*<! output shape of AvgPool2D >*/
public:
/**
* @brief Construct a new Squeeze object
*
* @param axis the dim to to be remove. make sure the length of the dim is equal to 1.
* if axis == INT32_MAX, all the dims with length==1 will be removed.
* @param name name of Squeeze layer
* @param inplace true: the output will store to input0
* false: the output will store to a separate memory
*/
Squeeze(int axis = INT32_MAX, const char *name = "Squeeze", bool inplace = false) : Layer(name), axis(axis), inplace(inplace), output_shape({})
{
}
/**
* @brief Destroy the Squeeze object
*
*/
~Squeeze()
{
if ((!this->inplace) && (this->output != NULL))
{
delete this->output;
}
}
/**
* @brief Update output shape and exponent
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input, bool print_shape = false)
{
this->output_exponent = input.exponent;
if (!this->inplace)
{
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(this->output_exponent);
this->output->set_shape(input.shape);
this->output->squeeze(this->axis);
this->output->free_element();
}
else
{
this->output = &input;
this->output->set_shape(input.shape);
this->output->squeeze(this->axis);
}
this->output_shape = this->output->shape;
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
* @brief Get the output
*
* @return Tensor<feature_t>& Squeeze result
*/
Tensor<feature_t> &get_output()
{
return *this->output;
}
/**
* @brief Call Squeeze operation.
*
* @param input as an input
* @return Tensor<feature_t>& Squeeze result
*/
Tensor<feature_t> &call(Tensor<feature_t> &input)
{
DL_LOG_LAYER_LATENCY_INIT();
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->set_exponent(input.exponent);
this->output->set_shape(this->output_shape);
this->output->copy_element(input, true);
DL_LOG_LAYER_LATENCY_END(this->name, "Squeeze");
}
else
{
DL_LOG_LAYER_LATENCY_START();
this->output->set_shape(this->output_shape);
DL_LOG_LAYER_LATENCY_END(this->name, "Squeeze");
}
return *this->output;
}
};
} // namespace layer
} // namespace dl

View File

@ -21,13 +21,13 @@ namespace dl
class Sub2D : public Layer
{
private:
const int output_exponent; /*<! exponent of output >*/
const Activation<feature_t> *activation; /*<! activation of Mul2D, if you don't specify anything, no activation is applied >*/
Tensor<feature_t> *output; /*<! output ptr of Sub2D >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a seperate memeory >*/
const int output_exponent; /*<! exponent of output >*/
const Activation<feature_t> *activation; /*<! activation of Sub2D, if you don't specify anything, no activation is applied >*/
Tensor<feature_t> *output; /*<! output ptr of Sub2D >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> output_shape; /*<! output shape of Sub2D >*/
public:
/**
* @brief Construct a new Sub2D object.
*
@ -35,18 +35,17 @@ namespace dl
* @param activation activation of Mul2D, if you don't specify anything, no activation is applied
* @param name name of layer
* @param inplace true: the output will store to input0
* false: the output will store to a seperate memeory
* false: the output will store to a separate memory
*/
Sub2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = NULL, bool inplace = false) : Layer(name),
output_exponent(output_exponent), activation(activation), output(NULL)
Sub2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = "Sub2D", bool inplace = false) : Layer(name),
output_exponent(output_exponent), activation(activation), output(NULL), inplace(inplace), output_shape({})
{
this->inplace = inplace;
}
/**
* @brief Destroy the Sub2D object.
*/
~Sub2D()
~Sub2D()
{
if ((!this->inplace) && (this->output != NULL))
{
@ -60,22 +59,32 @@ namespace dl
*
* @param input0 as one input
* @param input1 as another input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
{
assert(input0.is_same_shape(input1));
this->output_shape = input0.shape;
if (!this->inplace)
{
if(this->output != NULL)
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(this->output_exponent);
this->output->set_shape(input0.shape);
this->output->set_shape(this->output_shape);
this->output->free_element();
}
}
else
{
this->output = &input0;
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
@ -103,7 +112,11 @@ namespace dl
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output.apply_element();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
this->output.malloc_element();
this->output->set_exponent(input0.exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
@ -114,6 +127,10 @@ namespace dl
else
{
DL_LOG_LAYER_LATENCY_START();
if (this->output->shape != this->output_shape)
{
this->output->set_shape(this->output_shape);
}
nn::sub2d<true>(this->output, input0, input1, this->activation, assign_core, this->output_exponent);
DL_LOG_LAYER_LATENCY_END(this->name, "sub2d");
}

View File

@ -0,0 +1,126 @@
#pragma once
#include "dl_constant.hpp"
#include "dl_variable.hpp"
#include "dl_tool.hpp"
#include "dl_layer_base.hpp"
namespace dl
{
namespace layer
{
/**
* @brief
*
* @tparam feature_t
*/
template <typename feature_t>
class Transpose : public Layer
{
private:
int output_exponent; /*<! exponent of output >*/
Tensor<feature_t> *output; /*<! output ptr of Transpose >*/
bool inplace; /*<! true: the output will store to input0
false: the output will store to a separate memory >*/
std::vector<int> perm; /*<! the new arangement of the dims. if perm == {}, the dims arangement will be reversed. >*/
std::vector<int> output_shape; /*<! output shape of Transpose >*/
public:
/**
* @brief Construct a new Transpose object
*
* @param perm the new arangement of the dims. if perm == {}, the dims arangement will be reversed.
* @param name name of Transpose layer
* @param inplace true: the output will store to input
* false: the output will store to a separate memory
*/
Transpose(std::vector<int> perm = {}, const char *name = "Transpose", bool inplace = false) : Layer(name), perm(perm), inplace(inplace), output_shape({})
{
}
/**
* @brief Destroy the Transpose object
*
*/
~Transpose()
{
if ((!this->inplace) && (this->output != NULL))
{
delete this->output;
}
}
/**
* @brief Update output shape and exponent
*
* @param input as an input
* @param print_shape whether to print the output shape.
*/
void build(Tensor<feature_t> &input, bool print_shape = false)
{
this->output_exponent = input.exponent;
this->output_shape = input.shape;
for (int i = 0; i < this->perm.size(); i++)
{
this->output_shape[i] = input.shape[this->perm[i]];
}
if (!this->inplace)
{
if (this->output != NULL)
{
this->output = new Tensor<feature_t>;
}
this->output->set_exponent(this->output_exponent);
this->output->set_shape(this->output_shape);
this->output->free_element();
}
else
{
this->output = &input;
this->output->set_shape(this->output_shape);
}
if (print_shape)
{
std::cout << this->name << " | ";
this->output->print_shape();
}
}
/**
* @brief Get the output
*
* @return Tensor<feature_t>& Transpose result
*/
Tensor<feature_t> &get_output()
{
return *this->output;
}
/**
* @brief Call Transpose operation.
*
* @param input as an input.
* @return Tensor<feature_t>& Transpose result.
*/
Tensor<feature_t> &call(Tensor<feature_t> &input)
{
DL_LOG_LAYER_LATENCY_INIT();
if (!this->inplace)
{
DL_LOG_LAYER_LATENCY_START();
this->output->set_exponent(input.exponent);
this->output->transpose(input, this->perm);
DL_LOG_LAYER_LATENCY_END(this->name, "transpose");
}
else
{
DL_LOG_LAYER_LATENCY_START();
this->output->transpose(this->perm);
DL_LOG_LAYER_LATENCY_END(this->name, "transpose");
}
return *this->output;
}
};
} // namespace layer
} // namespace dl

View File

@ -0,0 +1,68 @@
#pragma once
#include "dl_image.hpp"
typedef struct
{
    int area;                /*!< pixel area of the connected domain >*/
    std::vector<int> center; /*!< centroid of the connected domain: [x, y] >*/
    std::vector<int> box;    /*!< bounding box of the connected domain: [left_up_x, left_up_y, right_down_x, right_down_y] >*/
} components_stats_t;
class ColorDetector
{
private:
    std::vector<std::vector<components_stats_t>> results; /*!< per-color detection results, filled by detect() >*/
public:
    std::vector<std::vector<uint8_t>> color_thresh; /*!< threshold of colors; the threshold of each color is composed of 6 numbers >*/
    std::vector<int> area_thresh;                   /*!< the area threshold of each color;
                                                         connected domains smaller than the threshold are filtered out >*/
    bool bgr;                                       /*!< true: the input image is in BGR format
                                                         false: the input image is in RGB format >*/

    /**
     * @brief Get the color threshold of a rectangular region in the image.
     *
     * @param image the input image
     * @param box the coordinates of the rectangular region: [left_up_x, left_up_y, right_down_x, right_down_y]
     * @return std::vector<uint8_t> the threshold (6 numbers, same layout as one entry of `color_thresh`).
     */
    std::vector<uint8_t> cal_color_thresh(dl::Tensor<uint8_t> &image, std::vector<int> box);

    /**
     * @brief Detect the colors based on the color thresholds.
     *
     * @param image the input image.
     * @return std::vector<std::vector<components_stats_t>>& detection result, one vector of components per configured color.
     */
    std::vector<std::vector<components_stats_t>> &detect(dl::Tensor<uint8_t> &image);

    /**
     * @brief Construct a new Color Detector object.
     *
     * @param color_thresh threshold of colors; the threshold of each color is composed of 6 numbers
     * @param area_thresh the area threshold of each color; components smaller than the threshold are filtered
     * @param bgr true: the input image is in BGR format
     *            false: the input image is in RGB format
     */
    ColorDetector(std::vector<std::vector<uint8_t>> color_thresh, std::vector<int> area_thresh, bool bgr = false) : color_thresh(color_thresh), area_thresh(area_thresh), bgr(bgr)
    {
    }

    /**
     * @brief Destroy the Color Detector object.
     *
     */
    ~ColorDetector() {}

    /**
     * @brief Get the results object.
     *
     * @return std::vector<std::vector<components_stats_t>>& the result of the most recent detect() call.
     */
    std::vector<std::vector<components_stats_t>> &get_results()
    {
        return this->results;
    }
};

View File

@ -92,7 +92,7 @@ namespace face_recognition_tool
* @return dl::Tensor<T>*
*/
template <typename T>
dl::Tensor<T> *transform_mfn_input(dl::Tensor<uint8_t> &image, bool free_input = false, bool do_padding = true);
dl::Tensor<T> *transform_mfn_input(dl::Tensor<uint8_t> &image, bool free_input = false);
/**
* @brief transform the image to the input of a mfn model
@ -106,7 +106,7 @@ namespace face_recognition_tool
* false: do not pad the result
*/
template <typename T>
void transform_mfn_input(dl::Tensor<uint8_t> &image, dl::Tensor<T> &output, bool free_input = false, bool do_padding = true);
void transform_mfn_input(dl::Tensor<uint8_t> &image, dl::Tensor<T> &output, bool free_input = false);
/**
* @brief transform the mfn output embedding to a floating embedding

View File

@ -14,13 +14,13 @@ namespace dl
* @param filter_shape filter shape with dilation
* @param stride_y stride in height
* @param stride_x stride in width
* @param pad_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET
* @param pad_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN
* @param is_conv2d one of true or false,
* - true: serve for Conv2D
* - false: serve for other operations
* @return std::vector<int>
*/
std::vector<int> get_output_shape(const std::vector<int> &input_shape, const std::vector<int> &filter_shape, const int stride_y, const int stride_x, const padding_type_t pad_type, const bool is_conv2d = false);
std::vector<int> get_output_shape(const std::vector<int> &input_shape, const std::vector<int> &filter_shape, const int stride_y, const int stride_x, const padding_type_t pad_type, const bool is_conv2d = false, std::vector<int> padding = {});
/**
* @brief Get the pad size object
@ -30,7 +30,7 @@ namespace dl
* @param filter_shape filter shape with dilation
* @param stride_y stride in height
* @param stride_x stride in width
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN
* @return padding size
*/
std::vector<int> get_pad_size(const std::vector<int> &output_shape, const std::vector<int> &input_shape, const std::vector<int> &filter_shape, const int stride_y, const int stride_x, const padding_type_t padding_type);

View File

@ -58,20 +58,20 @@ namespace dl
*/
template <bool inplace = false, typename feature_t>
auto add2d(const int output_exponent,
Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const Activation<feature_t> *activation,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const Activation<feature_t> *activation,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
{
assert(input0.is_same_shape(input1));
DL_LOG_NN_LATENCY_INIT();
Tensor<feature_t> output;
if constexpr(!inplace)
if constexpr (!inplace)
{
DL_LOG_NN_LATENCY_START();
output.set_exponent(output_exponent).set_shape(input0.shape).apply_element();
output.set_exponent(output_exponent).set_shape(input0.shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -58,12 +58,12 @@ namespace dl
* @param filter_shape filter_shape in [filter_height, filter_width]
* @param stride_y stride in height
* @param stride_x stride in width
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN,
* - PADDING_VALID: no padding
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input,
* - PADDING_SAME results padding in TensorFlow style
* - PADDING_SAME_MXNET results padding in MXNET style
* - PADDING_SAME_END results padding in TensorFlow style
* - PADDING_SAME_BEGIN results padding in MXNET style
* @param assign_core not effective yet
* @return avg_pool2d result
*/
@ -81,19 +81,19 @@ namespace dl
DL_LOG_NN_LATENCY_START();
std::vector<int> output_shape = get_output_shape(input.shape, filter_shape, stride_y, stride_x, padding_type);
Tensor<feature_t> output;
output.set_exponent(output_exponent).set_shape(output_shape).apply_element();
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
std::vector<int> padding(4, 0);
DL_LOG_NN_LATENCY_START();
if (padding_type == PADDING_SAME || padding_type == PADDING_SAME_MXNET)
if (padding_type == PADDING_SAME_END || padding_type == PADDING_SAME_BEGIN)
{
std::vector<int> padding = get_pad_size(output_shape, input.shape, filter_shape, stride_y, stride_x, padding_type);
input.set_padding_size(padding);
padding = get_pad_size(output_shape, input.shape, filter_shape, stride_y, stride_x, padding_type);
}
DL_LOG_NN_LATENCY_END("padding");
DL_LOG_NN_LATENCY_START();
avg_pool2d(output, input, input.padding, filter_shape, stride_y, stride_x, assign_core);
avg_pool2d(output, input, padding, filter_shape, stride_y, stride_x, assign_core);
DL_LOG_NN_LATENCY_END("avg_pool2d");
return output;

View File

@ -0,0 +1,63 @@
#pragma once
#include <vector>
#include "dl_variable.hpp"
#include "dl_nn.hpp"
namespace dl
{
namespace nn
{
template <typename feature_t>
void concat(Tensor<feature_t> &output, std::vector<Tensor<feature_t> *> &inputs, int axis, bool free_inputs = false);
/**
 * @brief Concatenate a list of tensors along `axis` into a freshly
 *        allocated tensor.
 *
 * All inputs must share the same rank, the same exponent, and identical
 * extents on every dimension except `axis`. A negative `axis` counts from
 * the last dimension.
 *
 * @param inputs tensors to concatenate (at least two)
 * @param axis dimension along which to concatenate
 * @param free_inputs true: release the input tensors after the copy
 * @return Tensor<feature_t> the concatenation result
 */
template <typename feature_t>
Tensor<feature_t> concat(std::vector<Tensor<feature_t> *> &inputs, int axis, bool free_inputs = false)
{
    DL_LOG_NN_LATENCY_INIT();

    DL_LOG_NN_LATENCY_START();
    assert(inputs.size() > 1);
    int ndim = inputs[0]->shape.size();
    if (axis < 0)
    {
        axis = ndim + axis;
    }
    assert((axis < ndim) && (axis > -1));

    // Accumulate the concat-axis extent while validating that every input
    // agrees with its predecessor on rank, exponent, and all other dims.
    int concat_extent = inputs[0]->shape[axis];
    for (size_t i = 1; i < inputs.size(); i++)
    {
        assert(ndim == inputs[i]->shape.size());
        assert(inputs[i]->exponent == inputs[i - 1]->exponent);
        concat_extent += inputs[i]->shape[axis];
        for (int dim = 0; dim < ndim; dim++)
        {
            if (dim != axis)
            {
                assert(inputs[i]->shape[dim] == inputs[i - 1]->shape[dim]);
            }
        }
    }
    DL_LOG_NN_LATENCY_END("assert");

    DL_LOG_NN_LATENCY_START();
    std::vector<int> result_shape = inputs[0]->shape;
    result_shape[axis] = concat_extent;
    Tensor<feature_t> output;
    output.set_shape(result_shape);
    output.set_exponent(inputs[0]->exponent);
    output.malloc_element();
    DL_LOG_NN_LATENCY_END("malloc");

    DL_LOG_NN_LATENCY_START();
    concat(output, inputs, axis, free_inputs);
    DL_LOG_NN_LATENCY_END("concat");

    return output;
}
} // namespace nn
} // namespace dl

View File

@ -10,7 +10,6 @@ namespace dl
{
/**
* @brief activation(conv2d(input, filter) + bias).
* NOTE: When padding_type is SAME, make sure padding is already added in input.
*
* @param output as an output
* @param input as an input
@ -34,7 +33,6 @@ namespace dl
/**
* @brief activation(conv2d(input, filter) + bias).
* NOTE: When padding_type is SAME, make sure padding is already added in input.
*
* @param output as an output
* @param input as an input
@ -56,6 +54,29 @@ namespace dl
const Activation<int8_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief activation(conv2d(input, filter) + bias).
*
* @param output as an output
* @param input as an input
* @param padding padding size needed in [top, bottom, left, right] of this operation
* @param filter filter of conv2d
* @param stride_y stride in height
* @param stride_x stride in width
* @param bias bias of conv2d, if you don't specify anything, no bias is added
* @param activation activation of conv2d, if you don't specify anything, no activation is applied
* @param assign_core not effective yet
*/
void conv2d(Tensor<int8_t> &output,
Tensor<int8_t> &input,
std::vector<int> &padding,
const Filter<int8_t> &filter,
const int stride_y,
const int stride_x,
const Bias<int16_t> *const bias = NULL,
const Activation<int8_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief activation(conv2d(input, filter) + bias).
*
@ -67,25 +88,25 @@ namespace dl
* @param filter Filter of conv2d
* @param stride_y stride in height
* @param stride_x stride in width
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN,
* - PADDING_VALID: no padding
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input,
* - PADDING_SAME results padding in TensorFlow style
* - PADDING_SAME_MXNET results padding in MXNET style
* - PADDING_SAME_END results padding in TensorFlow style
* - PADDING_SAME_BEGIN results padding in MXNET style
* @param bias bias of conv2d, if you don't specify anything, no bias is added
* @param activation activation of conv2d, if you don't specify anything, no activation is applied
* @param assign_core not effective yet
* @return conv2d result
*/
template <typename feature_t>
template <typename feature_t, typename bias_t>
Tensor<feature_t> conv2d(const int output_exponent,
Tensor<feature_t> &input,
const Filter<feature_t> &filter,
const int stride_y,
const int stride_x,
const padding_type_t padding_type,
const Bias<feature_t> *bias,
const Bias<bias_t> *bias,
const Activation<feature_t> *activation,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE)
{
@ -94,20 +115,19 @@ namespace dl
DL_LOG_NN_LATENCY_START();
std::vector<int> output_shape = get_output_shape(input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type, true);
Tensor<feature_t> output;
output.set_exponent(output_exponent).set_shape(output_shape).apply_element();
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
std::vector<int> padding(4, 0);
DL_LOG_NN_LATENCY_START();
if (padding_type == PADDING_SAME || padding_type == PADDING_SAME_MXNET)
if (padding_type == PADDING_SAME_END || padding_type == PADDING_SAME_BEGIN)
{
std::vector<int> padding = get_pad_size(output_shape, input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
input.set_padding_size(padding);
input.set_padding_value(padding, 0);
padding = get_pad_size(output_shape, input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
}
DL_LOG_NN_LATENCY_END("padding");
DL_LOG_NN_LATENCY_START();
conv2d(output, input, input.padding, filter, stride_y, stride_x, bias, activation, assign_core);
conv2d(output, input, padding, filter, stride_y, stride_x, bias, activation, assign_core);
DL_LOG_NN_LATENCY_END("conv2d");
return output;

View File

@ -10,7 +10,6 @@ namespace dl
{
/**
* @brief activate(depthwise_conv2d(input, filter) + bias)
* NOTE: When padding_type is SAME, make sure padding is already added in input
*
* @param output as an output
* @param input as an input
@ -34,7 +33,6 @@ namespace dl
/**
* @brief activate(depthwise_conv2d(input, filter) + bias)
* NOTE: When padding_type is SAME, make sure padding is already added in input
*
* @param output as an output
* @param input as an input
@ -56,6 +54,29 @@ namespace dl
const Activation<int8_t> *activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief activate(depthwise_conv2d(input, filter) + bias)
*
* @param output as an output
* @param input as an input
* @param padding padding size needed in [top, bottom, left, right] of this operation
* @param filter Filter of depthwise_conv2d
* @param stride_y stride in height
* @param stride_x stride in width
* @param bias bias of depthwise_conv2d, if you don't specify anything, no bias is added
* @param activation activation of depthwise_conv2d, if you don't specify anything, no activation is applied
* @param assign_core not effective yet
*/
void depthwise_conv2d(Tensor<int8_t> &output,
Tensor<int8_t> &input,
std::vector<int> &padding,
const Filter<int8_t> &filter,
const int stride_y,
const int stride_x,
const Bias<int16_t> *bias = NULL,
const Activation<int8_t> *activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief activation(depthwise_conv2d(input, filter) + bias)
*
@ -67,25 +88,25 @@ namespace dl
* @param filter filter of depthwise_conv2d
* @param stride_y stride in height
* @param stride_x stride in width
* @param pad_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
* @param pad_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN,
* - PADDING_VALID means no padding
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input,
* - PADDING_SAME results padding in TensorFlow style
* - PADDING_SAME_MXNET results padding in MXNET style
* - PADDING_SAME_END results padding in TensorFlow style
* - PADDING_SAME_BEGIN results padding in MXNET style
* @param bias bias of depthwise_conv2d, if you don't specify anything, no bias is added
* @param activation activation of depthwise_conv2d, if you don't specify anything, no activation is applied
* @param assign_core not effective yet
* @return depthwise_conv2d result
*/
template <typename feature_t>
template <typename feature_t, typename bias_t>
Tensor<feature_t> depthwise_conv2d(const int output_exponent,
Tensor<feature_t> &input,
const Filter<feature_t> &filter,
const int stride_y,
const int stride_x,
const padding_type_t padding_type,
const Bias<feature_t> *bias,
const Bias<bias_t> *bias,
const Activation<feature_t> *activation,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE)
{
@ -94,20 +115,20 @@ namespace dl
DL_LOG_NN_LATENCY_START();
std::vector<int> output_shape = get_output_shape(input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
Tensor<feature_t> output;
output.set_exponent(output_exponent).set_shape(output_shape).apply_element();
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
std::vector<int> padding(4, 0);
DL_LOG_NN_LATENCY_START();
if (padding_type == PADDING_SAME || padding_type == PADDING_SAME_MXNET)
if (padding_type == PADDING_SAME_END || padding_type == PADDING_SAME_BEGIN)
{
std::vector<int> padding = get_pad_size(output_shape, input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
input.set_padding_size(padding);
input.set_padding_value(padding, 0);
padding = get_pad_size(output_shape, input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
}
DL_LOG_NN_LATENCY_END("padding");
DL_LOG_NN_LATENCY_START();
depthwise_conv2d(output, input, input.padding, filter, stride_y, stride_x, bias, activation, assign_core);
depthwise_conv2d(output, input, padding, filter, stride_y, stride_x, bias, activation, assign_core);
DL_LOG_NN_LATENCY_END("depthwise_conv2d");
return output;

View File

@ -0,0 +1,126 @@
#pragma once
#include "dl_constant.hpp"
#include "dl_variable.hpp"
#include "dl_nn.hpp"
namespace dl
{
namespace nn
{
/**
* @brief activation(FullyConnected(input, filter) + bias).
*
* @param output as an output
* @param input as an input
* @param filter filter of FullyConnected
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
* false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
* @param assign_core not effective yet
*/
void fully_connected(Tensor<int16_t> &output,
Tensor<int16_t> &input,
const Filter<int16_t> &filter,
const Bias<int16_t> *const bias = NULL,
const Activation<int16_t> *const activation = NULL,
const bool flatten = true,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief activation(FullyConnected(input, filter) + bias).
*
* @param output as an output
* @param input as an input
* @param filter filter of FullyConnected
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
* false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
* @param assign_core not effective yet
*/
void fully_connected(Tensor<int8_t> &output,
Tensor<int8_t> &input,
const Filter<int8_t> &filter,
const Bias<int8_t> *const bias = NULL,
const Activation<int8_t> *const activation = NULL,
const bool flatten = true,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief activation(FullyConnected(input, filter) + bias).
*
* @param output as an output
* @param input as an input
* @param filter filter of FullyConnected
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
* false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
* @param assign_core not effective yet
*/
void fully_connected(Tensor<int8_t> &output,
Tensor<int8_t> &input,
const Filter<int8_t> &filter,
const Bias<int16_t> *const bias = NULL,
const Activation<int8_t> *const activation = NULL,
const bool flatten = true,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief activation(FullyConnected(input, filter) + bias).
*
* @tparam feature_t supports int16_t and int8_t,
* - int16_t: stands for operation in int16_t quantize
* - int8_t: stands for operation in int8_t quantize
* @param output_exponent exponent of output
* @param input as an input
* @param filter Filter of FullyConnected
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
* false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
* @param assign_core not effective yet
* @return FullyConnected result
*/
template <typename feature_t>
Tensor<feature_t> fully_connected(const int output_exponent,
                                  Tensor<feature_t> &input,
                                  const Filter<feature_t> &filter,
                                  const Bias<feature_t> *bias,
                                  const Activation<feature_t> *activation,
                                  const bool flatten,
                                  const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE)
{
    DL_LOG_NN_LATENCY_INIT();

    DL_LOG_NN_LATENCY_START();
    // The filter is stored as [1, 1, input_dim, output_dim].
    assert(filter.shape.size() == 4);
    assert(filter.shape[0] == 1);
    assert(filter.shape[1] == 1);

    std::vector<int> output_shape;
    if (flatten)
    {
        // Flattened: the whole input feeds the input_dim axis of the filter.
        assert(input.get_size() == filter.shape[2]);
        output_shape = {filter.shape.back()};
    }
    else
    {
        // BUGFIX: `filter` is a reference, not a pointer — `filter->shape`
        // does not compile once this branch is instantiated.
        assert(input.shape.back() == filter.shape[2]);
        output_shape = input.shape;
        output_shape[output_shape.size() - 1] = filter.shape.back();
    }
    Tensor<feature_t> output;
    output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
    DL_LOG_NN_LATENCY_END("apply");

    DL_LOG_NN_LATENCY_START();
    fully_connected(output, input, filter, bias, activation, flatten, assign_core);
    DL_LOG_NN_LATENCY_END("fully_connected");

    return output;
}
} // namespace nn
} // namespace dl

View File

@ -53,7 +53,7 @@ namespace dl
std::vector<int> output_shape(input.shape.size(), 1);
output_shape[2] = input.shape[2];
Tensor<feature_t> output;
output.set_exponent(output_exponent).set_shape(output_shape).apply_element();
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -51,7 +51,7 @@ namespace dl
std::vector<int> output_shape(input.shape.size(), 1);
output_shape[2] = input.shape[2];
Tensor<feature_t> output;
output.set_exponent(input.exponent).set_shape(output_shape).apply_element();
output.set_exponent(input.exponent).set_shape(output_shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -52,17 +52,17 @@ namespace dl
* @return leakyrelu result or no return(result store to input)
*/
template <bool inplace = false, typename feature_t>
auto leakyrelu(Tensor<feature_t> &input,
const int activation_alpha,
const int activation_exponent,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
auto leakyrelu(Tensor<feature_t> &input,
const int activation_alpha,
const int activation_exponent,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
{
DL_LOG_NN_LATENCY_INIT();
Tensor<feature_t> output;
if constexpr(!inplace)
if constexpr (!inplace)
{
DL_LOG_NN_LATENCY_START();
output.set_exponent(input.exponent).set_shape(input.shape).apply_element();
output.set_exponent(input.exponent).set_shape(input.shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -48,20 +48,20 @@ namespace dl
* @return max2d result or no return(result store to input0)
*/
template <bool inplace = false, typename feature_t>
auto max2d(Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
auto max2d(Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
{
assert(input0.is_same_shape(input1));
assert(input0.exponent == input1.exponent);
DL_LOG_NN_LATENCY_INIT();
Tensor<feature_t> output;
if constexpr(!inplace)
if constexpr (!inplace)
{
DL_LOG_NN_LATENCY_START();
output.set_exponent(input0.exponent).set_shape(input0.shape).apply_element();
output.set_exponent(input0.exponent).set_shape(input0.shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -57,12 +57,12 @@ namespace dl
* @param filter_shape filter shape in [filter_height, filter_width]
* @param stride_y stride in height
* @param stride_x stride in width
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN,
* - PADDING_VALID: no padding
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
* such that output has the same height/width dimension as the input,
* - PADDING_SAME results padding in TensorFlow style
* - PADDING_SAME_MXNET results padding in MXNET style
* - PADDING_SAME_END results padding in TensorFlow style
* - PADDING_SAME_BEGIN results padding in MXNET style
* @param assign_core not effective yet
* @return max_pool2d result
*/
@ -79,20 +79,20 @@ namespace dl
DL_LOG_NN_LATENCY_START();
std::vector<int> output_shape = get_output_shape(input.shape, filter_shape, stride_y, stride_x, padding_type);
Tensor<feature_t> output;
output.set_exponent(input.exponent).set_shape(output_shape).apply_element();
output.set_exponent(input.exponent).set_shape(output_shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
std::vector<int> padding(4, 0);
DL_LOG_NN_LATENCY_START();
if (padding_type == PADDING_SAME || padding_type == PADDING_SAME_MXNET)
if (padding_type == PADDING_SAME_END || padding_type == PADDING_SAME_BEGIN)
{
std::vector<int> padding = get_pad_size(output_shape, input.shape, filter_shape, stride_y, stride_x, padding_type);
input.set_padding_size(padding);
input.set_padding_value(padding, 0);
padding = get_pad_size(output_shape, input.shape, filter_shape, stride_y, stride_x, padding_type);
}
DL_LOG_NN_LATENCY_END("padding");
DL_LOG_NN_LATENCY_START();
max_pool2d(output, input, input.padding, filter_shape, stride_y, stride_x, assign_core);
max_pool2d(output, input, padding, filter_shape, stride_y, stride_x, assign_core);
DL_LOG_NN_LATENCY_END("max_pool2d");
return output;

View File

@ -47,20 +47,20 @@ namespace dl
* @return min2d result or no return(result store to input0)
*/
template <bool inplace = false, typename feature_t>
auto min2d(Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
auto min2d(Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
{
assert(input0.is_same_shape(input1));
assert(input0.exponent == input1.exponent);
DL_LOG_NN_LATENCY_INIT();
Tensor<feature_t> output;
if constexpr(!inplace)
if constexpr (!inplace)
{
DL_LOG_NN_LATENCY_START();
output.set_exponent(input0.exponent).set_shape(input0.shape).apply_element();
output.set_exponent(input0.exponent).set_shape(input0.shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -18,12 +18,12 @@ namespace dl
* @param assign_core not effective yet
* @param output_exponent exponent of output, only and must specify if inplace operation happens
*/
void mul2d(Tensor<int16_t> &output,
Tensor<int16_t> &input0,
Tensor<int16_t> &input1,
const Activation<int16_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
const int output_exponent = INT_MIN);
void mul2d(Tensor<int16_t> &output,
Tensor<int16_t> &input0,
Tensor<int16_t> &input1,
const Activation<int16_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
const int output_exponent = INT_MIN);
/**
* @brief activation(mul2d(input0, input1)).
@ -35,12 +35,12 @@ namespace dl
* @param assign_core not effective yet
* @param output_exponent exponent of output, only and must specify if inplace operation happens
*/
void mul2d(Tensor<int8_t> &output,
Tensor<int8_t> &input0,
Tensor<int8_t> &input1,
const Activation<int8_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
const int output_exponent = INT_MIN);
void mul2d(Tensor<int8_t> &output,
Tensor<int8_t> &input0,
Tensor<int8_t> &input1,
const Activation<int8_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
const int output_exponent = INT_MIN);
/**
* @brief activation(mul2d(input0, input1)).
@ -57,21 +57,21 @@ namespace dl
* @return mul2d result or no return(result store to input0)
*/
template <bool inplace = false, typename feature_t>
auto mul2d(const int output_exponent,
Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const Activation<feature_t> *activation,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
auto mul2d(const int output_exponent,
Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const Activation<feature_t> *activation,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
{
assert(input0.is_same_shape(input1));
DL_LOG_NN_LATENCY_INIT();
Tensor<feature_t> output;
if constexpr(!inplace)
if constexpr (!inplace)
{
DL_LOG_NN_LATENCY_START();
output.set_exponent(output_exponent).set_shape(input0.shape).apply_element();
output.set_exponent(output_exponent).set_shape(input0.shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -52,17 +52,17 @@ namespace dl
* @return prelu result or no return(result store to input)
*/
template <bool inplace = false, typename feature_t>
auto prelu(Tensor<feature_t> &input,
const feature_t *activation_element,
const int activation_exponent,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
auto prelu(Tensor<feature_t> &input,
const feature_t *activation_element,
const int activation_exponent,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
{
DL_LOG_NN_LATENCY_INIT();
Tensor<feature_t> output;
if constexpr(!inplace)
if constexpr (!inplace)
{
DL_LOG_NN_LATENCY_START();
output.set_exponent(input.exponent).set_shape(input.shape).apply_element();
output.set_exponent(input.exponent).set_shape(input.shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();
@ -76,7 +76,7 @@ namespace dl
DL_LOG_NN_LATENCY_START();
prelu(input, input, activation_element, activation_exponent, assign_core);
DL_LOG_NN_LATENCY_END("prelu");
}
}
}
} // namespace nn
} // namespace dl

View File

@ -15,9 +15,9 @@ namespace dl
* @param input as an input
* @param assign_core not effective yet
*/
void relu(Tensor<int16_t> &output,
Tensor<int16_t> &input,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
void relu(Tensor<int16_t> &output,
Tensor<int16_t> &input,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief relu(input).
@ -26,9 +26,9 @@ namespace dl
* @param input as an input
* @param assign_core not effective yet
*/
void relu(Tensor<int8_t> &output,
Tensor<int8_t> &input,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
void relu(Tensor<int8_t> &output,
Tensor<int8_t> &input,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
/**
* @brief relu(input)
@ -46,11 +46,11 @@ namespace dl
{
DL_LOG_NN_LATENCY_INIT();
Tensor<feature_t> output;
if constexpr(!inplace)
if constexpr (!inplace)
{
DL_LOG_NN_LATENCY_START();
output.set_exponent(input.exponent).set_shape(input.shape).apply_element();
output.set_exponent(input.exponent).set_shape(input.shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -18,12 +18,12 @@ namespace dl
* @param assign_core not effective yet
* @param output_exponent exponent of output, only and must specify if inplace operation happens
*/
void sub2d(Tensor<int16_t> &output,
Tensor<int16_t> &input0,
Tensor<int16_t> &input1,
const Activation<int16_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
const int output_exponent = INT_MIN);
void sub2d(Tensor<int16_t> &output,
Tensor<int16_t> &input0,
Tensor<int16_t> &input1,
const Activation<int16_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
const int output_exponent = INT_MIN);
/**
* @brief activation(sub2d(input0, input1)).
@ -35,12 +35,12 @@ namespace dl
* @param assign_core not effective yet
* @param output_exponent exponent of output, only and must specify if inplace operation happens
*/
void sub2d(Tensor<int8_t> &output,
Tensor<int8_t> &input0,
Tensor<int8_t> &input1,
const Activation<int8_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
const int output_exponent = INT_MIN);
void sub2d(Tensor<int8_t> &output,
Tensor<int8_t> &input0,
Tensor<int8_t> &input1,
const Activation<int8_t> *const activation = NULL,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
const int output_exponent = INT_MIN);
/**
* @brief activation(sub2d(input0, input1)).
@ -57,20 +57,20 @@ namespace dl
* @return sub2d result or no return(result store to input0)
*/
template <bool inplace = false, typename feature_t>
auto sub2d(const int output_exponent,
Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const Activation<feature_t> *activation,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
auto sub2d(const int output_exponent,
Tensor<feature_t> &input0,
Tensor<feature_t> &input1,
const Activation<feature_t> *activation,
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
{
assert(input0.is_same_shape(input1));
DL_LOG_NN_LATENCY_INIT();
Tensor<feature_t> output;
if constexpr(!inplace)
if constexpr (!inplace)
{
DL_LOG_NN_LATENCY_START();
output.set_exponent(output_exponent).set_shape(input0.shape).apply_element();
output.set_exponent(output_exponent).set_shape(input0.shape).malloc_element();
DL_LOG_NN_LATENCY_END("apply");
DL_LOG_NN_LATENCY_START();

View File

@ -67,62 +67,49 @@ namespace dl
void copy_memory(void *dst, void *src, const int n);
/**
* @brief Apply memory without initialized. Must use free_aligned() to free the memory.
* @brief Apply memory without initialized. Can use free_aligned() to free the memory.
*
* @param number number of elements
* @param size size of element
* @param align number of aligned, e.g., 16 means 16-byte aligned
* @param align number of byte aligned, e.g., 16 means 16-byte aligned
* @return pointer of allocated memory. NULL for failed
*/
inline void *malloc_aligned(int number, int size, int align = 0)
inline void *malloc_aligned(int number, int size, int align = 4)
{
int n = number * size;
n >>= 4;
n += 2;
n <<= 4;
int total_size = n + align + sizeof(void *) + sizeof(int);
void *res = malloc(total_size);
assert((align > 0) && (((align & (align-1)) == 0)));
int total_size = number * size;
void *res = heap_caps_aligned_alloc(align, total_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL);
#if DL_SPIRAM_SUPPORT
if (NULL == res)
res = heap_caps_malloc(total_size, MALLOC_CAP_SPIRAM);
res = heap_caps_aligned_alloc(align, total_size, MALLOC_CAP_SPIRAM);
#endif
if (NULL == res)
{
printf("Fail to malloc %d bytes from DRAM(%d bytyes) and PSRAM(%d bytes), PSRAM is %s.\n",
total_size,
heap_caps_get_free_size(MALLOC_CAP_INTERNAL),
heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL),
heap_caps_get_free_size(MALLOC_CAP_SPIRAM),
DL_SPIRAM_SUPPORT ? "on" : "off");
return NULL;
}
void **data = (void **)res + 2; // 4-byte for pointer, 4-bytes for n
void **aligned;
if (align)
aligned = (void **)(((size_t)data + (align - 1)) & -align);
else
aligned = data;
aligned[-1] = res;
int *temp = (int *)aligned;
temp[-2] = n;
return (void *)aligned;
return (void *)res;
}
/**
* @brief Apply memory with zero-initialized. Must use dl_lib_free() to free the memory.
* @brief Apply memory with zero-initialized. Can use free_aligned() to free the memory.
*
* @param number number of elements
* @param size size of element
* @param align number of aligned, e.g., 16 means 16-byte aligned
* @param align number of byte aligned, e.g., 16 means 16-byte aligned
* @return pointer of allocated memory. NULL for failed
*/
inline void *calloc_aligned(int number, int size, int align = 0)
inline void *calloc_aligned(int number, int size, int align = 4)
{
void *aligned = malloc_aligned(number, size, align);
int n = *((int *)aligned - 2);
set_zero(aligned, n);
set_zero(aligned, number * size);
return (void *)aligned;
}
@ -137,7 +124,70 @@ namespace dl
if (NULL == address)
return;
free(((void **)address)[-1]);
heap_caps_free(address);
}
/**
* @brief Apply memory without initialized in preference order: internal aligned, internal, external aligned
*
* @param number number of elements
* @param size size of element
* @param align number of byte aligned, e.g., 16 means 16-byte aligned
* @return pointer of allocated memory. NULL for failed
*/
inline void *malloc_aligned_prefer(int number, int size, int align = 4)
{
assert((align > 0) && (((align & (align-1)) == 0)));
int total_size = number * size;
void *res = heap_caps_aligned_alloc(align, total_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL);
if (NULL == res){
res = heap_caps_malloc(total_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL);
}
#if DL_SPIRAM_SUPPORT
if (NULL == res){
res = heap_caps_aligned_alloc(align, total_size, MALLOC_CAP_SPIRAM);
}
#endif
if (NULL == res)
{
printf("Fail to malloc %d bytes from DRAM(%d bytyes) and PSRAM(%d bytes), PSRAM is %s.\n",
total_size,
heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL),
heap_caps_get_free_size(MALLOC_CAP_SPIRAM),
DL_SPIRAM_SUPPORT ? "on" : "off");
return NULL;
}
return res;
}
/**
* @brief Apply memory with zero-initialized in preference order: internal aligned, internal, external aligned
*
* @param number number of elements
* @param size size of element
* @param align number of byte aligned, e.g., 16 means 16-byte aligned
* @return pointer of allocated memory. NULL for failed
*/
inline void *calloc_aligned_prefer(int number, int size, int align = 4)
{
void *res = malloc_aligned_prefer(number, size, align);
set_zero(res, number * size);
return (void *)res;
}
/**
* @brief Free the calloc_aligned_prefer() and malloc_aligned_prefer() memory
*
* @param address pointer of memory to free
*/
inline void free_aligned_prefer(void *address)
{
if (NULL == address)
return;
heap_caps_free(address);
}
/**

View File

@ -57,7 +57,8 @@ namespace dl
* @param exponent exponent of element
* @param shape shape of Filter,
* - 1D: reserved
* - 2D: [filter_height, filter_width, input_channel, output_channel]
* - 2D: for convolution is [filter_height, filter_width, input_channel, output_channel],
* for depthwise convolution is [filter_height, filter_width, input_channel, 1]
* @param dilation dilation of Filter
* - 1D: reserved
* - 2D: [dilation_in_height, dilation_in_width]
@ -97,6 +98,9 @@ namespace dl
{
public:
using Constant<T>::Constant;
std::vector<int> channel_exponent; /*<! exponent for per-channel >*/
Bias(const T *element, const std::vector<int> channel_exponent, const std::vector<int> shape);
};
/**

View File

@ -3,6 +3,7 @@
#include <stdio.h>
#include <vector>
#include <assert.h>
#include <iostream>
#include "dl_tool.hpp"
@ -17,27 +18,20 @@ namespace dl
class Tensor
{
private:
int size; /*<! size of element including padding */
bool auto_free; /*<! free element when object destroy */
int size; /*<! size of element including padding */
bool auto_free; /*<! free element when object destroy */
std::vector<int> axis_offset; /*<! element offset of each axis */
public:
T *element; /*<! point to element */
int exponent; /*<! exponent of element */
std::vector<int> shape; /*<! shape of Tensor */
/*<! 2D: shape is [height, width, channel] */
/*<! 1D: reserved */
std::vector<int> shape_with_padding; /*<! shape with padding of Tensor */
/*<! 2D: shape_with_padding is [height_with_padding, width_with_padding, channel_with_padding] */
/*<! 1D: reserved */
std::vector<int> padding; /*<! padding of Tensor */
/*<!- 2D: padding format is [top, bottom, left, right] */
/*<! - 1D: reserved */
T *element; /*<! point to element */
int exponent; /*<! exponent of element */
std::vector<int> shape; /*<! shape of Tensor */
/**
* @brief Construct a new Tensor object
*
*/
Tensor() : size(-1), auto_free(true), element(NULL), exponent(0) {}
Tensor() : auto_free(true), element(NULL), exponent(0) { this->set_shape({0}); }
/**
* @brief Construct a new Tensor object by copying from input.
@ -49,21 +43,20 @@ namespace dl
*/
Tensor(Tensor<T> &input, bool deep) : size(input.size),
auto_free(input.auto_free),
exponent(input.exponent),
shape(input.shape),
shape_with_padding(input.shape_with_padding),
padding(input.padding)
exponent(input.exponent)
{
if (deep)
this->set_shape(input.shape);
if (deep && (input.element != NULL))
{
int size_real = input.shape_with_padding.size() ? input.shape_with_padding[0] * input.shape_with_padding[1] * input.shape_with_padding[2] : 0;
T *new_element = (T *)tool::calloc_aligned(size_real, sizeof(T), 16);
int size_real = input.get_size();
T *new_element = (T *)tool::calloc_aligned_prefer(size_real, sizeof(T), 16);
tool::copy_memory(new_element, input.element, size_real * sizeof(T));
this->element = new_element;
}
else
{
this->element = input.element;
this->auto_free = false;
}
}
@ -77,6 +70,33 @@ namespace dl
this->free_element();
}
/**
* @brief
*
* @param input an input Tensor
* @param deep one of true or false
* - true: apply a new memory, copy value from input.element to this new memory
* - false: take over input.element to this->element
* @return Tensor<T>& self
*/
Tensor<T> &copy_element(Tensor<T> &input, bool deep)
{
assert(this->get_size() == input.get_size());
assert(input.element != NULL);
this->malloc_element();
if (deep)
{
tool::copy_memory(this->element, input.element, this->get_size() * sizeof(T));
}
else
{
this->element = input.element;
this->auto_free = false;
}
return *this;
}
/**
* @brief Set the auto free object.
*
@ -120,190 +140,144 @@ namespace dl
}
/**
* @brief Set the shape of Tensor. Initial this->padding = {0}. Initial this->size = -1.
* @brief Set the shape of Tensor.
*
* @param shape shape in
* - 2D: [height, width]
* @param shape the target shape
*
* @return self
*/
Tensor<T> &set_shape(const std::vector<int> shape)
Tensor<T> &set_shape(const std::vector<int> shape);
/**
* @brief print the shape of the Tensor
*
*/
void print_shape()
{
for (int i = 0; i < shape.size(); ++i)
if (this->shape.size())
{
assert(shape[i] > 0);
printf("shape = (");
for (int i = 0; i < this->shape.size() - 1; i++)
{
printf("%d, ", this->shape[i]);
}
printf("%d)\n", this->shape.back());
}
else
{
printf("shape = ()\n");
}
this->shape = shape;
this->shape_with_padding = shape;
this->size = -1;
this->padding = std::vector<int>(((this->shape.size() - 1) << 1), 0);
return *this;
}
/**
* @brief Set the padding size object.
* @brief flatten the Tensor
*
* @param padding padding size in
* - 2D: [top, bottom, left, right]
* @return self
* @return Tensor<T>& self
*/
Tensor &set_padding_size(std::vector<int> &padding)
{
assert(this->shape.size()); // call Tensor.set_shape() first
assert(this->shape.size() == 3); // TODO: || this->shape.size() == 2
if (this->shape.size() == 3)
{
std::vector<int> new_padding = this->padding;
bool dont_update = true;
if (padding[0] > this->padding[0])
{
new_padding[0] = padding[0];
dont_update = false;
}
if (padding[1] > this->padding[1])
{
new_padding[1] = padding[1];
dont_update = false;
}
if (padding[2] > this->padding[2])
{
new_padding[2] = padding[2];
dont_update = false;
}
if (padding[3] > this->padding[3])
{
new_padding[3] = padding[3];
dont_update = false;
}
if (dont_update)
{
return *this;
}
std::vector<int> new_shape_with_padding = this->shape;
new_shape_with_padding[0] += (new_padding[0] + new_padding[1]);
new_shape_with_padding[1] += (new_padding[2] + new_padding[3]);
int new_size = new_shape_with_padding[0] * new_shape_with_padding[1] * new_shape_with_padding[2];
if (this->element) // if this->element != NULL, do padding by copy memory
{
T *new_element = (T *)tool::malloc_aligned(new_size, sizeof(T), 16);
T *dst = new_element + ((new_padding[0] * new_shape_with_padding[1]) + new_padding[2]) * new_shape_with_padding[2];
T *src = this->get_element_ptr();
int offset_dst_next_y = new_shape_with_padding[1] * new_shape_with_padding[2]; // width * channel
int src_copy_length = this->shape[1] * this->shape[2]; // width * channel
int offset_src_next_y = this->shape_with_padding[1] * this->shape_with_padding[2]; // width * channel
for (int y = 0; y < this->shape[0]; y++)
{
tool::copy_memory(dst, src, src_copy_length * sizeof(T));
dst += offset_dst_next_y;
src += offset_src_next_y;
}
if (this->auto_free)
tool::free_aligned(this->element);
this->element = new_element;
this->auto_free = true;
}
this->padding = new_padding;
this->shape_with_padding = new_shape_with_padding;
this->size = new_size;
}
else if (this->shape.size() == 2)
{
printf("Tensor.set_padding_size with this->shape.size() == 2 not implement yet.\n");
}
return *this;
}
Tensor<T> &flatten();
/**
* @brief Set the padding value object.
* @brief Change a new shape to the Tensor without changing its data.
*
* @param padding padding size in
* - 2D: [top, bottom, left, right]
* @param value value to set
* @return self
* @param shape the target shape
* @return Tensor<T>& self
*/
Tensor<T> &set_padding_value(std::vector<int> &padding, T value);
Tensor<T> &reshape(std::vector<int> shape);
/**
* @brief Remove dims with length==1 from Tensor
*
* @param axis the dim to to be remove. make sure the length of the dim is equal to 1.
* if axis == INT32_MAX, all the dims with length==1 will be removed.
* @return Tensor<T>& self
*/
Tensor<T> &squeeze(int axis = INT32_MAX);
/**
* @brief Insert a new dim that will appear at the axis position in the expanded Tensor shape.
*
* @param axis the dim to be inserted
* @return Tensor<T>& self
*/
Tensor<T> &expand_dims(int axis);
/**
* @brief Insert a new dim that will appear at the axis position in the expanded Tensor shape.
*
* @param axis the dim to be inserted
* @return Tensor<T>& self
*/
Tensor<T> &expand_dims(std::vector<int> axis);
/**
* @brief Reverse or permute the axes of the Tensor
*
* @param perm the new arangement of the dims. if perm == {}, the dims arangement will be reversed.
* @return Tensor<T>& self
*/
Tensor<T> &transpose(std::vector<int> perm = {});
/**
* @brief Reverse or permute the axes of the input Tensor
*
* @param input the input Tensor
* @param perm the new arangement of the dims. if perm == {}, the dims arangement will be reversed.
* @return Tensor<T>& self
*/
Tensor<T> &transpose(Tensor<T> &input, std::vector<int> perm = {});
/**
* @brief Get the element pointer.
*
* @param padding padding size in
* - 2D: [top, bottom, left, right]
* @return pointer to memory with padding
* @return pointer to memory
*/
T *get_element_ptr(const std::vector<int> padding = {0, 0, 0, 0})
T *get_element_ptr()
{
assert(this->shape.size() == 3); // TODO: || this->shape.size() == 2
if (this->shape.size() == 3)
{
return this->element + ((this->padding[0] - padding[0]) * this->shape_with_padding[1] + (this->padding[2] - padding[2])) * this->shape_with_padding[2];
}
else if (this->shape.size() == 2)
{
printf("Tensor.get_element_ptr with this->shape.size() == 2 is not implemented.\n");
}
return NULL;
return this->element;
}
/**
* @brief Get the element value.
*
* @param index index in
* - 2D: [y, x, c]
* @param with_padding one of true or false,
* - true: make padding size in count
* - false: do not
* @return element value
* @param index the index of each dim.
* @return T element value
*/
T &get_element_value(const std::vector<int> index, const bool with_padding = false)
T get_element_value(const std::vector<int> index)
{
assert(index.size() == this->shape.size());
assert(this->shape.size() == 3); // TODO: || this->shape() == 2
int i = 0;
if (this->shape.size() == 3)
{
int y = index[0];
int x = index[1];
int c = index[2];
i = with_padding ? (y * this->shape_with_padding[1] + x) * this->shape_with_padding[2] + c : ((y + this->padding[0]) * this->shape_with_padding[1] + x + this->padding[2]) * this->shape_with_padding[2] + c;
}
else if (this->shape.size() == 2)
{
printf("Tensor.get_element_value with this->shape.size() == 2 is not implemented.\n");
}
return this->element[i];
return this->element[this->get_element_index(index)];
}
/**
* @brief Get the size of element.
* @brief Get the element value.
*
* @return size of element including padding
* @param index the index of the element.
* @return T element value
*/
T get_element_value(int index)
{
return this->element[index];
}
/**
* @brief Get the size of Tensor.
*
* @return the size of Tensor.
*/
int get_size()
{
if (this->size == -1) // didn't call Tensor.set_padding_size() before
{
this->size = 1;
for (std::vector<int>::iterator d = this->shape.begin(); d != this->shape.end(); d++)
this->size *= *d;
}
return this->size;
}
/**
* @brief Get the axis offset
*
* @return std::vector<int> the axis offset
*/
std::vector<int> get_axis_offset()
{
return this->axis_offset;
}
/**
* @brief Apply memory with zero-initialized only if this->element is NULL.
*
@ -319,7 +293,7 @@ namespace dl
if (this->element != NULL)
return false;
this->element = (T *)dl::tool::calloc_aligned(this->get_size(), sizeof(T), 16);
this->element = (T *)dl::tool::calloc_aligned_prefer(this->get_size(), sizeof(T), 16);
this->auto_free = auto_free;
return true;
@ -340,31 +314,7 @@ namespace dl
if (this->element != NULL)
return false;
this->element = (T *)tool::malloc_aligned(this->get_size(), sizeof(T), 16);
this->auto_free = auto_free;
return true;
}
/**
* @brief If this->element != NULL no memory will be applied and no value will be set in padding.
* Else apply memory without initialized and set value to padding.
*
* @param padding_value value to set in padding
* @param auto_free one of true of false
* - true: free element when object destroyed
* - false: do not
* @return
* - true: apply memory and set padding value successfully
* - false: no memory applied and no padding value set
*/
bool apply_element(const T padding_value = 0, const bool auto_free = true)
{
if (this->element != NULL)
return false;
this->element = (T *)tool::malloc_aligned(this->get_size(), sizeof(T), 16);
this->set_padding_value(this->padding, padding_value);
this->element = (T *)tool::malloc_aligned_prefer(this->get_size(), sizeof(T), 16);
this->auto_free = auto_free;
return true;
@ -379,258 +329,56 @@ namespace dl
{
if (this->auto_free && this->element)
{
tool::free_aligned(this->element);
tool::free_aligned_prefer(this->element);
this->element = NULL;
}
}
/**
* @brief Print the shape of Tensor in format "shape = ({top_padding} + {height} + {bottom_padding}, {left_padding} + {width} + {right_padding}, {channel}(channel_with_padding))\n".
* @brief print the element of the tensor
*
* @param axis_index_range the element range of each dims to be print. if axis_index_range == {}, all the element will be print.
* @param message to print
*/
void print_shape()
{
printf("shape = (%d + %d + %d, %d + %d + %d, %d(%d))\n",
this->padding[0], this->shape[0], this->padding[1],
this->padding[2], this->shape[1], this->padding[3],
this->shape[2], this->shape_with_padding[2]);
}
void print(std::vector<int> axis_index_range = {}, const char *message = "");
/**
* @brief Take numpy for example, this function print Tensor[y_start:y_end, x_start:x_end, c_start:c_end].
* @brief print all the element of the Tensor.
*
* inner box is effective value of Tensor, "0" around is padding.
*
* (with padding)
* 00000000000000000000000000000000000000000000000000
* 00000000000000000000000000000000000000000000000000
* 00000000000000000000000000000000000000000000000000
* 000000(without padding) 00000000
* 000000 00000000
* 000000 00000000
* 000000 effective value 00000000
* 000000 00000000
* 000000 00000000
* 00000000000000000000000000000000000000000000000000
* 00000000000000000000000000000000000000000000000000
* 00000000000000000000000000000000000000000000000000
*
* @param y_start start index in height
* @param y_end end index in height
* @param x_start start index in width
* @param x_end end index in width
* @param c_start start index in channel
* @param c_end end index in channel
* @param message to print
* @param axis print aligned this axis, effective only if all y_end - y_start, x_end - x_start and c_end - c_start equals to 1
* @param message to print
* @param with_padding one of true or false,
* - true: count from (with padding) in upper image
* - false: count from (without padding) in upper image
* - true: the padding element will also be ed
* - false: the padding element will not be ed
*/
void print(int y_start, int y_end,
int x_start, int x_end,
int c_start, int c_end,
const char *message, int axis = 0, const bool with_padding = false)
void print_all(const char *message = "")
{
assert(y_end > y_start);
assert(x_end > x_start);
assert(c_end > c_start);
y_start = DL_MAX(y_start, 0);
x_start = DL_MAX(x_start, 0);
c_start = DL_MAX(c_start, 0);
if (with_padding)
{
y_end = DL_MIN(y_end, this->shape_with_padding[0]);
x_end = DL_MIN(x_end, this->shape_with_padding[1]);
c_end = DL_MIN(c_end, this->shape_with_padding[2]);
}
else
{
y_end = DL_MIN(y_end, this->shape[0]);
x_end = DL_MIN(x_end, this->shape[1]);
c_end = DL_MIN(c_end, this->shape[2]);
}
printf("%s[%d:%d, %d:%d, %d:%d] | ", message, y_start, y_end, x_start, x_end, c_start, c_end);
std::cout << "\n"
<< message << " | ";
this->print_shape();
if (y_end - y_start == 1)
for (int i = 0; i < this->get_size(); i++)
{
if (x_end - x_start == 1)
{
for (int c = c_start; c < c_end; c++)
printf("%7d", c);
printf("\n");
for (int c = c_start; c < c_end; c++)
printf("%7d", this->get_element_value({y_start, x_start, c}, with_padding));
printf("\n");
return;
}
else
{
if (c_end - c_start == 1)
{
for (int x = x_start; x < x_end; x++)
printf("%7d", x);
printf("\n");
for (int x = x_start; x < x_end; x++)
printf("%7d", this->get_element_value({y_start, x, c_start}, with_padding));
printf("\n");
return;
}
}
std::cout << this->element[i] << " ";
}
else
{
if (x_end - x_start == 1)
{
if (c_end - c_start == 1)
{
for (int y = y_start; y < y_end; y++)
printf("%7d", y);
printf("\n");
for (int y = y_start; y < y_end; y++)
printf("%7d", this->get_element_value({y, x_start, c_start}, with_padding));
printf("\n");
return;
}
}
}
if (y_end - y_start == 1)
axis = 0;
if (x_end - x_start == 1)
axis = 1;
if (c_end - c_start == 1)
axis = 2;
if (axis == 0)
{
// ______c
// |
// |
// x
//
for (int y = y_start; y < y_end; y++)
{
printf("y = %d\n ", y);
for (int c = c_start; c < c_end; c++)
printf("%7d", c);
printf("\n");
for (int x = x_start; x < x_end; x++)
{
printf("%5d", x);
for (int c = c_start; c < c_end; c++)
printf("%7d", this->get_element_value({y, x, c}, with_padding));
printf("\n");
}
printf("\n");
}
}
else if (axis == 1)
{
// ______c
// |
// |
// y
//
for (int x = x_start; x < x_end; x++)
{
printf("x = %d\n ", x);
for (int c = c_start; c < c_end; c++)
printf("%7d", c);
printf("\n");
for (int y = y_start; y < y_end; y++)
{
printf("%5d", y);
for (int c = c_start; c < c_end; c++)
printf("%7d", this->get_element_value({y, x, c}, with_padding));
printf("\n");
}
printf("\n");
}
}
else
{
// ______x
// |
// |
// y
//
for (int c = c_start; c < c_end; c++)
{
printf("c = %d\n ", c);
for (int x = x_start; x < x_end; x++)
printf("%7d", x);
printf("\n");
for (int y = y_start; y < y_end; y++)
{
printf("%5d", y);
for (int x = x_start; x < x_end; x++)
printf("%7d", this->get_element_value({y, x, c}, with_padding));
printf("\n");
}
printf("\n");
}
}
std::cout << "\n";
return;
}
/**
* @brief print all the element of the Tensor.
* @brief Get the index of each dims
*
* @param message to print
* @param with_padding one of true or false,
* - true: the padding element will also be printed
* - false: the padding element will not be printed
* @param element_index the index of the element
* @return std::vector<int> the index of each dims
*/
void print_all(const char *message, const bool with_padding = false)
{
int y_end;
int x_end;
int c_end;
if (with_padding)
{
y_end = this->shape_with_padding[0];
x_end = this->shape_with_padding[1];
c_end = this->shape_with_padding[2];
}
else
{
y_end = this->shape[0];
x_end = this->shape[1];
c_end = this->shape[2];
}
std::vector<int> get_axis_index(int element_index);
printf("\n%s | ", message);
this->print_shape();
for (int y = 0; y < y_end; y++)
{
for (int x = 0; x < x_end; x++)
{
for (int c = 0; c < c_end; c++)
printf("%d ", this->get_element_value({y, x, c}, with_padding));
}
}
printf("\n");
return;
}
        /**
         * @brief Get the index of element
         *
         * Converts a per-dimension index vector into the flat (row-major)
         * element index for this Tensor's shape. Defined out of line;
         * inverse of get_axis_index().
         *
         * @param axis_index the index of each dims
         * @return int the index of element
         */
        int get_element_index(const std::vector<int> axis_index);
/**
* @brief Check the element value with input ground-truth.
@ -638,35 +386,39 @@ namespace dl
* @param gt_element ground-truth value of element
* @param bias permissible error
* @param info one of true or false
* - true: print shape and result
* - true: shape and result
* - false: do not
* @param failed_number maximum number of wrong element that will be printed
*
* @return
* - true: in permissible error
* - false: not
*/
bool check_element(T *gt_element, int bias = 2, bool info = true)
bool check_element(T *gt_element, int bias = 2, bool info = true, int failed_number = 0)
{
int count = 0;
if (info)
this->print_shape();
int i = 0;
for (int y = 0; y < this->shape[0]; y++)
int size = this->get_size();
for (int i = 0; i < size; i++)
{
for (int x = 0; x < this->shape[1]; x++)
if (DL_ABS(this->element[i] - gt_element[i]) > bias)
{
for (int c = 0; c < this->shape[2]; c++)
std::vector<int> index = get_axis_index(i);
std::cout << "element[";
for (int j = 0; j < index.size() - 1; j++)
{
int a = this->get_element_value({y, x, c});
int b = gt_element[i];
int offset = DL_ABS(a - b);
if (offset > bias)
{
printf("element[%d, %d, %d]: %d v.s. %d\n", y, x, c, a, b);
return false;
}
i++;
std::cout << index[j] << ", ";
}
std::cout << index.back() << "]: ";
std::cout << +this->element[i] << " v.s. " << +gt_element[i] << "\n";
count++;
if (count > failed_number)
return false;
}
}
if (count)
return false;
if (info)
printf("PASS\n");
@ -700,35 +452,44 @@ namespace dl
Tensor<T> &operator=(const Tensor<T> &input)
{
this->size = input.size;
this->auto_free = input.auto_free;
this->exponent = input.exponent;
this->shape = input.shape;
this->padding = input.padding;
int size_real_tmp = this->shape_with_padding.size() ? this->shape_with_padding[0] * this->shape_with_padding[1] * this->shape_with_padding[2] : 0;
int size_input_real = input.shape_with_padding.size() ? input.shape_with_padding[0] * input.shape_with_padding[1] * input.shape_with_padding[2] : 0;
this->shape_with_padding = input.shape_with_padding;
if (this->element)
int size_real_tmp = this->size;
int size_input_real = input.size;
this->set_shape(input.shape);
if (input.element)
{
if (size_real_tmp != size_input_real)
if (this->element)
{
tool::free_aligned(this->element);
T *new_element = (T *)tool::calloc_aligned(size_input_real, sizeof(T), 16);
tool::copy_memory(new_element, input.element, size_input_real * sizeof(T));
this->element = new_element;
if (size_real_tmp != size_input_real)
{
tool::free_aligned_prefer(this->element);
T *new_element = (T *)tool::malloc_aligned_prefer(size_input_real, sizeof(T), 16);
tool::copy_memory(new_element, input.element, size_input_real * sizeof(T));
this->element = new_element;
}
else
{
tool::copy_memory(this->element, input.element, size_input_real * sizeof(T));
}
}
else
{
tool::copy_memory(this->element, input.element, size_input_real * sizeof(T));
T *new_element = (T *)tool::malloc_aligned_prefer(size_input_real, sizeof(T), 16);
tool::copy_memory(new_element, input.element, size_input_real * sizeof(T));
this->element = new_element;
}
return *this;
}
else
{
T *new_element = (T *)tool::calloc_aligned(size_input_real, sizeof(T), 16);
tool::copy_memory(new_element, input.element, size_input_real * sizeof(T));
this->element = new_element;
if (this->element)
{
tool::free_aligned_prefer(this->element);
this->element = NULL;
}
return *this;
}
return *this;
}
};
} // namespace dl