mirror of
https://github.com/0xFEEDC0DE64/arduino-esp32.git
synced 2025-07-02 13:30:59 +02:00
IDF master b86fe0c66c
This commit is contained in:
@ -10,7 +10,7 @@
|
||||
#define DL_LOG_LAYER_LATENCY 0 /*<! - 1: print the latency of each parts of layer */
|
||||
/*<! - 0: mute */
|
||||
|
||||
#if CONFIG_SPIRAM_SUPPORT || CONFIG_ESP32_SPIRAM_SUPPORT || CONFIG_ESP32S3_SPIRAM_SUPPORT
|
||||
#if CONFIG_SPIRAM_SUPPORT || CONFIG_ESP32_SPIRAM_SUPPORT || CONFIG_ESP32S2_SPIRAM_SUPPORT || CONFIG_ESP32S3_SPIRAM_SUPPORT
|
||||
#define DL_SPIRAM_SUPPORT 1
|
||||
#else
|
||||
#define DL_SPIRAM_SUPPORT 0
|
||||
@ -83,8 +83,17 @@ namespace dl
|
||||
|
||||
typedef enum
|
||||
{
|
||||
PADDING_VALID, /*<! no padding >*/
|
||||
PADDING_SAME, /*<! SAME in TensorFlow style >*/
|
||||
PADDING_SAME_MXNET /*<! SAME in MXNET style >*/
|
||||
PADDING_NOT_SET,
|
||||
PADDING_VALID, /*<! no padding >*/
|
||||
PADDING_SAME_BEGIN, /*<! SAME in MXNET style >*/
|
||||
PADDING_SAME_END, /*<! SAME in TensorFlow style >*/
|
||||
} padding_type_t;
|
||||
} // namespace dl
|
||||
|
||||
/**
 * @brief Selector for how padded elements are filled.
 *
 * NOTE(review): the enumerator names follow the common pad-mode vocabulary
 * (constant / edge / reflect / symmetric); the exact fill rule for each mode is
 * defined by the code that consumes this enum, which is not visible here.
 */
typedef enum
{
    CONSTANT = 0,  /*<! first mode, numeric value 0 >*/
    EDGE = 1,      /*<! numeric value 1 >*/
    REFLECT = 2,   /*<! numeric value 2 >*/
    SYMMETRIC = 3, /*<! numeric value 3 >*/
} padding_mode_t;
|
||||
} // namespace dl
|
||||
|
@ -370,11 +370,70 @@ namespace dl
|
||||
*/
|
||||
uint32_t get_moving_point_number(uint8_t *f1, uint8_t *f2, const uint32_t height, const uint32_t width, const uint32_t stride, const uint32_t threshold = 5);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Apply an affine transformation to an image.
|
||||
*
|
||||
* @tparam T
|
||||
* @param input the input image.
|
||||
* @param output the output image.
|
||||
* @param M_inv the inverse transformation matrix.
|
||||
*/
|
||||
template <typename T>
|
||||
void warp_affine(dl::Tensor<T> *input, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv);
|
||||
|
||||
/**
|
||||
* @brief Apply an affine transformation to an image.
|
||||
*
|
||||
* @tparam T
|
||||
* @param input the pointer of the input image.
|
||||
* @param shape the shape of the input image.
|
||||
* @param output the output image.
|
||||
* @param M_inv the inverse transformation matrix.
|
||||
*/
|
||||
template <typename T>
|
||||
void warp_affine(uint16_t *input, std::vector<int> shape, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv);
|
||||
|
||||
/**
|
||||
* @brief Get the otsu thresh object.
|
||||
*
|
||||
* @param image the gray image.
|
||||
* @return uint8_t the otsu thresh.
|
||||
*/
|
||||
uint8_t get_otsu_thresh(Tensor<uint8_t> &image);
|
||||
|
||||
/**
|
||||
* @brief Convert RGB image to gray image
|
||||
*
|
||||
* @param image input image
|
||||
* @param bgr true: the image is in BGR format
|
||||
* false: the image is in RGB format
|
||||
* @return Tensor<uint8_t>* output image in gray format
|
||||
*/
|
||||
Tensor<uint8_t> *rgb2gray(Tensor<uint8_t> &image, bool bgr = false);
|
||||
|
||||
/**
|
||||
* @brief Convert RGB image to LAB image
|
||||
*
|
||||
* @param image input image
|
||||
* @param bgr true: the image is in BGR format
|
||||
* false: the image is in RGB format
|
||||
* @param fast true: use the fast algorithm, but the accuracy will be reduced
|
||||
* false: do not use the fast algorithm
|
||||
* @return Tensor<uint8_t>* output image in LAB format
|
||||
*/
|
||||
Tensor<uint8_t> *rgb2lab(Tensor<uint8_t> &image, bool bgr = false, bool fast = true);
|
||||
|
||||
/**
|
||||
* @brief Convert RGB image to HSV image
|
||||
*
|
||||
* @param image input image
|
||||
* @param bgr true: the image is in BGR format
|
||||
* false: the image is in RGB format
|
||||
* @param fast true: use the fast algorithm, but the accuracy will be reduced
|
||||
* false: do not use the fast algorithm
|
||||
* @return Tensor<uint8_t>* output image in HSV format
|
||||
*/
|
||||
Tensor<uint8_t> *rgb2hsv(Tensor<uint8_t> &image, bool bgr = false, bool fast = true);
|
||||
|
||||
} // namespace image
|
||||
} // namespace dl
|
||||
|
@ -25,7 +25,8 @@ namespace dl
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of add2d >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a seperate memeory >*/
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of add2d >*/
|
||||
|
||||
public:
|
||||
/**
|
||||
@ -35,19 +36,21 @@ namespace dl
|
||||
* @param activation activation of add2d, if you don't specify anything, no activation is applied
|
||||
* @param name name of add2d
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a seperate memeory
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Add2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = NULL, bool inplace = false) : Layer(name), activation(activation), output_exponent(output_exponent), output(NULL)
|
||||
{
|
||||
this->inplace = inplace;
|
||||
}
|
||||
Add2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = "Add2D", bool inplace = false) : Layer(name),
|
||||
activation(activation),
|
||||
output_exponent(output_exponent),
|
||||
output(NULL),
|
||||
inplace(inplace),
|
||||
output_shape({}) {}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Add2D object
|
||||
*/
|
||||
~Add2D()
|
||||
{
|
||||
if((!this->inplace) && (this->output != NULL))
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
@ -59,10 +62,12 @@ namespace dl
|
||||
*
|
||||
* @param input0 as one input
|
||||
* @param input1 as another input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
this->output_shape = input0.shape;
|
||||
|
||||
if (!this->inplace)
|
||||
{
|
||||
@ -78,6 +83,11 @@ namespace dl
|
||||
{
|
||||
this->output = &input0;
|
||||
}
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -105,7 +115,11 @@ namespace dl
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
@ -116,6 +130,10 @@ namespace dl
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
nn::add2d(*this->output, input0, input1, this->activation, assign_core, this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "add2d");
|
||||
}
|
||||
|
@ -24,23 +24,26 @@ namespace dl
|
||||
std::vector<int> filter_shape; /*<! filter shape in [filter_height, filter_width] >*/
|
||||
const int stride_y; /*<! stride in height >*/
|
||||
const int stride_x; /*<! stride in width >*/
|
||||
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET >*/
|
||||
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN >*/
|
||||
std::vector<int> padding; /*<! padding size needed in [top, bottom, left, right] of this operation >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of AvgPool2D >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of AvgPool2D >*/
|
||||
std::vector<int> output_shape; /*<! output shape of AvgPool2D >*/
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new AvgPool2D object.
|
||||
*
|
||||
* @param output_exponent exponent of output
|
||||
* @param filter_shape filter shape in [filter_height, filter_width]
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN or PADDING_NOT_SET,
|
||||
* - PADDING_VALID means no padding
|
||||
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input,
|
||||
* - PADDING_SAME results padding in TensorFlow style
|
||||
* - PADDING_SAME_MXNET results padding in MXNET style
|
||||
* - PADDING_SAME_END results padding in TensorFlow style
|
||||
* - PADDING_SAME_BEGIN results padding in MXNET style
|
||||
* - PADDING_NOT_SET means padding with the specific "padding" value below.
|
||||
* @param padding if padding_type is PADDING_NOT_SET, this value will be used as padding size.
|
||||
* the shape must be 4, the value of each position is: [padding top, padding bottom, padding left, padding right]
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param name name of layer
|
||||
@ -48,16 +51,23 @@ namespace dl
|
||||
AvgPool2D(const int output_exponent,
|
||||
const std::vector<int> filter_shape,
|
||||
const padding_type_t padding_type = PADDING_VALID,
|
||||
std::vector<int> padding = {},
|
||||
const int stride_y = 1,
|
||||
const int stride_x = 1,
|
||||
const char *name = NULL) : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
filter_shape(filter_shape),
|
||||
stride_y(stride_y),
|
||||
stride_x(stride_x),
|
||||
padding_type(padding_type)
|
||||
const char *name = "AvgPool2D") : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
filter_shape(filter_shape),
|
||||
padding_type(padding_type),
|
||||
padding(padding),
|
||||
stride_y(stride_y),
|
||||
stride_x(stride_x),
|
||||
output_shape({})
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
if (this->padding_type == PADDING_NOT_SET)
|
||||
{
|
||||
assert(this->padding.size() == 4);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -66,7 +76,7 @@ namespace dl
|
||||
*/
|
||||
~AvgPool2D()
|
||||
{
|
||||
if(this->output != NULL)
|
||||
if (this->output != NULL)
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
@ -76,20 +86,31 @@ namespace dl
|
||||
* @brief Update output shape and padding.
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
assert(input.shape[0] > 0);
|
||||
assert(input.shape[1] > 0);
|
||||
std::vector<int> output_shape = nn::get_output_shape(input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
|
||||
this->output->set_shape(output_shape);
|
||||
assert(input.shape.size() == 3);
|
||||
|
||||
this->output_shape = nn::get_output_shape(input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type, false, this->padding);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
|
||||
this->padding = nn::get_pad_size(output_shape, input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
|
||||
input.set_padding_size(this->padding);
|
||||
this->output->free_element();
|
||||
}
|
||||
if (this->padding_type != PADDING_NOT_SET)
|
||||
{
|
||||
this->padding = nn::get_pad_size(this->output_shape, input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
|
||||
}
|
||||
|
||||
this->output->free_element();
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output
|
||||
@ -108,7 +129,6 @@ namespace dl
|
||||
* @param autoload_enable one of true or false,
|
||||
* - true: load input and output from PSRAM to CACHE automatically
|
||||
* - false: do not
|
||||
* @param assign_core not effective yet
|
||||
* @return AvgPool2D result
|
||||
*/
|
||||
Tensor<feature_t> &call(Tensor<feature_t> &input, uint8_t autoload_enable = 0)
|
||||
@ -116,7 +136,11 @@ namespace dl
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
#include "dl_tool.hpp"
|
||||
#include "dl_tool_cache.hpp"
|
||||
#include <iostream>
|
||||
|
||||
namespace dl
|
||||
{
|
||||
|
@ -0,0 +1,139 @@
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <vector>
|
||||
|
||||
#include "dl_constant.hpp"
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_tool.hpp"
|
||||
#include "dl_layer_base.hpp"
|
||||
#include "dl_nn_concat.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace layer
|
||||
{
|
||||
/**
|
||||
* @brief Concat(input1, input2, input3, ...).
|
||||
*
|
||||
* @tparam feature_t support all kinds of integer and float data type
|
||||
*/
|
||||
template <typename feature_t>
|
||||
class Concat : Layer
|
||||
{
|
||||
private:
|
||||
int output_exponent; /*<! exponent of output >*/
|
||||
int axis; /*<! The axis along which the Tensor will be concatenated. >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Concat >*/
|
||||
std::vector<int> output_shape; /*<! output shape of Concat >*/
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new Concat object.
|
||||
*
|
||||
* @param name name of layer
|
||||
* @param axis The axis along which the Tensor will be concatenated.
|
||||
*/
|
||||
Concat(int axis, const char *name = "Concat") : Layer(name), axis(axis), output_shape({})
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Concat object
|
||||
*/
|
||||
~Concat()
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Collect inputs' channel and memory offset, called in Model.build().
|
||||
*
|
||||
* @param args pointers of concatenated Tensor
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(std::vector<Tensor<feature_t> *> args, bool print_shape = false)
|
||||
{
|
||||
assert(args.size() > 1);
|
||||
int shape_size = args[0]->shape.size();
|
||||
|
||||
if (this->axis < 0)
|
||||
{
|
||||
this->axis = shape_size + this->axis;
|
||||
}
|
||||
assert((this->axis < shape_size) && (this->axis > -1));
|
||||
|
||||
int output_shape_axis = args[0]->shape[this->axis];
|
||||
|
||||
for (int i = 1; i < args.size(); i++)
|
||||
{
|
||||
assert(shape_size == args[i]->shape.size());
|
||||
assert(args[i]->exponent == args[i - 1]->exponent);
|
||||
output_shape_axis += args[i]->shape[this->axis];
|
||||
|
||||
for (int j = 0; j < shape_size; j++)
|
||||
{
|
||||
if (j != this->axis)
|
||||
{
|
||||
assert(args[i]->shape[j] == args[i - 1]->shape[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->output_exponent = args[0]->exponent;
|
||||
this->output_shape = args[0]->shape;
|
||||
this->output_shape[this->axis] = output_shape_axis;
|
||||
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->free_element();
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Call Concat operation
|
||||
*
|
||||
* @param inputs the pointers of inputs
|
||||
* @param free_inputs true: free the inputs after call
|
||||
* false: do not free inputs
|
||||
* @return Tensor<feature_t>& concat result
|
||||
*/
|
||||
Tensor<feature_t> &call(std::vector<Tensor<feature_t> *> inputs, bool free_inputs = false)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
nn::concat(*this->output, inputs, this->axis, free_inputs);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "concat");
|
||||
return *this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output
|
||||
*
|
||||
* @return Tensor<feature_t>& Concat result
|
||||
*/
|
||||
Tensor<feature_t> &get_output()
|
||||
{
|
||||
return *this->output;
|
||||
}
|
||||
};
|
||||
} // namespace layer
|
||||
} // namespace dl
|
@ -13,8 +13,11 @@ namespace dl
|
||||
* @tparam feature_t supports int16_t and int8_t,
|
||||
* - int16_t: stands for operation in int16_t quantize
|
||||
* - int8_t: stands for operation in int8_t quantize
|
||||
* @tparam bias_t supports int16_t and int8_t, must specify when using int8 per-channel quantization
|
||||
* - int16_t: for int16 quantization and int8 per-channel quantization
|
||||
* - int8_t: for int8 per-tensor quantization
|
||||
*/
|
||||
template <typename feature_t>
|
||||
template <typename feature_t, typename bias_t = feature_t>
|
||||
class Conv2D : public Layer
|
||||
{
|
||||
private:
|
||||
@ -22,14 +25,14 @@ namespace dl
|
||||
const Filter<feature_t> *filter; /*<! filter of Conv2D >*/
|
||||
const int stride_y; /*<! stride in height >*/
|
||||
const int stride_x; /*<! stride in width >*/
|
||||
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET >*/
|
||||
const Bias<feature_t> *bias; /*<! bias of Conv2D, if you don't specify anything, no bias is added >*/
|
||||
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN >*/
|
||||
const Bias<bias_t> *bias; /*<! bias of Conv2D, if you don't specify anything, no bias is added >*/
|
||||
const Activation<feature_t> *activation; /*<! activation of Conv2D, if you don't specify anything, no activation is applied >*/
|
||||
std::vector<int> padding; /*<! padding size needed in [top, bottom, left, right] of this operation >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Conv2D >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Conv2D >*/
|
||||
std::vector<int> output_shape; /*<! output shape of Conv2D >*/
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new Conv2D object.
|
||||
*
|
||||
@ -37,33 +40,43 @@ namespace dl
|
||||
* @param filter filter of Conv2D
|
||||
* @param bias bias of Conv2D, if you don't specify anything, no bias is added
|
||||
* @param activation activation of Conv2D, if you don't specify anything, no activation is applied
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN or PADDING_NOT_SET,
|
||||
* - PADDING_VALID means no padding
|
||||
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input,
|
||||
* - PADDING_SAME results padding in TensorFlow style
|
||||
* - PADDING_SAME_MXNET results padding in MXNET style
|
||||
* - PADDING_SAME_END results padding in TensorFlow style
|
||||
* - PADDING_SAME_BEGIN results padding in MXNET style
|
||||
* - PADDING_NOT_SET means padding with the specific "padding" value below.
|
||||
* @param padding if padding_type is PADDING_NOT_SET, this value will be used as padding size.
|
||||
* the shape must be 4, the value of each position is: [padding top, padding bottom, padding left, padding right]
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param name name of layer
|
||||
*/
|
||||
Conv2D(const int output_exponent,
|
||||
const Filter<feature_t> *filter,
|
||||
const Bias<feature_t> *bias = NULL,
|
||||
const Bias<bias_t> *bias = NULL,
|
||||
const Activation<feature_t> *activation = NULL,
|
||||
const padding_type_t padding_type = PADDING_VALID,
|
||||
std::vector<int> padding = {},
|
||||
const int stride_y = 1,
|
||||
const int stride_x = 1,
|
||||
const char *name = NULL) : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
filter(filter),
|
||||
stride_y(stride_y),
|
||||
stride_x(stride_x),
|
||||
padding_type(padding_type),
|
||||
bias(bias),
|
||||
activation(activation)
|
||||
const char *name = "Conv2D") : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
filter(filter),
|
||||
stride_y(stride_y),
|
||||
stride_x(stride_x),
|
||||
padding_type(padding_type),
|
||||
bias(bias),
|
||||
activation(activation),
|
||||
padding(padding),
|
||||
output_shape({})
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
if (this->padding_type == PADDING_NOT_SET)
|
||||
{
|
||||
assert(this->padding.size() == 4);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -82,19 +95,30 @@ namespace dl
|
||||
* @brief Update output padding and input padding.
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
assert(input.shape[0] > 0);
|
||||
assert(input.shape[1] > 0);
|
||||
assert(input.shape.size() == 3);
|
||||
assert(this->filter->shape.size() == 4);
|
||||
assert(input.shape[2] == this->filter->shape[2]);
|
||||
|
||||
std::vector<int> output_shape = nn::get_output_shape(input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type, true);
|
||||
this->output->set_shape(output_shape);
|
||||
this->output_shape = nn::get_output_shape(input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type, true, this->padding);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->free_element();
|
||||
if (this->padding_type != PADDING_NOT_SET)
|
||||
{
|
||||
this->padding = nn::get_pad_size(this->output_shape, input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
|
||||
}
|
||||
|
||||
this->padding = nn::get_pad_size(output_shape, input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
|
||||
input.set_padding_size(this->padding);
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -122,7 +146,11 @@ namespace dl
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
@ -153,5 +181,6 @@ namespace dl
|
||||
dl::tool::cache::preload_func((uint32_t)(this->filter->element), size);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace layer
|
||||
} // namespace dl
|
||||
|
@ -13,8 +13,11 @@ namespace dl
|
||||
* @tparam feature_t supports int16_t and int8_t,
|
||||
* - int16_t: stands for operation in int16_t quantize
|
||||
* - int8_t: stands for operation in int8_t quantize
|
||||
* @tparam bias_t supports int16_t and int8_t, must specify when using int8 per-channel quantization
|
||||
* - int16_t: for int16 quantization and int8 per-channel quantization
|
||||
* - int8_t: for int8 per-tensor quantization
|
||||
*/
|
||||
template <typename feature_t>
|
||||
template <typename feature_t, typename bias_t = feature_t>
|
||||
class DepthwiseConv2D : public Layer
|
||||
{
|
||||
private:
|
||||
@ -22,14 +25,14 @@ namespace dl
|
||||
const Filter<feature_t> *filter; /*<! filter of DepthwiseConv2D >*/
|
||||
const int stride_y; /*<! stride in height >*/
|
||||
const int stride_x; /*<! stride in width >*/
|
||||
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET >*/
|
||||
const Bias<feature_t> *bias; /*<! bias of DepthwiseConv2D, if you don't specify anything, no bias is added >*/
|
||||
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN >*/
|
||||
const Bias<bias_t> *bias; /*<! bias of DepthwiseConv2D, if you don't specify anything, no bias is added >*/
|
||||
const Activation<feature_t> *activation; /*<! activation of DepthwiseConv2D, if you don't specify anything, no activation is applied >*/
|
||||
std::vector<int> padding; /*<! padding size needed in [top, bottom, left, right] of this operation >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of DepthwiseConv2D >*/
|
||||
std::vector<int> output_shape; /*<! output shape of DepthwiseConv2D >*/
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new DepthwiseConv2D object.
|
||||
*
|
||||
@ -37,40 +40,50 @@ namespace dl
|
||||
* @param filter filter of DepthwiseConv2D
|
||||
* @param bias bias of DepthwiseConv2D, if you don't specify anything, no bias is added
|
||||
* @param activation activation of DepthwiseConv2D, if you don't specify anything, no activation is applied
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN or PADDING_NOT_SET,
|
||||
* - PADDING_VALID means no padding
|
||||
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input
|
||||
* - PADDING_SAME results padding in TensorFlow style
|
||||
* - PADDING_SAME_MXNET results padding in MXNET style
|
||||
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input,
|
||||
* - PADDING_SAME_END results padding in TensorFlow style
|
||||
* - PADDING_SAME_BEGIN results padding in MXNET style
|
||||
* - PADDING_NOT_SET means padding with the specific "padding" value below.
|
||||
* @param padding if padding_type is PADDING_NOT_SET, this value will be used as padding size.
|
||||
* the shape must be 4, the value of each position is: [padding top, padding bottom, padding left, padding right]
|
||||
* @param stride_y - stride in height
|
||||
* @param stride_x - stride in width
|
||||
* @param name name of layer
|
||||
*/
|
||||
DepthwiseConv2D(const int output_exponent,
|
||||
const Filter<feature_t> *filter,
|
||||
const Bias<feature_t> *bias = NULL,
|
||||
const Bias<bias_t> *bias = NULL,
|
||||
const Activation<feature_t> *activation = NULL,
|
||||
const padding_type_t padding_type = PADDING_VALID,
|
||||
std::vector<int> padding = {},
|
||||
const int stride_y = 1,
|
||||
const int stride_x = 1,
|
||||
const char *name = NULL) : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
filter(filter),
|
||||
stride_y(stride_y),
|
||||
stride_x(stride_x),
|
||||
padding_type(padding_type),
|
||||
bias(bias),
|
||||
activation(activation)
|
||||
const char *name = "DepthwiseConv2D") : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
filter(filter),
|
||||
stride_y(stride_y),
|
||||
stride_x(stride_x),
|
||||
padding_type(padding_type),
|
||||
bias(bias),
|
||||
activation(activation),
|
||||
padding(padding),
|
||||
output_shape({})
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
if (this->padding_type == PADDING_NOT_SET)
|
||||
{
|
||||
assert(this->padding.size() == 4);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the DepthwiseConv2D object.
|
||||
*
|
||||
*/
|
||||
~DepthwiseConv2D()
|
||||
~DepthwiseConv2D()
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
@ -82,19 +95,31 @@ namespace dl
|
||||
* @brief Update output shape and padding.
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
assert(input.shape[0] > 0);
|
||||
assert(input.shape[1] > 0);
|
||||
assert(input.shape.size() == 3);
|
||||
assert(this->filter->shape.size() == 4);
|
||||
assert(input.shape[2] == this->filter->shape[2]);
|
||||
|
||||
std::vector<int> output_shape = nn::get_output_shape(input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
|
||||
this->output->set_shape(output_shape);
|
||||
this->output_shape = nn::get_output_shape(input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type, false, this->padding);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
|
||||
this->padding = nn::get_pad_size(output_shape, input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
|
||||
input.set_padding_size(this->padding);
|
||||
if (this->padding_type != PADDING_NOT_SET)
|
||||
{
|
||||
this->padding = nn::get_pad_size(this->output_shape, input.shape, this->filter->shape_with_dilation, this->stride_y, this->stride_x, this->padding_type);
|
||||
}
|
||||
this->output->free_element();
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -122,7 +147,12 @@ namespace dl
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
|
@ -0,0 +1,128 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_constant.hpp"
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_tool.hpp"
|
||||
#include "dl_layer_base.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace layer
|
||||
{
|
||||
/**
|
||||
* @brief ExpandDims(input): add new axes to the input at the positions given by axis.
|
||||
*
|
||||
* @tparam feature_t
|
||||
*/
|
||||
template <typename feature_t>
|
||||
class ExpandDims : public Layer
|
||||
{
|
||||
private:
|
||||
std::vector<int> output_shape; /*<! output shape of ExpandDims >*/
|
||||
std::vector<int> axis; /*<! position where the new axis is placed. >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of ExpandDims >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
|
||||
public:
|
||||
int output_exponent;
|
||||
|
||||
/**
|
||||
* @brief Construct a new ExpandDims object
|
||||
*
|
||||
* @param axis position where the new axis is placed.
|
||||
* @param name name of layer
|
||||
* @param inplace true: the output will store to input
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
ExpandDims(std::vector<int> axis, const char *name = "ExpandDims", bool inplace = false) : Layer(name),
|
||||
axis(axis), inplace(inplace), output_shape({})
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the ExpandDims object
|
||||
*
|
||||
*/
|
||||
~ExpandDims()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Update output shape.
|
||||
*
|
||||
* @param input as an input.
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
this->output_exponent = input.exponent;
|
||||
if (!this->inplace)
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->expand_dims(this->axis);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input;
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->expand_dims(this->axis);
|
||||
}
|
||||
this->output_shape = this->output->shape;
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output
|
||||
*
|
||||
* @return Tensor<feature_t>& ExpandDims result
|
||||
*/
|
||||
Tensor<feature_t> &get_output()
|
||||
{
|
||||
return *this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief call ExpandDims opeartion
|
||||
*
|
||||
* @param input
|
||||
* @return Tensor<feature_t>& ExpandDims result
|
||||
*/
|
||||
Tensor<feature_t> &call(Tensor<feature_t> &input)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->copy_element(input, true);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "ExpandDims");
|
||||
}
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->set_shape(this->output_shape);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "ExpandDims");
|
||||
}
|
||||
return *this->output;
|
||||
}
|
||||
};
|
||||
} // namespace layer
|
||||
} // namespace dl
|
@ -0,0 +1,120 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_constant.hpp"
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_tool.hpp"
|
||||
#include "dl_layer_base.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace layer
|
||||
{
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @tparam feature_t
|
||||
*/
|
||||
template <typename feature_t>
|
||||
class Flatten : public Layer
|
||||
{
|
||||
private:
|
||||
int output_exponent; /*<! exponent of output >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Flatten >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of Flatten >*/
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new Flatten object
|
||||
*
|
||||
* @param name name of layer
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Flatten(const char *name = "Flatten", bool inplace = false) : Layer(name), inplace(inplace), output_shape({})
|
||||
{}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Flatten object
|
||||
*
|
||||
*/
|
||||
~Flatten()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Update output shape.
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
this->output_exponent = input.exponent;
|
||||
this->output_shape = {input.get_size()};
|
||||
if (!this->inplace)
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input;
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output
|
||||
*
|
||||
* @return Tensor<feature_t>& Flatten result
|
||||
*/
|
||||
Tensor<feature_t> &get_output()
|
||||
{
|
||||
return *this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Call Flatten operation.
|
||||
*
|
||||
* @param input as an input
|
||||
* @return Tensor<feature_t>& Flatten result
|
||||
*/
|
||||
Tensor<feature_t> &call(Tensor<feature_t> &input)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->flatten();
|
||||
this->output->copy_element(input, true);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "flatten");
|
||||
}
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->flatten();
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "flatten");
|
||||
}
|
||||
return *this->output;
|
||||
}
|
||||
};
|
||||
} // namespace layer
|
||||
} // namespace dl
|
@ -0,0 +1,167 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_nn_fully_connected.hpp"
|
||||
#include "dl_layer_base.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace layer
|
||||
{
|
||||
/**
|
||||
* @brief Activation(FullyConnected(input, filter) + bias).
|
||||
*
|
||||
* @tparam feature_t supports int16_t and int8_t,
|
||||
* - int16_t: stands for operation in int16_t quantize
|
||||
* - int8_t: stands for operation in int8_t quantize
|
||||
* @tparam bias_t supports int16_t and int8_t, must specify when using int8 per-channel quantization
|
||||
* - int16_t: for int16 quantization and int8 per-channel quantization
|
||||
* - int8_t: for int8 per-tensor quantization
|
||||
*/
|
||||
template <typename feature_t, typename bias_t = feature_t>
|
||||
class FullyConnected : public Layer
|
||||
{
|
||||
private:
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
const bool flatten; /*<! true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
|
||||
false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim] >*/
|
||||
const Filter<feature_t> *filter; /*<! filter of FullyConnected >*/
|
||||
const Bias<bias_t> *bias; /*<! bias of FullyConnected, if you don't specify anything, no bias is added >*/
|
||||
const Activation<feature_t> *activation; /*<! activation of FullyConnected, if you don't specify anything, no activation is applied >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of FullyConnected >*/
|
||||
std::vector<int> output_shape; /*<! output shape of FullyConnected >*/
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new FullyConnected object.
|
||||
*
|
||||
* @param output_exponent exponent of output
|
||||
* @param filter filter of FullyConnected
|
||||
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
|
||||
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
|
||||
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
|
||||
false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
|
||||
* @param name name of layer
|
||||
*/
|
||||
FullyConnected(const int output_exponent,
|
||||
const Filter<feature_t> *filter,
|
||||
const Bias<bias_t> *bias = NULL,
|
||||
const Activation<feature_t> *activation = NULL,
|
||||
const bool flatten = true,
|
||||
const char *name = "FullyConnected") : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
flatten(flatten),
|
||||
filter(filter),
|
||||
bias(bias),
|
||||
activation(activation),
|
||||
output_shape({})
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the FullyConnected object.
|
||||
*
|
||||
*/
|
||||
~FullyConnected()
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Update output padding and input padding.
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
assert(this->filter->shape.size() == 4);
|
||||
assert(this->filter->shape[0] == 1);
|
||||
assert(this->filter->shape[1] == 1);
|
||||
if (this->flatten)
|
||||
{
|
||||
assert(input.get_size() == this->filter->shape[2]);
|
||||
this->output_shape = {this->filter->shape[3]};
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(input.shape.back() == this->filter->shape[2]);
|
||||
this->output_shape = input.shape;
|
||||
this->output_shape[this->output_shape.size() - 1] = this->filter->shape[3];
|
||||
}
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->free_element();
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output
|
||||
*
|
||||
* @return Tensor<feature_t>& FullyConnected result
|
||||
*/
|
||||
Tensor<feature_t> &get_output()
|
||||
{
|
||||
return *this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Call FullyConnected operation
|
||||
*
|
||||
* @param input as an input.
|
||||
* @param autoload_enable one of true or false,
|
||||
* - true: load input and output from PSRAM to CACHE automatically
|
||||
* - false: do not
|
||||
* @param assign_core not effective yet
|
||||
* @return FullyConnected result
|
||||
*/
|
||||
Tensor<feature_t> &call(Tensor<feature_t> &input, bool autoload_enable = false, const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
if (autoload_enable)
|
||||
{
|
||||
dl::tool::cache::autoload_func((uint32_t)(this->output->element), this->output->get_size() * sizeof(feature_t),
|
||||
(uint32_t)(input.element), input.get_size() * sizeof(feature_t));
|
||||
}
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
nn::fully_connected(*this->output, input, *(this->filter), this->bias, this->activation, this->flatten, assign_core);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "fully_connected");
|
||||
return *this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Preload the filter to Cache.
|
||||
* NOTE: Call this layer's preload() before previous layer's call() such that filter could be loaded while previous layer is doing calculation.
|
||||
*/
|
||||
void preload()
|
||||
{
|
||||
size_t size = sizeof(feature_t);
|
||||
int shape_size = this->filter->shape.size();
|
||||
for (int i = 0; i < shape_size; ++i)
|
||||
{
|
||||
size *= filter->shape[i];
|
||||
}
|
||||
dl::tool::cache::preload_func((uint32_t)(this->filter->element), size);
|
||||
}
|
||||
};
|
||||
} // namespace layer
|
||||
} // namespace dl
|
@ -20,8 +20,9 @@ namespace dl
|
||||
class GlobalAveragePool2D : public Layer
|
||||
{
|
||||
private:
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of GlobalAveragePool2D >*/
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
std::vector<int> output_shape; /*<! output shape of GlobalAveragePool2D >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of GlobalAveragePool2D >*/
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new GlobalAveragePool2D object.
|
||||
@ -29,8 +30,9 @@ namespace dl
|
||||
* @param output_exponent exponent of output
|
||||
* @param name name of layer
|
||||
*/
|
||||
GlobalAveragePool2D(const int output_exponent, const char *name = NULL) : Layer(name),
|
||||
output_exponent(output_exponent)
|
||||
GlobalAveragePool2D(const int output_exponent, const char *name = "GlobalAveragePool2D") : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
output_shape({})
|
||||
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
@ -52,17 +54,26 @@ namespace dl
|
||||
* @brief Update output shape.
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
assert(input.shape[0] > 0);
|
||||
assert(input.shape[1] > 0);
|
||||
assert(input.shape.size() == 3);
|
||||
|
||||
std::vector<int> output_shape(input.shape.size(), 1);
|
||||
output_shape[2] = input.shape[2];
|
||||
this->output->set_shape(output_shape);
|
||||
this->output_shape = output_shape;
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->free_element();
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -90,7 +101,11 @@ namespace dl
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
|
@ -20,15 +20,15 @@ namespace dl
|
||||
class GlobalMaxPool2D : public Layer
|
||||
{
|
||||
private:
|
||||
Tensor<feature_t> *output; /*<! output ptr of GlobalMaxPool2D >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of GlobalMaxPool2D >*/
|
||||
std::vector<int> output_shape; /*<! output shape of GlobalMaxPool2D >*/
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new GlobalMaxPool2D object.
|
||||
*
|
||||
* @param name name of layer
|
||||
*/
|
||||
GlobalMaxPool2D(const char *name = NULL) : Layer(name)
|
||||
GlobalMaxPool2D(const char *name = "GlobalMaxPool2D") : Layer(name), output_shape({})
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
@ -49,17 +49,26 @@ namespace dl
|
||||
* @brief Update output shape and exponent.
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
assert(input.shape[0] > 0);
|
||||
assert(input.shape[1] > 0);
|
||||
assert(input.shape.size() == 3);
|
||||
this->output->set_exponent(input.exponent);
|
||||
|
||||
std::vector<int> output_shape(input.shape.size(), 1);
|
||||
output_shape[2] = input.shape[2];
|
||||
this->output->set_shape(output_shape);
|
||||
this->output_shape = output_shape;
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -87,7 +96,11 @@ namespace dl
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(input.exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include "dl_constant.hpp"
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_nn_LeakyReLU.hpp"
|
||||
#include "dl_nn_leakyrelu.hpp"
|
||||
#include "dl_layer_base.hpp"
|
||||
|
||||
namespace dl
|
||||
@ -20,13 +20,13 @@ namespace dl
|
||||
class LeakyReLU : public Layer
|
||||
{
|
||||
private:
|
||||
feature_t activation_alpha; /*<! quantized alpha >*/
|
||||
int activation_exponent; /*<! exponent of quantized alpha >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of leakyrelu>*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a seperate memeory >*/
|
||||
feature_t activation_alpha; /*<! quantized alpha >*/
|
||||
int activation_exponent; /*<! exponent of quantized alpha >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of leakyrelu>*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of leakyrelu >*/
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new LeakyReLU object
|
||||
*
|
||||
@ -34,9 +34,9 @@ namespace dl
|
||||
* @param activation_exponent exponent of quantized alpha
|
||||
* @param name name of leakyrelu
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a seperate memeory
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
LeakyReLU(const int activation_alpha, const int activation_exponent, const char *name = NULL, bool inplace = false) : Layer(name), output(NULL)
|
||||
LeakyReLU(const int activation_alpha, const int activation_exponent, const char *name = "LeakyReLU", bool inplace = false) : Layer(name), output(NULL), output_shape({})
|
||||
{
|
||||
this->activation_alpha = activation_alpha;
|
||||
this->activation_exponent = activation_exponent;
|
||||
@ -47,7 +47,7 @@ namespace dl
|
||||
* @brief Destroy the LeakyReLU object
|
||||
*
|
||||
*/
|
||||
~LeakyReLU()
|
||||
~LeakyReLU()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
@ -59,24 +59,32 @@ namespace dl
|
||||
* @brief Update output shape and exponent
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
if(!this->inplace)
|
||||
this->output_shape = input.shape;
|
||||
if (!this->inplace)
|
||||
{
|
||||
if(this->output != NULL)
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_shape(input.shape);
|
||||
}
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input;
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@ -100,10 +108,14 @@ namespace dl
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if(!this->inplace)
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(input.exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
@ -114,6 +126,10 @@ namespace dl
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
nn::leakyrelu<true>(*this->output, input, this->activation_alpha, this->activation_exponent, assign_core);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "leakyrelu");
|
||||
}
|
||||
|
@ -22,28 +22,28 @@ namespace dl
|
||||
class Max2D : public Layer
|
||||
{
|
||||
private:
|
||||
Tensor<feature_t> *output; /*<! output ptr of max2d >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a seperate memeory >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of max2d >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of max2d >*/
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new Max2D object.
|
||||
*
|
||||
* @param name name of max2d
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a seperate memeory
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Max2D(const char *name = NULL, bool inplace = false) : Layer(name), output(NULL)
|
||||
Max2D(const char *name = "Max2D", bool inplace = false) : Layer(name),
|
||||
output(NULL), inplace(inplace), output_shape({})
|
||||
{
|
||||
this->inplace = inplace;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Max2D object
|
||||
*
|
||||
*/
|
||||
~Max2D()
|
||||
~Max2D()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
@ -58,24 +58,34 @@ namespace dl
|
||||
*
|
||||
* @param input0 as one input
|
||||
* @param input1 as another input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
assert(input0.exponent == input1.exponent);
|
||||
this->output_shape = input0.shape;
|
||||
|
||||
if(!this->inplace)
|
||||
if (!this->inplace)
|
||||
{
|
||||
if(this->output != NULL)
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->set_shape(input0.shape);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input0;
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -100,10 +110,14 @@ namespace dl
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if(!this->inplace)
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(input0.exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
@ -114,6 +128,10 @@ namespace dl
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
nn::max2d<true>(*this->output, input0, input1, assign_core);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "max2d");
|
||||
}
|
||||
|
@ -23,44 +23,54 @@ namespace dl
|
||||
std::vector<int> filter_shape; /*<! filter shape in [filter_height, filter_width] >*/
|
||||
const int stride_y; /*<! stride in height >*/
|
||||
const int stride_x; /*<! stride in width >*/
|
||||
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET >*/
|
||||
const padding_type_t padding_type; /*<! one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN >*/
|
||||
std::vector<int> padding; /*<! padding size needed in [top, bottom, left, right] of this operation >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of MaxPool2D >*/
|
||||
std::vector<int> output_shape; /*<! output shape of MaxPool2D >*/
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new MaxPool2D object.
|
||||
*
|
||||
* @param filter_shape filter shape in [filter_height, filter_width]
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN or PADDING_NOT_SET,
|
||||
* - PADDING_VALID means no padding
|
||||
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* PADDING_SAME_END and PADDING_SAME_BEGIN results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input,
|
||||
* - PADDING_SAME results padding in TensorFlow style
|
||||
* - PADDING_SAME_MXNET results padding in MXNET style
|
||||
* - PADDING_SAME_END results padding in TensorFlow style
|
||||
* - PADDING_SAME_BEGIN results padding in MXNET style
|
||||
* - PADDING_NOT_SET means padding with the specific "padding" value below.
|
||||
* @param padding if padding_type is PADDING_NOT_SET, this value will be used as padding size.
|
||||
* the shape must be 4, the value of each position is: [padding top, padding bottom, padding left, padding right]
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param name name of layer
|
||||
*/
|
||||
MaxPool2D(const std::vector<int> filter_shape,
|
||||
const padding_type_t padding_type = PADDING_VALID,
|
||||
std::vector<int> padding = {},
|
||||
const int stride_y = 1,
|
||||
const int stride_x = 1,
|
||||
const char *name = NULL) : Layer(name),
|
||||
filter_shape(filter_shape),
|
||||
stride_y(stride_y),
|
||||
stride_x(stride_x),
|
||||
padding_type(padding_type)
|
||||
const char *name = "MaxPool2D") : Layer(name),
|
||||
filter_shape(filter_shape),
|
||||
padding_type(padding_type),
|
||||
padding(padding),
|
||||
stride_y(stride_y),
|
||||
stride_x(stride_x),
|
||||
output_shape({})
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
if (this->padding_type == PADDING_NOT_SET)
|
||||
{
|
||||
assert(this->padding.size() == 4);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the MaxPool2D object.
|
||||
*
|
||||
*/
|
||||
~MaxPool2D()
|
||||
~MaxPool2D()
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
@ -72,18 +82,29 @@ namespace dl
|
||||
* @brief Update output shape and padding.
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
assert(input.shape[0] > 0);
|
||||
assert(input.shape[1] > 0);
|
||||
this->output->set_exponent(input.exponent);
|
||||
std::vector<int> output_shape = nn::get_output_shape(input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
|
||||
this->output->set_shape(output_shape);
|
||||
assert(input.shape.size() == 3);
|
||||
|
||||
this->padding = nn::get_pad_size(output_shape, input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
|
||||
input.set_padding_size(this->padding);
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output_shape = nn::get_output_shape(input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type, false, this->padding);
|
||||
this->output->set_shape(this->output_shape);
|
||||
|
||||
if (this->padding_type != PADDING_NOT_SET)
|
||||
{
|
||||
this->padding = nn::get_pad_size(this->output_shape, input.shape, filter_shape, this->stride_y, this->stride_x, this->padding_type);
|
||||
}
|
||||
this->output->free_element();
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -111,7 +132,11 @@ namespace dl
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(input.exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
|
@ -22,28 +22,28 @@ namespace dl
|
||||
class Min2D : public Layer
|
||||
{
|
||||
private:
|
||||
Tensor<feature_t> *output; /*<! output of ptr min2d>*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a seperate memeory >*/
|
||||
public:
|
||||
|
||||
Tensor<feature_t> *output; /*<! output of ptr min2d>*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of min2d >*/
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new Min2D object
|
||||
*
|
||||
* @param name name of min2d
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a seperate memeory
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Min2D(const char *name = NULL, bool inplace = false) : Layer(name), output(NULL)
|
||||
{
|
||||
this->inplace = inplace;
|
||||
}
|
||||
Min2D(const char *name = "Min2D", bool inplace = false) : Layer(name),
|
||||
output(NULL),
|
||||
inplace(inplace),
|
||||
output_shape({}) {}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Min2D object
|
||||
*
|
||||
*/
|
||||
~Min2D()
|
||||
~Min2D()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
@ -58,25 +58,34 @@ namespace dl
|
||||
*
|
||||
* @param input0 as one input
|
||||
* @param input1 as another input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
assert(input0.exponent == input1.exponent);
|
||||
this->output_shape = input0.shape;
|
||||
|
||||
if(!this->inplace)
|
||||
if (!this->inplace)
|
||||
{
|
||||
if(this->output != NULL)
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_shape(input0.shape);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->set_exponent(input0.exponent);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input0;
|
||||
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -101,10 +110,14 @@ namespace dl
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if(!this->inplace)
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(input0.exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
@ -115,6 +128,10 @@ namespace dl
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
nn::min2d<true>(*this->output, input0, input1, assign_core);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "min2d");
|
||||
}
|
||||
|
@ -21,14 +21,13 @@ namespace dl
|
||||
class Mul2D : public Layer
|
||||
{
|
||||
private:
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
const Activation<feature_t> *activation; /*<! activation of Mul2D, if you don't specify anything, no activation is applied >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Mul2D >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a seperate memeory >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Mul2D >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of Mul2D >*/
|
||||
public:
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
|
||||
/**
|
||||
* @brief Construct a new Mul2D object.
|
||||
*
|
||||
@ -36,18 +35,24 @@ namespace dl
|
||||
* @param activation activation of Mul2D, if you don't specify anything, no activation is applied
|
||||
* @param name name of layer
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a seperate memeory
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Mul2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = NULL, bool inplace = false) : Layer(name),
|
||||
output_exponent(output_exponent),activation(activation), output(NULL)
|
||||
Mul2D(const int output_exponent,
|
||||
const Activation<feature_t> *activation = NULL,
|
||||
const char *name = "Mul2D",
|
||||
bool inplace = false) : Layer(name),
|
||||
output_exponent(output_exponent),
|
||||
activation(activation),
|
||||
output(NULL),
|
||||
inplace(inplace),
|
||||
output_shape({})
|
||||
{
|
||||
this->inplace = inplace;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Multiply2D object.
|
||||
*/
|
||||
~Mul2D()
|
||||
~Mul2D()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
@ -61,24 +66,34 @@ namespace dl
|
||||
*
|
||||
* @param input0 as one input
|
||||
* @param input1 as another input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
this->output_shape = input0.shape;
|
||||
|
||||
if (!this->inplace)
|
||||
{
|
||||
if(this->output != NULL)
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->set_shape(input0.shape);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
}
|
||||
|
||||
|
||||
else
|
||||
{
|
||||
this->output = &input0;
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -106,7 +121,11 @@ namespace dl
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
@ -117,6 +136,10 @@ namespace dl
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
nn::mul2d<true>(*this->output, input0, input1, this->activation, assign_core);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "mul2d");
|
||||
}
|
||||
|
@ -24,9 +24,9 @@ namespace dl
|
||||
int activation_exponent; /*<! exponent of quantized alpha elements >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of prelu >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a seperate memeory >*/
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of prelu >*/
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new PReLU object
|
||||
*
|
||||
@ -34,20 +34,25 @@ namespace dl
|
||||
* @param activation_exponent exponent of quantized alpha elements
|
||||
* @param name name of prelu
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a seperate memeory
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
// Construct a PReLU layer.
//   activation_element  - quantized alpha elements
//   activation_exponent - exponent of the quantized alpha elements
//   name                - layer name, forwarded to the Layer base class
//   inplace             - true: build() makes the output alias the input
//                         false: the layer owns a separate output tensor
//
// The defaults of `name` and `inplace` had been swapped (`name = NULL`,
// `inplace = "PReLU"`). A string literal converts to `true`, so a
// default-constructed layer was unnamed and silently ran in-place.
PReLU(const feature_t *activation_element,
      const int activation_exponent = 0,
      const char *name = "PReLU",
      bool inplace = false) : Layer(name),
                              activation_element(activation_element),
                              activation_exponent(activation_exponent),
                              output(NULL),
                              inplace(inplace),
                              output_shape({})
{
    // every member is set by the initializer list above
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the PReLU object
|
||||
*
|
||||
*/
|
||||
~PReLU()
|
||||
~PReLU()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
@ -59,23 +64,31 @@ namespace dl
|
||||
* @brief Update output shape and exponent
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
if(!this->inplace)
|
||||
this->output_shape = input.shape;
|
||||
if (!this->inplace)
|
||||
{
|
||||
if(this->output != NULL)
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->set_shape(input.shape);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input;
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -99,11 +112,15 @@ namespace dl
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if(!this->inplace)
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->apply_element();
|
||||
this->output->malloc_element();
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
@ -113,6 +130,10 @@ namespace dl
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
nn::prelu(*this->output, input, this->activation_element, this->activation_exponent, assign_core);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "leakyrelu");
|
||||
}
|
||||
|
@ -21,29 +21,28 @@ namespace dl
|
||||
class ReLU : public Layer
|
||||
{
|
||||
private:
|
||||
Tensor<feature_t> *output; /*<! output ptr of relu >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a seperate memeory >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of relu >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of relu >*/
|
||||
public:
|
||||
|
||||
|
||||
/**
|
||||
* @brief Construct a new ReLU object
|
||||
*
|
||||
* @param name name of relu
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a seperate memeory
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
// Construct a ReLU layer.
//   name    - layer name, forwarded to the Layer base class
//   inplace - true: build() makes the output alias the input
//             false: the layer owns a separate output tensor
ReLU(const char *name = "ReLU", bool inplace = false)
    : Layer(name),
      output(nullptr),
      inplace(inplace),
      output_shape()
{
    // every member is set by the initializer list above
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the ReLU object
|
||||
*
|
||||
*/
|
||||
~ReLU()
|
||||
~ReLU()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
@ -55,23 +54,31 @@ namespace dl
|
||||
* @brief Update output shape and exponent
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input)
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
if(!this->inplace)
|
||||
this->output_shape = input.shape;
|
||||
if (!this->inplace)
|
||||
{
|
||||
if(this->output != NULL)
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->set_shape(input.shape);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input;
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -95,10 +102,14 @@ namespace dl
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if(!this->inplace)
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output->malloc_element();
|
||||
this->output->set_exponent(input.exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
@ -109,6 +120,10 @@ namespace dl
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
nn::relu(*this->output, input, assign_core);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "relu");
|
||||
}
|
||||
|
@ -0,0 +1,124 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_constant.hpp"
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_tool.hpp"
|
||||
#include "dl_layer_base.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace layer
|
||||
{
|
||||
/**
|
||||
* @brief Reshape(input)
|
||||
*
|
||||
* @tparam feature_t supports int16_t and int8_t,
|
||||
* - int16_t: stands for operation in int16_t quantize
|
||||
* - int8_t: stands for operation in int8_t quantize
|
||||
*/
|
||||
template <typename feature_t>
|
||||
class Reshape : public Layer
|
||||
{
|
||||
private:
|
||||
int output_exponent; /*<! exponent of output >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Reshape >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of Reshape >*/
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new Reshape object
|
||||
*
|
||||
* @param shape the target shape
|
||||
* @param name name of Reshape layer
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Reshape(std::vector<int> shape, const char *name = "Reshape", bool inplace = false) : Layer(name),
|
||||
output_shape(shape), inplace(inplace)
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Reshape object
|
||||
*
|
||||
*/
|
||||
~Reshape()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Update output shape and exponent
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
this->output_exponent = input.exponent;
|
||||
if (!this->inplace)
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input;
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output
|
||||
*
|
||||
* @return Tensor<feature_t>& Reshape result
|
||||
*/
|
||||
Tensor<feature_t> &get_output()
|
||||
{
|
||||
return *this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Call Reshape operation.
|
||||
*
|
||||
* @param input as an input
|
||||
* @return Tensor<feature_t>& Reshape result
|
||||
*/
|
||||
Tensor<feature_t> &call(Tensor<feature_t> &input)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->reshape(this->output_shape);
|
||||
this->output->copy_element(input, true);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "reshape");
|
||||
}
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->reshape(this->output_shape);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "reshape");
|
||||
}
|
||||
return *this->output;
|
||||
}
|
||||
};
|
||||
} // namespace layer
|
||||
} // namespace dl
|
@ -0,0 +1,127 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_constant.hpp"
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_tool.hpp"
|
||||
#include "dl_layer_base.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace layer
|
||||
{
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @tparam feature_t
|
||||
*/
|
||||
template <typename feature_t>
|
||||
class Squeeze : public Layer
|
||||
{
|
||||
private:
|
||||
int output_exponent; /*<! exponent of output >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Squeeze >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
int axis; /*<! the dim to to be remove. make sure the length of the dim is equal to 1.
|
||||
if axis == INT32_MAX, all the dims with length==1 will be removed. >*/
|
||||
std::vector<int> output_shape; /*<! output shape of AvgPool2D >*/
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new Squeeze object
|
||||
*
|
||||
* @param axis the dim to to be remove. make sure the length of the dim is equal to 1.
|
||||
* if axis == INT32_MAX, all the dims with length==1 will be removed.
|
||||
* @param name name of Squeeze layer
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Squeeze(int axis = INT32_MAX, const char *name = "Squeeze", bool inplace = false) : Layer(name), axis(axis), inplace(inplace), output_shape({})
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Squeeze object
|
||||
*
|
||||
*/
|
||||
~Squeeze()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Update output shape and exponent
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
this->output_exponent = input.exponent;
|
||||
if (!this->inplace)
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->set_shape(input.shape);
|
||||
this->output->squeeze(this->axis);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input;
|
||||
this->output->set_shape(input.shape);
|
||||
this->output->squeeze(this->axis);
|
||||
}
|
||||
this->output_shape = this->output->shape;
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output
|
||||
*
|
||||
* @return Tensor<feature_t>& Squeeze result
|
||||
*/
|
||||
Tensor<feature_t> &get_output()
|
||||
{
|
||||
return *this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Call Squeeze operation.
|
||||
*
|
||||
* @param input as an input
|
||||
* @return Tensor<feature_t>& Squeeze result
|
||||
*/
|
||||
Tensor<feature_t> &call(Tensor<feature_t> &input)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->copy_element(input, true);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "Squeeze");
|
||||
}
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->set_shape(this->output_shape);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "Squeeze");
|
||||
}
|
||||
return *this->output;
|
||||
}
|
||||
};
|
||||
} // namespace layer
|
||||
} // namespace dl
|
@ -21,13 +21,13 @@ namespace dl
|
||||
class Sub2D : public Layer
|
||||
{
|
||||
private:
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
const Activation<feature_t> *activation; /*<! activation of Mul2D, if you don't specify anything, no activation is applied >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Sub2D >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a seperate memeory >*/
|
||||
const int output_exponent; /*<! exponent of output >*/
|
||||
const Activation<feature_t> *activation; /*<! activation of Sub2D, if you don't specify anything, no activation is applied >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Sub2D >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> output_shape; /*<! output shape of Sub2D >*/
|
||||
public:
|
||||
|
||||
/**
|
||||
* @brief Construct a new Sub2D object.
|
||||
*
|
||||
@ -35,18 +35,17 @@ namespace dl
|
||||
* @param activation activation of Mul2D, if you don't specify anything, no activation is applied
|
||||
* @param name name of layer
|
||||
* @param inplace true: the output will store to input0
|
||||
* false: the output will store to a seperate memeory
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Sub2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = NULL, bool inplace = false) : Layer(name),
|
||||
output_exponent(output_exponent), activation(activation), output(NULL)
|
||||
Sub2D(const int output_exponent, const Activation<feature_t> *activation = NULL, const char *name = "Sub2D", bool inplace = false) : Layer(name),
|
||||
output_exponent(output_exponent), activation(activation), output(NULL), inplace(inplace), output_shape({})
|
||||
{
|
||||
this->inplace = inplace;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Sub2D object.
|
||||
*/
|
||||
~Sub2D()
|
||||
~Sub2D()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
@ -60,22 +59,32 @@ namespace dl
|
||||
*
|
||||
* @param input0 as one input
|
||||
* @param input1 as another input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1)
|
||||
void build(Tensor<feature_t> &input0, Tensor<feature_t> &input1, bool print_shape = false)
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
this->output_shape = input0.shape;
|
||||
if (!this->inplace)
|
||||
{
|
||||
if(this->output != NULL)
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->set_shape(input0.shape);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input0;
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -103,7 +112,11 @@ namespace dl
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output.apply_element();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
this->output.malloc_element();
|
||||
this->output->set_exponent(input0.exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "apply");
|
||||
|
||||
@ -114,6 +127,10 @@ namespace dl
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
if (this->output->shape != this->output_shape)
|
||||
{
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
nn::sub2d<true>(this->output, input0, input1, this->activation, assign_core, this->output_exponent);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "sub2d");
|
||||
}
|
||||
|
@ -0,0 +1,126 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_constant.hpp"
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_tool.hpp"
|
||||
#include "dl_layer_base.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace layer
|
||||
{
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @tparam feature_t
|
||||
*/
|
||||
template <typename feature_t>
|
||||
class Transpose : public Layer
|
||||
{
|
||||
private:
|
||||
int output_exponent; /*<! exponent of output >*/
|
||||
Tensor<feature_t> *output; /*<! output ptr of Transpose >*/
|
||||
bool inplace; /*<! true: the output will store to input0
|
||||
false: the output will store to a separate memory >*/
|
||||
std::vector<int> perm; /*<! the new arangement of the dims. if perm == {}, the dims arangement will be reversed. >*/
|
||||
std::vector<int> output_shape; /*<! output shape of Transpose >*/
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new Transpose object
|
||||
*
|
||||
* @param perm the new arangement of the dims. if perm == {}, the dims arangement will be reversed.
|
||||
* @param name name of Transpose layer
|
||||
* @param inplace true: the output will store to input
|
||||
* false: the output will store to a separate memory
|
||||
*/
|
||||
Transpose(std::vector<int> perm = {}, const char *name = "Transpose", bool inplace = false) : Layer(name), perm(perm), inplace(inplace), output_shape({})
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Transpose object
|
||||
*
|
||||
*/
|
||||
~Transpose()
|
||||
{
|
||||
if ((!this->inplace) && (this->output != NULL))
|
||||
{
|
||||
delete this->output;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Update output shape and exponent
|
||||
*
|
||||
* @param input as an input
|
||||
* @param print_shape whether to print the output shape.
|
||||
*/
|
||||
void build(Tensor<feature_t> &input, bool print_shape = false)
|
||||
{
|
||||
this->output_exponent = input.exponent;
|
||||
this->output_shape = input.shape;
|
||||
for (int i = 0; i < this->perm.size(); i++)
|
||||
{
|
||||
this->output_shape[i] = input.shape[this->perm[i]];
|
||||
}
|
||||
if (!this->inplace)
|
||||
{
|
||||
if (this->output != NULL)
|
||||
{
|
||||
this->output = new Tensor<feature_t>;
|
||||
}
|
||||
this->output->set_exponent(this->output_exponent);
|
||||
this->output->set_shape(this->output_shape);
|
||||
this->output->free_element();
|
||||
}
|
||||
else
|
||||
{
|
||||
this->output = &input;
|
||||
this->output->set_shape(this->output_shape);
|
||||
}
|
||||
|
||||
if (print_shape)
|
||||
{
|
||||
std::cout << this->name << " | ";
|
||||
this->output->print_shape();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the output
|
||||
*
|
||||
* @return Tensor<feature_t>& Transpose result
|
||||
*/
|
||||
Tensor<feature_t> &get_output()
|
||||
{
|
||||
return *this->output;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Call Transpose operation.
|
||||
*
|
||||
* @param input as an input.
|
||||
* @return Tensor<feature_t>& Transpose result.
|
||||
*/
|
||||
Tensor<feature_t> &call(Tensor<feature_t> &input)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_INIT();
|
||||
|
||||
if (!this->inplace)
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->set_exponent(input.exponent);
|
||||
this->output->transpose(input, this->perm);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "transpose");
|
||||
}
|
||||
else
|
||||
{
|
||||
DL_LOG_LAYER_LATENCY_START();
|
||||
this->output->transpose(this->perm);
|
||||
DL_LOG_LAYER_LATENCY_END(this->name, "transpose");
|
||||
}
|
||||
return *this->output;
|
||||
}
|
||||
};
|
||||
} // namespace layer
|
||||
} // namespace dl
|
@ -0,0 +1,68 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_image.hpp"
|
||||
|
||||
/**
 * @brief Statistics of one connected domain.
 */
struct components_stats_t
{
    int area;                /*!< area of the connected domain */
    std::vector<int> center; /*!< centroid of the connected domain: [x, y] */
    std::vector<int> box;    /*!< [left_up_x, left_up_y, right_down_x, right_down_y] */
};
|
||||
|
||||
class ColorDetector
|
||||
{
|
||||
private:
|
||||
std::vector<std::vector<components_stats_t>> results; /*!< detection results >*/
|
||||
|
||||
public:
|
||||
std::vector<std::vector<uint8_t>> color_thresh; /*!< threshold of colors, The threshold of each color is composed of 6 numbers >*/
|
||||
std::vector<int> area_thresh; /*!< the area threshold of each color,
|
||||
the area that is smaller than the threshold is filtered >*/
|
||||
bool bgr; /*!< true: the input image is in BGR format
|
||||
false: the input image is in RGB format >*/
|
||||
|
||||
/**
|
||||
* @brief get the color threshold of rectangular region in the image
|
||||
*
|
||||
* @param image the input image
|
||||
* @param box the coordinates of the rectanglar region : [left_up_x, left_up_y, right_down_x, right_down_y]
|
||||
* @return std::vector<uint8_t> the threshold.
|
||||
*/
|
||||
std::vector<uint8_t> cal_color_thresh(dl::Tensor<uint8_t> &image, std::vector<int> box);
|
||||
|
||||
/**
|
||||
* @brief detect the colors based on the color thresholds
|
||||
*
|
||||
* @param image the input image.
|
||||
* @return std::vector<std::vector<components_stats_t>>& detection result.
|
||||
*/
|
||||
std::vector<std::vector<components_stats_t>> &detect(dl::Tensor<uint8_t> &image);
|
||||
|
||||
/**
|
||||
* @brief Construct a new Color Detector object
|
||||
*
|
||||
* @param color_thresh threshold of colors, The threshold of each color is composed of 6 numbers
|
||||
* @param area_thresh the area threshold of each color,the area that is smaller than the threshold is filtered
|
||||
* @param bgr true: the input image is in BGR format
|
||||
* false: the input image is in RGB format
|
||||
*/
|
||||
ColorDetector(std::vector<std::vector<uint8_t>> color_thresh, std::vector<int> area_thresh, bool bgr = false) : color_thresh(color_thresh), area_thresh(area_thresh), bgr(bgr)
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the Color Detector object
|
||||
*
|
||||
*/
|
||||
~ColorDetector() {}
|
||||
|
||||
/**
|
||||
* @brief Get the results object
|
||||
*
|
||||
* @return std::vector<std::vector<components_stats_t>>& the detection result.
|
||||
*/
|
||||
std::vector<std::vector<components_stats_t>> &get_results()
|
||||
{
|
||||
return this->results;
|
||||
}
|
||||
};
|
@ -92,7 +92,7 @@ namespace face_recognition_tool
|
||||
* @return dl::Tensor<T>*
|
||||
*/
|
||||
template <typename T>
|
||||
dl::Tensor<T> *transform_mfn_input(dl::Tensor<uint8_t> &image, bool free_input = false, bool do_padding = true);
|
||||
dl::Tensor<T> *transform_mfn_input(dl::Tensor<uint8_t> &image, bool free_input = false);
|
||||
|
||||
/**
|
||||
* @brief transform the image to the input of a mfn model
|
||||
@ -106,7 +106,7 @@ namespace face_recognition_tool
|
||||
* false: do not pad the result
|
||||
*/
|
||||
template <typename T>
|
||||
void transform_mfn_input(dl::Tensor<uint8_t> &image, dl::Tensor<T> &output, bool free_input = false, bool do_padding = true);
|
||||
void transform_mfn_input(dl::Tensor<uint8_t> &image, dl::Tensor<T> &output, bool free_input = false);
|
||||
|
||||
/**
|
||||
* @brief transform the mfn output embedding to a floating embedding
|
||||
|
@ -14,13 +14,13 @@ namespace dl
|
||||
* @param filter_shape filter shape with dilation
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param pad_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET
|
||||
* @param pad_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN
|
||||
* @param is_conv2d one of true or false,
|
||||
* - true: serve for Conv2D
|
||||
* - false: serve for other operations
|
||||
* @return std::vector<int>
|
||||
*/
|
||||
std::vector<int> get_output_shape(const std::vector<int> &input_shape, const std::vector<int> &filter_shape, const int stride_y, const int stride_x, const padding_type_t pad_type, const bool is_conv2d = false);
|
||||
std::vector<int> get_output_shape(const std::vector<int> &input_shape, const std::vector<int> &filter_shape, const int stride_y, const int stride_x, const padding_type_t pad_type, const bool is_conv2d = false, std::vector<int> padding = {});
|
||||
|
||||
/**
|
||||
* @brief Get the pad size object
|
||||
@ -30,7 +30,7 @@ namespace dl
|
||||
* @param filter_shape filter shape with dilation
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN
|
||||
* @return padding size
|
||||
*/
|
||||
std::vector<int> get_pad_size(const std::vector<int> &output_shape, const std::vector<int> &input_shape, const std::vector<int> &filter_shape, const int stride_y, const int stride_x, const padding_type_t padding_type);
|
||||
|
@ -58,20 +58,20 @@ namespace dl
|
||||
*/
|
||||
template <bool inplace = false, typename feature_t>
|
||||
auto add2d(const int output_exponent,
|
||||
Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const Activation<feature_t> *activation,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const Activation<feature_t> *activation,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
|
||||
Tensor<feature_t> output;
|
||||
if constexpr(!inplace)
|
||||
if constexpr (!inplace)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
output.set_exponent(output_exponent).set_shape(input0.shape).apply_element();
|
||||
output.set_exponent(output_exponent).set_shape(input0.shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -58,12 +58,12 @@ namespace dl
|
||||
* @param filter_shape filter_shape in [filter_height, filter_width]
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN,
|
||||
* - PADDING_VALID: no padding
|
||||
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* PADDING_SAME_END and PADDING_SAME_BEGIN result in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input,
|
||||
* - PADDING_SAME results padding in TensorFlow style
|
||||
* - PADDING_SAME_MXNET results padding in MXNET style
|
||||
* - PADDING_SAME_END results in TensorFlow-style padding
|
||||
* - PADDING_SAME_BEGIN results in MXNET-style padding
|
||||
* @param assign_core not effective yet
|
||||
* @return avg_pool2d result
|
||||
*/
|
||||
@ -81,19 +81,19 @@ namespace dl
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
std::vector<int> output_shape = get_output_shape(input.shape, filter_shape, stride_y, stride_x, padding_type);
|
||||
Tensor<feature_t> output;
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).apply_element();
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
std::vector<int> padding(4, 0);
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
if (padding_type == PADDING_SAME || padding_type == PADDING_SAME_MXNET)
|
||||
if (padding_type == PADDING_SAME_END || padding_type == PADDING_SAME_BEGIN)
|
||||
{
|
||||
std::vector<int> padding = get_pad_size(output_shape, input.shape, filter_shape, stride_y, stride_x, padding_type);
|
||||
input.set_padding_size(padding);
|
||||
padding = get_pad_size(output_shape, input.shape, filter_shape, stride_y, stride_x, padding_type);
|
||||
}
|
||||
DL_LOG_NN_LATENCY_END("padding");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
avg_pool2d(output, input, input.padding, filter_shape, stride_y, stride_x, assign_core);
|
||||
avg_pool2d(output, input, padding, filter_shape, stride_y, stride_x, assign_core);
|
||||
DL_LOG_NN_LATENCY_END("avg_pool2d");
|
||||
|
||||
return output;
|
||||
|
@ -0,0 +1,63 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_nn.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace nn
|
||||
{
|
||||
template <typename feature_t>
|
||||
void concat(Tensor<feature_t> &output, std::vector<Tensor<feature_t> *> &inputs, int axis, bool free_inputs = false);
|
||||
|
||||
template <typename feature_t>
|
||||
Tensor<feature_t> concat(std::vector<Tensor<feature_t> *> &inputs, int axis, bool free_inputs = false)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
assert(inputs.size() > 1);
|
||||
int shape_size = inputs[0]->shape.size();
|
||||
|
||||
if (axis < 0)
|
||||
{
|
||||
axis = shape_size + axis;
|
||||
}
|
||||
|
||||
assert((axis < shape_size) && (axis > -1));
|
||||
|
||||
int output_shape_axis = inputs[0]->shape[axis];
|
||||
|
||||
for (int i = 1; i < inputs.size(); i++)
|
||||
{
|
||||
assert(shape_size == inputs[i]->shape.size());
|
||||
assert(inputs[i]->exponent == inputs[i - 1]->exponent);
|
||||
output_shape_axis += inputs[i]->shape[axis];
|
||||
|
||||
for (int j = 0; j < shape_size; j++)
|
||||
{
|
||||
if (j != axis)
|
||||
{
|
||||
assert(inputs[i]->shape[j] == inputs[i - 1]->shape[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
DL_LOG_NN_LATENCY_END("assert");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
Tensor<feature_t> output;
|
||||
std::vector<int> output_shape = inputs[0]->shape;
|
||||
output_shape[axis] = output_shape_axis;
|
||||
output.set_shape(output_shape);
|
||||
output.set_exponent(inputs[0]->exponent);
|
||||
output.malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("malloc");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
concat(output, inputs, axis, free_inputs);
|
||||
DL_LOG_NN_LATENCY_END("concat");
|
||||
return output;
|
||||
}
|
||||
} // namespace nn
|
||||
} // namespace dl
|
@ -10,7 +10,6 @@ namespace dl
|
||||
{
|
||||
/**
|
||||
* @brief activation(conv2d(input, filter) + bias).
|
||||
* NOTE: When padding_type is SAME, make sure padding is already added in input.
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
@ -34,7 +33,6 @@ namespace dl
|
||||
|
||||
/**
|
||||
* @brief activation(conv2d(input, filter) + bias).
|
||||
* NOTE: When padding_type is SAME, make sure padding is already added in input.
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
@ -56,6 +54,29 @@ namespace dl
|
||||
const Activation<int8_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief activation(conv2d(input, filter) + bias).
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
* @param padding padding size needed in [top, bottom, left, right] of this operation
|
||||
* @param filter filter of conv2d
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param bias bias of conv2d, if you don't specify anything, no bias is added
|
||||
* @param activation activation of conv2d, if you don't specify anything, no activation is applied
|
||||
* @param assign_core not effective yet
|
||||
*/
|
||||
void conv2d(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input,
|
||||
std::vector<int> &padding,
|
||||
const Filter<int8_t> &filter,
|
||||
const int stride_y,
|
||||
const int stride_x,
|
||||
const Bias<int16_t> *const bias = NULL,
|
||||
const Activation<int8_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief activation(conv2d(input, filter) + bias).
|
||||
*
|
||||
@ -67,25 +88,25 @@ namespace dl
|
||||
* @param filter Filter of conv2d
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN,
|
||||
* - PADDING_VALID: no padding
|
||||
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* PADDING_SAME_END and PADDING_SAME_BEGIN result in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input,
|
||||
* - PADDING_SAME results padding in TensorFlow style
|
||||
* - PADDING_SAME_MXNET results padding in MXNET style
|
||||
* - PADDING_SAME_END results in TensorFlow-style padding
|
||||
* - PADDING_SAME_BEGIN results in MXNET-style padding
|
||||
* @param bias bias of conv2d, if you don't specify anything, no bias is added
|
||||
* @param activation activation of conv2d, if you don't specify anything, no activation is applied
|
||||
* @param assign_core not effective yet
|
||||
* @return conv2d result
|
||||
*/
|
||||
template <typename feature_t>
|
||||
template <typename feature_t, typename bias_t>
|
||||
Tensor<feature_t> conv2d(const int output_exponent,
|
||||
Tensor<feature_t> &input,
|
||||
const Filter<feature_t> &filter,
|
||||
const int stride_y,
|
||||
const int stride_x,
|
||||
const padding_type_t padding_type,
|
||||
const Bias<feature_t> *bias,
|
||||
const Bias<bias_t> *bias,
|
||||
const Activation<feature_t> *activation,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE)
|
||||
{
|
||||
@ -94,20 +115,19 @@ namespace dl
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
std::vector<int> output_shape = get_output_shape(input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type, true);
|
||||
Tensor<feature_t> output;
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).apply_element();
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
std::vector<int> padding(4, 0);
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
if (padding_type == PADDING_SAME || padding_type == PADDING_SAME_MXNET)
|
||||
if (padding_type == PADDING_SAME_END || padding_type == PADDING_SAME_BEGIN)
|
||||
{
|
||||
std::vector<int> padding = get_pad_size(output_shape, input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
|
||||
input.set_padding_size(padding);
|
||||
input.set_padding_value(padding, 0);
|
||||
padding = get_pad_size(output_shape, input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
|
||||
}
|
||||
DL_LOG_NN_LATENCY_END("padding");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
conv2d(output, input, input.padding, filter, stride_y, stride_x, bias, activation, assign_core);
|
||||
conv2d(output, input, padding, filter, stride_y, stride_x, bias, activation, assign_core);
|
||||
DL_LOG_NN_LATENCY_END("conv2d");
|
||||
|
||||
return output;
|
||||
|
@ -10,7 +10,6 @@ namespace dl
|
||||
{
|
||||
/**
|
||||
* @brief activate(depthwise_conv2d(input, filter) + bias)
|
||||
* NOTE: When padding_type is SAME, make sure padding is already added in input
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
@ -34,7 +33,6 @@ namespace dl
|
||||
|
||||
/**
|
||||
* @brief activate(depthwise_conv2d(input, filter) + bias)
|
||||
* NOTE: When padding_type is SAME, make sure padding is already added in input
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
@ -56,6 +54,29 @@ namespace dl
|
||||
const Activation<int8_t> *activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief activate(depthwise_conv2d(input, filter) + bias)
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
* @param padding padding size needed in [top, bottom, left, right] of this operation
|
||||
* @param filter Filter of depthwise_conv2d
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param bias bias of depthwise_conv2d, if you don't specify anything, no bias is added
|
||||
* @param activation activation of depthwise_conv2d, if you don't specify anything, no activation is applied
|
||||
* @param assign_core not effective yet
|
||||
*/
|
||||
void depthwise_conv2d(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input,
|
||||
std::vector<int> &padding,
|
||||
const Filter<int8_t> &filter,
|
||||
const int stride_y,
|
||||
const int stride_x,
|
||||
const Bias<int16_t> *bias = NULL,
|
||||
const Activation<int8_t> *activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief activation(depthwise_conv2d(input, filter) + bias)
|
||||
*
|
||||
@ -67,25 +88,25 @@ namespace dl
|
||||
* @param filter filter of depthwise_conv2d
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param pad_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
|
||||
* @param pad_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN,
|
||||
* - PADDING_VALID means no padding
|
||||
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* PADDING_SAME_END and PADDING_SAME_BEGIN result in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input,
|
||||
* - PADDING_SAME results padding in TensorFlow style
|
||||
* - PADDING_SAME_MXNET results padding in MXNET style
|
||||
* - PADDING_SAME_END results in TensorFlow-style padding
|
||||
* - PADDING_SAME_BEGIN results in MXNET-style padding
|
||||
* @param bias bias of depthwise_conv2d, if you don't specify anything, no bias is added
|
||||
* @param activation activation of depthwise_conv2d, if you don't specify anything, no activation is applied
|
||||
* @param assign_core not effective yet
|
||||
* @return depthwise_conv2d result
|
||||
*/
|
||||
template <typename feature_t>
|
||||
template <typename feature_t, typename bias_t>
|
||||
Tensor<feature_t> depthwise_conv2d(const int output_exponent,
|
||||
Tensor<feature_t> &input,
|
||||
const Filter<feature_t> &filter,
|
||||
const int stride_y,
|
||||
const int stride_x,
|
||||
const padding_type_t padding_type,
|
||||
const Bias<feature_t> *bias,
|
||||
const Bias<bias_t> *bias,
|
||||
const Activation<feature_t> *activation,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE)
|
||||
{
|
||||
@ -94,20 +115,20 @@ namespace dl
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
std::vector<int> output_shape = get_output_shape(input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
|
||||
Tensor<feature_t> output;
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).apply_element();
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
std::vector<int> padding(4, 0);
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
if (padding_type == PADDING_SAME || padding_type == PADDING_SAME_MXNET)
|
||||
if (padding_type == PADDING_SAME_END || padding_type == PADDING_SAME_BEGIN)
|
||||
{
|
||||
std::vector<int> padding = get_pad_size(output_shape, input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
|
||||
input.set_padding_size(padding);
|
||||
input.set_padding_value(padding, 0);
|
||||
padding = get_pad_size(output_shape, input.shape, filter.shape_with_dilation, stride_y, stride_x, padding_type);
|
||||
}
|
||||
DL_LOG_NN_LATENCY_END("padding");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
depthwise_conv2d(output, input, input.padding, filter, stride_y, stride_x, bias, activation, assign_core);
|
||||
depthwise_conv2d(output, input, padding, filter, stride_y, stride_x, bias, activation, assign_core);
|
||||
DL_LOG_NN_LATENCY_END("depthwise_conv2d");
|
||||
|
||||
return output;
|
||||
|
@ -0,0 +1,126 @@
|
||||
#pragma once
|
||||
|
||||
#include "dl_constant.hpp"
|
||||
#include "dl_variable.hpp"
|
||||
#include "dl_nn.hpp"
|
||||
|
||||
namespace dl
|
||||
{
|
||||
namespace nn
|
||||
{
|
||||
/**
|
||||
* @brief activation(FullyConnected(input, filter) + bias).
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
* @param filter filter of FullyConnected
|
||||
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
|
||||
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
|
||||
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
|
||||
* false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
|
||||
* @param assign_core not effective yet
|
||||
*/
|
||||
void fully_connected(Tensor<int16_t> &output,
|
||||
Tensor<int16_t> &input,
|
||||
const Filter<int16_t> &filter,
|
||||
const Bias<int16_t> *const bias = NULL,
|
||||
const Activation<int16_t> *const activation = NULL,
|
||||
const bool flatten = true,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief activation(FullyConnected(input, filter) + bias).
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
* @param filter filter of FullyConnected
|
||||
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
|
||||
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
|
||||
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
|
||||
* false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
|
||||
* @param assign_core not effective yet
|
||||
*/
|
||||
void fully_connected(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input,
|
||||
const Filter<int8_t> &filter,
|
||||
const Bias<int8_t> *const bias = NULL,
|
||||
const Activation<int8_t> *const activation = NULL,
|
||||
const bool flatten = true,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief activation(FullyConnected(input, filter) + bias).
|
||||
*
|
||||
* @param output as an output
|
||||
* @param input as an input
|
||||
* @param filter filter of FullyConnected
|
||||
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
|
||||
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
|
||||
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
|
||||
* false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
|
||||
* @param assign_core not effective yet
|
||||
*/
|
||||
void fully_connected(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input,
|
||||
const Filter<int8_t> &filter,
|
||||
const Bias<int16_t> *const bias = NULL,
|
||||
const Activation<int8_t> *const activation = NULL,
|
||||
const bool flatten = true,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief activation(FullyConnected(input, filter) + bias).
|
||||
*
|
||||
* @tparam feature_t supports int16_t and int8_t,
|
||||
* - int16_t: stands for operation in int16_t quantize
|
||||
* - int8_t: stands for operation in int8_t quantize
|
||||
* @param output_exponent exponent of output
|
||||
* @param input as an input
|
||||
* @param filter Filter of FullyConnected
|
||||
* @param bias bias of FullyConnected, if you don't specify anything, no bias is added
|
||||
* @param activation activation of FullyConnected, if you don't specify anything, no activation is applied
|
||||
* @param flatten true: input shape is [x1, x2, ..., xn], filter shape is [1, 1, x1 * x2 * ... * xn, output_dim], output shape is [output_dim]
|
||||
* false: input shape is [x1, x2, ..., xn, input_dim], filter shape is [1, 1, input_dim, output_dim], output shape is [x1, x2, ...., xn, output_dim]
|
||||
* @param assign_core not effective yet
|
||||
* @return FullyConnected result
|
||||
*/
|
||||
template <typename feature_t>
|
||||
Tensor<feature_t> fully_connected(const int output_exponent,
|
||||
Tensor<feature_t> &input,
|
||||
const Filter<feature_t> &filter,
|
||||
const Bias<feature_t> *bias,
|
||||
const Activation<feature_t> *activation,
|
||||
const bool flatten,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
assert(filter.shape.size() == 4);
|
||||
assert(filter.shape[0] == 1);
|
||||
assert(filter.shape[1] == 1);
|
||||
|
||||
std::vector<int> output_shape;
|
||||
if (flatten)
|
||||
{
|
||||
assert(input.get_size() == filter.shape[2]);
|
||||
output_shape = {filter.shape.back()};
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(input.shape.back() == filter->shape[2]);
|
||||
output_shape = input.shape;
|
||||
output_shape[output_shape.size() - 1] = filter.shape.back();
|
||||
}
|
||||
Tensor<feature_t> output;
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
fully_connected(output, input, filter, bias, activation, flatten, assign_core);
|
||||
DL_LOG_NN_LATENCY_END("fully_connected");
|
||||
|
||||
return output;
|
||||
}
|
||||
} // namespace nn
|
||||
} // namespace dl
|
@ -53,7 +53,7 @@ namespace dl
|
||||
std::vector<int> output_shape(input.shape.size(), 1);
|
||||
output_shape[2] = input.shape[2];
|
||||
Tensor<feature_t> output;
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).apply_element();
|
||||
output.set_exponent(output_exponent).set_shape(output_shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -51,7 +51,7 @@ namespace dl
|
||||
std::vector<int> output_shape(input.shape.size(), 1);
|
||||
output_shape[2] = input.shape[2];
|
||||
Tensor<feature_t> output;
|
||||
output.set_exponent(input.exponent).set_shape(output_shape).apply_element();
|
||||
output.set_exponent(input.exponent).set_shape(output_shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -52,17 +52,17 @@ namespace dl
|
||||
* @return leakyrelu result or no return(result store to input)
|
||||
*/
|
||||
template <bool inplace = false, typename feature_t>
|
||||
auto leakyrelu(Tensor<feature_t> &input,
|
||||
const int activation_alpha,
|
||||
const int activation_exponent,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
auto leakyrelu(Tensor<feature_t> &input,
|
||||
const int activation_alpha,
|
||||
const int activation_exponent,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
{
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
Tensor<feature_t> output;
|
||||
if constexpr(!inplace)
|
||||
if constexpr (!inplace)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
output.set_exponent(input.exponent).set_shape(input.shape).apply_element();
|
||||
output.set_exponent(input.exponent).set_shape(input.shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -48,20 +48,20 @@ namespace dl
|
||||
* @return max2d result or no return(result store to input0)
|
||||
*/
|
||||
template <bool inplace = false, typename feature_t>
|
||||
auto max2d(Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
auto max2d(Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
assert(input0.exponent == input1.exponent);
|
||||
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
Tensor<feature_t> output;
|
||||
|
||||
if constexpr(!inplace)
|
||||
|
||||
if constexpr (!inplace)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
output.set_exponent(input0.exponent).set_shape(input0.shape).apply_element();
|
||||
output.set_exponent(input0.exponent).set_shape(input0.shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -57,12 +57,12 @@ namespace dl
|
||||
* @param filter_shape filter shape in [filter_height, filter_width]
|
||||
* @param stride_y stride in height
|
||||
* @param stride_x stride in width
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME or PADDING_SAME_MXNET,
|
||||
* @param padding_type one of PADDING_VALID or PADDING_SAME_END or PADDING_SAME_BEGIN,
|
||||
* - PADDING_VALID: no padding
|
||||
* PADDING_SAME and PADDING_SAME_MXNET results in padding with zeros evenly to the left/right or up/down of the input
|
||||
* PADDING_SAME_END and PADDING_SAME_BEGIN result in padding with zeros evenly to the left/right or up/down of the input
|
||||
* such that output has the same height/width dimension as the input,
|
||||
* - PADDING_SAME results padding in TensorFlow style
|
||||
* - PADDING_SAME_MXNET results padding in MXNET style
|
||||
* - PADDING_SAME_END results in TensorFlow-style padding
|
||||
* - PADDING_SAME_BEGIN results in MXNET-style padding
|
||||
* @param assign_core not effective yet
|
||||
* @return max_pool2d result
|
||||
*/
|
||||
@ -79,20 +79,20 @@ namespace dl
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
std::vector<int> output_shape = get_output_shape(input.shape, filter_shape, stride_y, stride_x, padding_type);
|
||||
Tensor<feature_t> output;
|
||||
output.set_exponent(input.exponent).set_shape(output_shape).apply_element();
|
||||
output.set_exponent(input.exponent).set_shape(output_shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
std::vector<int> padding(4, 0);
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
if (padding_type == PADDING_SAME || padding_type == PADDING_SAME_MXNET)
|
||||
if (padding_type == PADDING_SAME_END || padding_type == PADDING_SAME_BEGIN)
|
||||
{
|
||||
std::vector<int> padding = get_pad_size(output_shape, input.shape, filter_shape, stride_y, stride_x, padding_type);
|
||||
input.set_padding_size(padding);
|
||||
input.set_padding_value(padding, 0);
|
||||
padding = get_pad_size(output_shape, input.shape, filter_shape, stride_y, stride_x, padding_type);
|
||||
}
|
||||
DL_LOG_NN_LATENCY_END("padding");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
max_pool2d(output, input, input.padding, filter_shape, stride_y, stride_x, assign_core);
|
||||
max_pool2d(output, input, padding, filter_shape, stride_y, stride_x, assign_core);
|
||||
DL_LOG_NN_LATENCY_END("max_pool2d");
|
||||
|
||||
return output;
|
||||
|
@ -47,20 +47,20 @@ namespace dl
|
||||
* @return min2d result or no return(result store to input0)
|
||||
*/
|
||||
template <bool inplace = false, typename feature_t>
|
||||
auto min2d(Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
auto min2d(Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
assert(input0.exponent == input1.exponent);
|
||||
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
Tensor<feature_t> output;
|
||||
|
||||
if constexpr(!inplace)
|
||||
|
||||
if constexpr (!inplace)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
output.set_exponent(input0.exponent).set_shape(input0.shape).apply_element();
|
||||
output.set_exponent(input0.exponent).set_shape(input0.shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -18,12 +18,12 @@ namespace dl
|
||||
* @param assign_core not effective yet
|
||||
* @param output_exponent exponent of output; used only for, and required by, in-place operation
|
||||
*/
|
||||
void mul2d(Tensor<int16_t> &output,
|
||||
Tensor<int16_t> &input0,
|
||||
Tensor<int16_t> &input1,
|
||||
const Activation<int16_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
|
||||
const int output_exponent = INT_MIN);
|
||||
void mul2d(Tensor<int16_t> &output,
|
||||
Tensor<int16_t> &input0,
|
||||
Tensor<int16_t> &input1,
|
||||
const Activation<int16_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
|
||||
const int output_exponent = INT_MIN);
|
||||
|
||||
/**
|
||||
* @brief activation(mul2d(input0, input1)).
|
||||
@ -35,12 +35,12 @@ namespace dl
|
||||
* @param assign_core not effective yet
|
||||
* @param output_exponent exponent of output; used only for, and required by, in-place operation
|
||||
*/
|
||||
void mul2d(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input0,
|
||||
Tensor<int8_t> &input1,
|
||||
const Activation<int8_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
|
||||
const int output_exponent = INT_MIN);
|
||||
void mul2d(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input0,
|
||||
Tensor<int8_t> &input1,
|
||||
const Activation<int8_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
|
||||
const int output_exponent = INT_MIN);
|
||||
|
||||
/**
|
||||
* @brief activation(mul2d(input0, input1)).
|
||||
@ -57,21 +57,21 @@ namespace dl
|
||||
* @return mul2d result or no return(result store to input0)
|
||||
*/
|
||||
template <bool inplace = false, typename feature_t>
|
||||
auto mul2d(const int output_exponent,
|
||||
Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const Activation<feature_t> *activation,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
auto mul2d(const int output_exponent,
|
||||
Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const Activation<feature_t> *activation,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
Tensor<feature_t> output;
|
||||
|
||||
if constexpr(!inplace)
|
||||
if constexpr (!inplace)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
output.set_exponent(output_exponent).set_shape(input0.shape).apply_element();
|
||||
output.set_exponent(output_exponent).set_shape(input0.shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -52,17 +52,17 @@ namespace dl
|
||||
* @return prelu result or no return(result store to input)
|
||||
*/
|
||||
template <bool inplace = false, typename feature_t>
|
||||
auto prelu(Tensor<feature_t> &input,
|
||||
const feature_t *activation_element,
|
||||
const int activation_exponent,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
auto prelu(Tensor<feature_t> &input,
|
||||
const feature_t *activation_element,
|
||||
const int activation_exponent,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
{
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
Tensor<feature_t> output;
|
||||
if constexpr(!inplace)
|
||||
if constexpr (!inplace)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
output.set_exponent(input.exponent).set_shape(input.shape).apply_element();
|
||||
output.set_exponent(input.exponent).set_shape(input.shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
@ -76,7 +76,7 @@ namespace dl
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
prelu(input, input, activation_element, activation_exponent, assign_core);
|
||||
DL_LOG_NN_LATENCY_END("prelu");
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace nn
|
||||
} // namespace dl
|
@ -15,9 +15,9 @@ namespace dl
|
||||
* @param input as an input
|
||||
* @param assign_core not effective yet
|
||||
*/
|
||||
void relu(Tensor<int16_t> &output,
|
||||
Tensor<int16_t> &input,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
void relu(Tensor<int16_t> &output,
|
||||
Tensor<int16_t> &input,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief relu(input).
|
||||
@ -26,9 +26,9 @@ namespace dl
|
||||
* @param input as an input
|
||||
* @param assign_core not effective yet
|
||||
*/
|
||||
void relu(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
void relu(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE);
|
||||
|
||||
/**
|
||||
* @brief relu(input)
|
||||
@ -46,11 +46,11 @@ namespace dl
|
||||
{
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
Tensor<feature_t> output;
|
||||
|
||||
if constexpr(!inplace)
|
||||
|
||||
if constexpr (!inplace)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
output.set_exponent(input.exponent).set_shape(input.shape).apply_element();
|
||||
output.set_exponent(input.exponent).set_shape(input.shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -18,12 +18,12 @@ namespace dl
|
||||
* @param assign_core not effective yet
|
||||
* @param output_exponent exponent of output, only and must specify if inplace operation happens
|
||||
*/
|
||||
void sub2d(Tensor<int16_t> &output,
|
||||
Tensor<int16_t> &input0,
|
||||
Tensor<int16_t> &input1,
|
||||
const Activation<int16_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
|
||||
const int output_exponent = INT_MIN);
|
||||
void sub2d(Tensor<int16_t> &output,
|
||||
Tensor<int16_t> &input0,
|
||||
Tensor<int16_t> &input1,
|
||||
const Activation<int16_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
|
||||
const int output_exponent = INT_MIN);
|
||||
|
||||
/**
|
||||
* @brief activation(sub2d(input0, input1)).
|
||||
@ -35,12 +35,12 @@ namespace dl
|
||||
* @param assign_core not effective yet
|
||||
* @param output_exponent exponent of output, only and must specify if inplace operation happens
|
||||
*/
|
||||
void sub2d(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input0,
|
||||
Tensor<int8_t> &input1,
|
||||
const Activation<int8_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
|
||||
const int output_exponent = INT_MIN);
|
||||
void sub2d(Tensor<int8_t> &output,
|
||||
Tensor<int8_t> &input0,
|
||||
Tensor<int8_t> &input1,
|
||||
const Activation<int8_t> *const activation = NULL,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE,
|
||||
const int output_exponent = INT_MIN);
|
||||
|
||||
/**
|
||||
* @brief activation(sub2d(input0, input1)).
|
||||
@ -57,20 +57,20 @@ namespace dl
|
||||
* @return sub2d result or no return(result store to input0)
|
||||
*/
|
||||
template <bool inplace = false, typename feature_t>
|
||||
auto sub2d(const int output_exponent,
|
||||
Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const Activation<feature_t> *activation,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
auto sub2d(const int output_exponent,
|
||||
Tensor<feature_t> &input0,
|
||||
Tensor<feature_t> &input1,
|
||||
const Activation<feature_t> *activation,
|
||||
const std::vector<int> &assign_core = CONFIG_DEFAULT_ASSIGN_CORE) -> typename std::conditional<inplace, void, Tensor<feature_t>>::type
|
||||
{
|
||||
assert(input0.is_same_shape(input1));
|
||||
|
||||
DL_LOG_NN_LATENCY_INIT();
|
||||
Tensor<feature_t> output;
|
||||
if constexpr(!inplace)
|
||||
if constexpr (!inplace)
|
||||
{
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
output.set_exponent(output_exponent).set_shape(input0.shape).apply_element();
|
||||
output.set_exponent(output_exponent).set_shape(input0.shape).malloc_element();
|
||||
DL_LOG_NN_LATENCY_END("apply");
|
||||
|
||||
DL_LOG_NN_LATENCY_START();
|
||||
|
@ -67,62 +67,49 @@ namespace dl
|
||||
void copy_memory(void *dst, void *src, const int n);
|
||||
|
||||
/**
|
||||
* @brief Apply memory without initialized. Must use free_aligned() to free the memory.
|
||||
* @brief Apply memory without initialized. Can use free_aligned() to free the memory.
|
||||
*
|
||||
* @param number number of elements
|
||||
* @param size size of element
|
||||
* @param align number of aligned, e.g., 16 means 16-byte aligned
|
||||
* @param align number of byte aligned, e.g., 16 means 16-byte aligned
|
||||
* @return pointer of allocated memory. NULL for failed
|
||||
*/
|
||||
inline void *malloc_aligned(int number, int size, int align = 0)
|
||||
inline void *malloc_aligned(int number, int size, int align = 4)
|
||||
{
|
||||
int n = number * size;
|
||||
n >>= 4;
|
||||
n += 2;
|
||||
n <<= 4;
|
||||
int total_size = n + align + sizeof(void *) + sizeof(int);
|
||||
void *res = malloc(total_size);
|
||||
assert((align > 0) && (((align & (align-1)) == 0)));
|
||||
int total_size = number * size;
|
||||
|
||||
void *res = heap_caps_aligned_alloc(align, total_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL);
|
||||
#if DL_SPIRAM_SUPPORT
|
||||
if (NULL == res)
|
||||
res = heap_caps_malloc(total_size, MALLOC_CAP_SPIRAM);
|
||||
res = heap_caps_aligned_alloc(align, total_size, MALLOC_CAP_SPIRAM);
|
||||
#endif
|
||||
if (NULL == res)
|
||||
{
|
||||
printf("Fail to malloc %d bytes from DRAM(%d bytyes) and PSRAM(%d bytes), PSRAM is %s.\n",
|
||||
total_size,
|
||||
heap_caps_get_free_size(MALLOC_CAP_INTERNAL),
|
||||
heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL),
|
||||
heap_caps_get_free_size(MALLOC_CAP_SPIRAM),
|
||||
DL_SPIRAM_SUPPORT ? "on" : "off");
|
||||
return NULL;
|
||||
}
|
||||
void **data = (void **)res + 2; // 4-byte for pointer, 4-bytes for n
|
||||
void **aligned;
|
||||
if (align)
|
||||
aligned = (void **)(((size_t)data + (align - 1)) & -align);
|
||||
else
|
||||
aligned = data;
|
||||
|
||||
aligned[-1] = res;
|
||||
int *temp = (int *)aligned;
|
||||
temp[-2] = n;
|
||||
|
||||
return (void *)aligned;
|
||||
return (void *)res;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Apply memory with zero-initialized. Must use dl_lib_free() to free the memory.
|
||||
* @brief Apply memory with zero-initialized. Can use free_aligned() to free the memory.
|
||||
*
|
||||
* @param number number of elements
|
||||
* @param size size of element
|
||||
* @param align number of aligned, e.g., 16 means 16-byte aligned
|
||||
* @param align number of byte aligned, e.g., 16 means 16-byte aligned
|
||||
* @return pointer of allocated memory. NULL for failed
|
||||
*/
|
||||
inline void *calloc_aligned(int number, int size, int align = 0)
|
||||
inline void *calloc_aligned(int number, int size, int align = 4)
|
||||
{
|
||||
|
||||
void *aligned = malloc_aligned(number, size, align);
|
||||
int n = *((int *)aligned - 2);
|
||||
set_zero(aligned, n);
|
||||
set_zero(aligned, number * size);
|
||||
|
||||
return (void *)aligned;
|
||||
}
|
||||
@ -137,7 +124,70 @@ namespace dl
|
||||
if (NULL == address)
|
||||
return;
|
||||
|
||||
free(((void **)address)[-1]);
|
||||
heap_caps_free(address);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Apply memory without initialized in preference order: internal aligned, internal, external aligned
|
||||
*
|
||||
* @param number number of elements
|
||||
* @param size size of element
|
||||
* @param align number of byte aligned, e.g., 16 means 16-byte aligned
|
||||
* @return pointer of allocated memory. NULL for failed
|
||||
*/
|
||||
inline void *malloc_aligned_prefer(int number, int size, int align = 4)
|
||||
{
|
||||
assert((align > 0) && (((align & (align-1)) == 0)));
|
||||
int total_size = number * size;
|
||||
void *res = heap_caps_aligned_alloc(align, total_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL);
|
||||
if (NULL == res){
|
||||
res = heap_caps_malloc(total_size, MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL);
|
||||
}
|
||||
#if DL_SPIRAM_SUPPORT
|
||||
if (NULL == res){
|
||||
res = heap_caps_aligned_alloc(align, total_size, MALLOC_CAP_SPIRAM);
|
||||
}
|
||||
#endif
|
||||
if (NULL == res)
|
||||
{
|
||||
printf("Fail to malloc %d bytes from DRAM(%d bytyes) and PSRAM(%d bytes), PSRAM is %s.\n",
|
||||
total_size,
|
||||
heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL),
|
||||
heap_caps_get_free_size(MALLOC_CAP_SPIRAM),
|
||||
DL_SPIRAM_SUPPORT ? "on" : "off");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Apply memory with zero-initialized in preference order: internal aligned, internal, external aligned
|
||||
*
|
||||
* @param number number of elements
|
||||
* @param size size of element
|
||||
* @param align number of byte aligned, e.g., 16 means 16-byte aligned
|
||||
* @return pointer of allocated memory. NULL for failed
|
||||
*/
|
||||
inline void *calloc_aligned_prefer(int number, int size, int align = 4)
|
||||
{
|
||||
void *res = malloc_aligned_prefer(number, size, align);
|
||||
set_zero(res, number * size);
|
||||
|
||||
return (void *)res;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Free the calloc_aligned_prefer() and malloc_aligned_prefer() memory
|
||||
*
|
||||
* @param address pointer of memory to free
|
||||
*/
|
||||
inline void free_aligned_prefer(void *address)
|
||||
{
|
||||
if (NULL == address)
|
||||
return;
|
||||
|
||||
heap_caps_free(address);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -57,7 +57,8 @@ namespace dl
|
||||
* @param exponent exponent of element
|
||||
* @param shape shape of Filter,
|
||||
* - 1D: reserved
|
||||
* - 2D: [filter_height, filter_width, input_channel, output_channel]
|
||||
* - 2D: for convolution is [filter_height, filter_width, input_channel, output_channel],
|
||||
* for depthwise convolution is [filter_height, filter_width, input_channel, 1]
|
||||
* @param dilation dilation of Filter
|
||||
* - 1D: reserved
|
||||
* - 2D: [dilation_in_height, dilation_in_width]
|
||||
@ -97,6 +98,9 @@ namespace dl
|
||||
{
|
||||
public:
|
||||
using Constant<T>::Constant;
|
||||
std::vector<int> channel_exponent; /*<! exponent for per-channel >*/
|
||||
|
||||
Bias(const T *element, const std::vector<int> channel_exponent, const std::vector<int> shape);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "dl_tool.hpp"
|
||||
|
||||
@ -17,27 +18,20 @@ namespace dl
|
||||
class Tensor
|
||||
{
|
||||
private:
|
||||
int size; /*<! size of element including padding */
|
||||
bool auto_free; /*<! free element when object destroy */
|
||||
int size; /*<! size of element including padding */
|
||||
bool auto_free; /*<! free element when object destroy */
|
||||
std::vector<int> axis_offset; /*<! element offset of each axis */
|
||||
|
||||
public:
|
||||
T *element; /*<! point to element */
|
||||
int exponent; /*<! exponent of element */
|
||||
std::vector<int> shape; /*<! shape of Tensor */
|
||||
/*<! 2D: shape is [height, width, channel] */
|
||||
/*<! 1D: reserved */
|
||||
std::vector<int> shape_with_padding; /*<! shape with padding of Tensor */
|
||||
/*<! 2D: shape_with_padding is [height_with_padding, width_with_padding, channel_with_padding] */
|
||||
/*<! 1D: reserved */
|
||||
std::vector<int> padding; /*<! padding of Tensor */
|
||||
/*<!- 2D: padding format is [top, bottom, left, right] */
|
||||
/*<! - 1D: reserved */
|
||||
T *element; /*<! point to element */
|
||||
int exponent; /*<! exponent of element */
|
||||
std::vector<int> shape; /*<! shape of Tensor */
|
||||
|
||||
/**
|
||||
* @brief Construct a new Tensor object
|
||||
*
|
||||
*/
|
||||
Tensor() : size(-1), auto_free(true), element(NULL), exponent(0) {}
|
||||
Tensor() : auto_free(true), element(NULL), exponent(0) { this->set_shape({0}); }
|
||||
|
||||
/**
|
||||
* @brief Construct a new Tensor object by copying from input.
|
||||
@ -49,21 +43,20 @@ namespace dl
|
||||
*/
|
||||
Tensor(Tensor<T> &input, bool deep) : size(input.size),
|
||||
auto_free(input.auto_free),
|
||||
exponent(input.exponent),
|
||||
shape(input.shape),
|
||||
shape_with_padding(input.shape_with_padding),
|
||||
padding(input.padding)
|
||||
exponent(input.exponent)
|
||||
{
|
||||
if (deep)
|
||||
this->set_shape(input.shape);
|
||||
if (deep && (input.element != NULL))
|
||||
{
|
||||
int size_real = input.shape_with_padding.size() ? input.shape_with_padding[0] * input.shape_with_padding[1] * input.shape_with_padding[2] : 0;
|
||||
T *new_element = (T *)tool::calloc_aligned(size_real, sizeof(T), 16);
|
||||
int size_real = input.get_size();
|
||||
T *new_element = (T *)tool::calloc_aligned_prefer(size_real, sizeof(T), 16);
|
||||
tool::copy_memory(new_element, input.element, size_real * sizeof(T));
|
||||
this->element = new_element;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->element = input.element;
|
||||
this->auto_free = false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -77,6 +70,33 @@ namespace dl
|
||||
this->free_element();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @param input an input Tensor
|
||||
* @param deep one of true or false
|
||||
* - true: apply a new memory, copy value from input.element to this new memory
|
||||
* - false: take over input.element to this->element
|
||||
* @return Tensor<T>& self
|
||||
*/
|
||||
Tensor<T> ©_element(Tensor<T> &input, bool deep)
|
||||
{
|
||||
assert(this->get_size() == input.get_size());
|
||||
assert(input.element != NULL);
|
||||
|
||||
this->malloc_element();
|
||||
if (deep)
|
||||
{
|
||||
tool::copy_memory(this->element, input.element, this->get_size() * sizeof(T));
|
||||
}
|
||||
else
|
||||
{
|
||||
this->element = input.element;
|
||||
this->auto_free = false;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set the auto free object.
|
||||
*
|
||||
@ -120,190 +140,144 @@ namespace dl
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set the shape of Tensor. Initial this->padding = {0}. Initial this->size = -1.
|
||||
* @brief Set the shape of Tensor.
|
||||
*
|
||||
* @param shape shape in
|
||||
* - 2D: [height, width]
|
||||
* @param shape the target shape
|
||||
*
|
||||
* @return self
|
||||
*/
|
||||
Tensor<T> &set_shape(const std::vector<int> shape)
|
||||
Tensor<T> &set_shape(const std::vector<int> shape);
|
||||
|
||||
/**
|
||||
* @brief print the shape of the Tensor
|
||||
*
|
||||
*/
|
||||
void print_shape()
|
||||
{
|
||||
for (int i = 0; i < shape.size(); ++i)
|
||||
if (this->shape.size())
|
||||
{
|
||||
assert(shape[i] > 0);
|
||||
printf("shape = (");
|
||||
for (int i = 0; i < this->shape.size() - 1; i++)
|
||||
{
|
||||
printf("%d, ", this->shape[i]);
|
||||
}
|
||||
printf("%d)\n", this->shape.back());
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("shape = ()\n");
|
||||
}
|
||||
this->shape = shape;
|
||||
this->shape_with_padding = shape;
|
||||
this->size = -1;
|
||||
this->padding = std::vector<int>(((this->shape.size() - 1) << 1), 0);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set the padding size object.
|
||||
* @brief flatten the Tensor
|
||||
*
|
||||
* @param padding padding size in
|
||||
* - 2D: [top, bottom, left, right]
|
||||
* @return self
|
||||
* @return Tensor<T>& self
|
||||
*/
|
||||
Tensor &set_padding_size(std::vector<int> &padding)
|
||||
{
|
||||
assert(this->shape.size()); // call Tensor.set_shape() first
|
||||
assert(this->shape.size() == 3); // TODO: || this->shape.size() == 2
|
||||
|
||||
if (this->shape.size() == 3)
|
||||
{
|
||||
std::vector<int> new_padding = this->padding;
|
||||
bool dont_update = true;
|
||||
|
||||
if (padding[0] > this->padding[0])
|
||||
{
|
||||
new_padding[0] = padding[0];
|
||||
dont_update = false;
|
||||
}
|
||||
|
||||
if (padding[1] > this->padding[1])
|
||||
{
|
||||
new_padding[1] = padding[1];
|
||||
dont_update = false;
|
||||
}
|
||||
|
||||
if (padding[2] > this->padding[2])
|
||||
{
|
||||
new_padding[2] = padding[2];
|
||||
dont_update = false;
|
||||
}
|
||||
|
||||
if (padding[3] > this->padding[3])
|
||||
{
|
||||
new_padding[3] = padding[3];
|
||||
dont_update = false;
|
||||
}
|
||||
|
||||
if (dont_update)
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
|
||||
std::vector<int> new_shape_with_padding = this->shape;
|
||||
|
||||
new_shape_with_padding[0] += (new_padding[0] + new_padding[1]);
|
||||
new_shape_with_padding[1] += (new_padding[2] + new_padding[3]);
|
||||
int new_size = new_shape_with_padding[0] * new_shape_with_padding[1] * new_shape_with_padding[2];
|
||||
|
||||
if (this->element) // if this->element != NULL, do padding by copy memory
|
||||
{
|
||||
T *new_element = (T *)tool::malloc_aligned(new_size, sizeof(T), 16);
|
||||
T *dst = new_element + ((new_padding[0] * new_shape_with_padding[1]) + new_padding[2]) * new_shape_with_padding[2];
|
||||
T *src = this->get_element_ptr();
|
||||
int offset_dst_next_y = new_shape_with_padding[1] * new_shape_with_padding[2]; // width * channel
|
||||
int src_copy_length = this->shape[1] * this->shape[2]; // width * channel
|
||||
int offset_src_next_y = this->shape_with_padding[1] * this->shape_with_padding[2]; // width * channel
|
||||
for (int y = 0; y < this->shape[0]; y++)
|
||||
{
|
||||
tool::copy_memory(dst, src, src_copy_length * sizeof(T));
|
||||
dst += offset_dst_next_y;
|
||||
src += offset_src_next_y;
|
||||
}
|
||||
|
||||
if (this->auto_free)
|
||||
tool::free_aligned(this->element);
|
||||
this->element = new_element;
|
||||
this->auto_free = true;
|
||||
}
|
||||
this->padding = new_padding;
|
||||
this->shape_with_padding = new_shape_with_padding;
|
||||
this->size = new_size;
|
||||
}
|
||||
else if (this->shape.size() == 2)
|
||||
{
|
||||
printf("Tensor.set_padding_size with this->shape.size() == 2 not implement yet.\n");
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
Tensor<T> &flatten();
|
||||
|
||||
/**
|
||||
* @brief Set the padding value object.
|
||||
* @brief Change a new shape to the Tensor without changing its data.
|
||||
*
|
||||
* @param padding padding size in
|
||||
* - 2D: [top, bottom, left, right]
|
||||
* @param value value to set
|
||||
* @return self
|
||||
* @param shape the target shape
|
||||
* @return Tensor<T>& self
|
||||
*/
|
||||
Tensor<T> &set_padding_value(std::vector<int> &padding, T value);
|
||||
Tensor<T> &reshape(std::vector<int> shape);
|
||||
|
||||
/**
|
||||
* @brief Remove dims with length==1 from Tensor
|
||||
*
|
||||
* @param axis the dim to be removed. Make sure the length of the dim is equal to 1.
|
||||
* if axis == INT32_MAX, all the dims with length==1 will be removed.
|
||||
* @return Tensor<T>& self
|
||||
*/
|
||||
Tensor<T> &squeeze(int axis = INT32_MAX);
|
||||
|
||||
/**
|
||||
* @brief Insert a new dim that will appear at the axis position in the expanded Tensor shape.
|
||||
*
|
||||
* @param axis the dim to be inserted
|
||||
* @return Tensor<T>& self
|
||||
*/
|
||||
Tensor<T> &expand_dims(int axis);
|
||||
|
||||
/**
|
||||
* @brief Insert a new dim that will appear at the axis position in the expanded Tensor shape.
|
||||
*
|
||||
* @param axis the dim to be inserted
|
||||
* @return Tensor<T>& self
|
||||
*/
|
||||
Tensor<T> &expand_dims(std::vector<int> axis);
|
||||
|
||||
/**
|
||||
* @brief Reverse or permute the axes of the Tensor
|
||||
*
|
||||
* @param perm the new arrangement of the dims. If perm == {}, the dims arrangement will be reversed.
|
||||
* @return Tensor<T>& self
|
||||
*/
|
||||
Tensor<T> &transpose(std::vector<int> perm = {});
|
||||
|
||||
/**
|
||||
* @brief Reverse or permute the axes of the input Tensor
|
||||
*
|
||||
* @param input the input Tensor
|
||||
* @param perm the new arrangement of the dims. If perm == {}, the dims arrangement will be reversed.
|
||||
* @return Tensor<T>& self
|
||||
*/
|
||||
Tensor<T> &transpose(Tensor<T> &input, std::vector<int> perm = {});
|
||||
|
||||
/**
|
||||
* @brief Get the element pointer.
|
||||
*
|
||||
* @param padding padding size in
|
||||
* - 2D: [top, bottom, left, right]
|
||||
* @return pointer to memory with padding
|
||||
* @return pointer to memory
|
||||
*/
|
||||
T *get_element_ptr(const std::vector<int> padding = {0, 0, 0, 0})
|
||||
T *get_element_ptr()
|
||||
{
|
||||
assert(this->shape.size() == 3); // TODO: || this->shape.size() == 2
|
||||
|
||||
if (this->shape.size() == 3)
|
||||
{
|
||||
return this->element + ((this->padding[0] - padding[0]) * this->shape_with_padding[1] + (this->padding[2] - padding[2])) * this->shape_with_padding[2];
|
||||
}
|
||||
else if (this->shape.size() == 2)
|
||||
{
|
||||
printf("Tensor.get_element_ptr with this->shape.size() == 2 is not implemented.\n");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return this->element;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the element value.
|
||||
*
|
||||
* @param index index in
|
||||
* - 2D: [y, x, c]
|
||||
* @param with_padding one of true or false,
|
||||
* - true: make padding size in count
|
||||
* - false: do not
|
||||
* @return element value
|
||||
* @param index the index of each dim.
|
||||
* @return T element value
|
||||
*/
|
||||
T &get_element_value(const std::vector<int> index, const bool with_padding = false)
|
||||
T get_element_value(const std::vector<int> index)
|
||||
{
|
||||
assert(index.size() == this->shape.size());
|
||||
assert(this->shape.size() == 3); // TODO: || this->shape() == 2
|
||||
|
||||
int i = 0;
|
||||
if (this->shape.size() == 3)
|
||||
{
|
||||
int y = index[0];
|
||||
int x = index[1];
|
||||
int c = index[2];
|
||||
i = with_padding ? (y * this->shape_with_padding[1] + x) * this->shape_with_padding[2] + c : ((y + this->padding[0]) * this->shape_with_padding[1] + x + this->padding[2]) * this->shape_with_padding[2] + c;
|
||||
}
|
||||
else if (this->shape.size() == 2)
|
||||
{
|
||||
printf("Tensor.get_element_value with this->shape.size() == 2 is not implemented.\n");
|
||||
}
|
||||
|
||||
return this->element[i];
|
||||
return this->element[this->get_element_index(index)];
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the size of element.
|
||||
* @brief Get the element value.
|
||||
*
|
||||
* @return size of element including padding
|
||||
* @param index the index of the element.
|
||||
* @return T element value
|
||||
*/
|
||||
T get_element_value(int index)
|
||||
{
|
||||
return this->element[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the size of Tensor.
|
||||
*
|
||||
* @return the size of Tensor.
|
||||
*/
|
||||
int get_size()
|
||||
{
|
||||
if (this->size == -1) // didn't call Tensor.set_padding_size() before
|
||||
{
|
||||
this->size = 1;
|
||||
for (std::vector<int>::iterator d = this->shape.begin(); d != this->shape.end(); d++)
|
||||
this->size *= *d;
|
||||
}
|
||||
|
||||
return this->size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the axis offset
|
||||
*
|
||||
* @return std::vector<int> the axis offset
|
||||
*/
|
||||
std::vector<int> get_axis_offset()
|
||||
{
|
||||
return this->axis_offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Apply memory with zero-initialized only if this->element is NULL.
|
||||
*
|
||||
@ -319,7 +293,7 @@ namespace dl
|
||||
if (this->element != NULL)
|
||||
return false;
|
||||
|
||||
this->element = (T *)dl::tool::calloc_aligned(this->get_size(), sizeof(T), 16);
|
||||
this->element = (T *)dl::tool::calloc_aligned_prefer(this->get_size(), sizeof(T), 16);
|
||||
this->auto_free = auto_free;
|
||||
|
||||
return true;
|
||||
@ -340,31 +314,7 @@ namespace dl
|
||||
if (this->element != NULL)
|
||||
return false;
|
||||
|
||||
this->element = (T *)tool::malloc_aligned(this->get_size(), sizeof(T), 16);
|
||||
this->auto_free = auto_free;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief If this->element != NULL no memory will be applied and no value will be set in padding.
|
||||
* Else apply memory without initialized and set value to padding.
|
||||
*
|
||||
* @param padding_value value to set in padding
|
||||
* @param auto_free one of true of false
|
||||
* - true: free element when object destroyed
|
||||
* - false: do not
|
||||
* @return
|
||||
* - true: apply memory and set padding value successfully
|
||||
* - false: no memory applied and no padding value set
|
||||
*/
|
||||
bool apply_element(const T padding_value = 0, const bool auto_free = true)
|
||||
{
|
||||
if (this->element != NULL)
|
||||
return false;
|
||||
|
||||
this->element = (T *)tool::malloc_aligned(this->get_size(), sizeof(T), 16);
|
||||
this->set_padding_value(this->padding, padding_value);
|
||||
this->element = (T *)tool::malloc_aligned_prefer(this->get_size(), sizeof(T), 16);
|
||||
this->auto_free = auto_free;
|
||||
|
||||
return true;
|
||||
@ -379,258 +329,56 @@ namespace dl
|
||||
{
|
||||
if (this->auto_free && this->element)
|
||||
{
|
||||
tool::free_aligned(this->element);
|
||||
tool::free_aligned_prefer(this->element);
|
||||
this->element = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Print the shape of Tensor in format "shape = ({top_padding} + {height} + {bottom_padding}, {left_padding} + {width} + {right_padding}, {channel}(channel_with_padding))\n".
|
||||
* @brief print the element of the tensor
|
||||
*
|
||||
* @param axis_index_range the element range of each dims to be print. if axis_index_range == {}, all the element will be print.
|
||||
* @param message to print
|
||||
*/
|
||||
void print_shape()
|
||||
{
|
||||
printf("shape = (%d + %d + %d, %d + %d + %d, %d(%d))\n",
|
||||
this->padding[0], this->shape[0], this->padding[1],
|
||||
this->padding[2], this->shape[1], this->padding[3],
|
||||
this->shape[2], this->shape_with_padding[2]);
|
||||
}
|
||||
void print(std::vector<int> axis_index_range = {}, const char *message = "");
|
||||
|
||||
/**
|
||||
* @brief Take numpy for example, this function print Tensor[y_start:y_end, x_start:x_end, c_start:c_end].
|
||||
* @brief print all the element of the Tensor.
|
||||
*
|
||||
* inner box is effective value of Tensor, "0" around is padding.
|
||||
*
|
||||
* (with padding)
|
||||
* 00000000000000000000000000000000000000000000000000
|
||||
* 00000000000000000000000000000000000000000000000000
|
||||
* 00000000000000000000000000000000000000000000000000
|
||||
* 000000(without padding) 00000000
|
||||
* 000000 00000000
|
||||
* 000000 00000000
|
||||
* 000000 effective value 00000000
|
||||
* 000000 00000000
|
||||
* 000000 00000000
|
||||
* 00000000000000000000000000000000000000000000000000
|
||||
* 00000000000000000000000000000000000000000000000000
|
||||
* 00000000000000000000000000000000000000000000000000
|
||||
*
|
||||
* @param y_start start index in height
|
||||
* @param y_end end index in height
|
||||
* @param x_start start index in width
|
||||
* @param x_end end index in width
|
||||
* @param c_start start index in channel
|
||||
* @param c_end end index in channel
|
||||
* @param message to print
|
||||
* @param axis print aligned this axis, effective only if all y_end - y_start, x_end - x_start and c_end - c_start equals to 1
|
||||
* @param message to print
|
||||
* @param with_padding one of true or false,
|
||||
* - true: count from (with padding) in upper image
|
||||
* - false: count from (without padding) in upper image
|
||||
* - true: the padding element will also be printed
|
||||
* - false: the padding element will not be printed
|
||||
*/
|
||||
void print(int y_start, int y_end,
|
||||
int x_start, int x_end,
|
||||
int c_start, int c_end,
|
||||
const char *message, int axis = 0, const bool with_padding = false)
|
||||
void print_all(const char *message = "")
|
||||
{
|
||||
assert(y_end > y_start);
|
||||
assert(x_end > x_start);
|
||||
assert(c_end > c_start);
|
||||
|
||||
y_start = DL_MAX(y_start, 0);
|
||||
x_start = DL_MAX(x_start, 0);
|
||||
c_start = DL_MAX(c_start, 0);
|
||||
if (with_padding)
|
||||
{
|
||||
y_end = DL_MIN(y_end, this->shape_with_padding[0]);
|
||||
x_end = DL_MIN(x_end, this->shape_with_padding[1]);
|
||||
c_end = DL_MIN(c_end, this->shape_with_padding[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
y_end = DL_MIN(y_end, this->shape[0]);
|
||||
x_end = DL_MIN(x_end, this->shape[1]);
|
||||
c_end = DL_MIN(c_end, this->shape[2]);
|
||||
}
|
||||
|
||||
printf("%s[%d:%d, %d:%d, %d:%d] | ", message, y_start, y_end, x_start, x_end, c_start, c_end);
|
||||
std::cout << "\n"
|
||||
<< message << " | ";
|
||||
this->print_shape();
|
||||
|
||||
if (y_end - y_start == 1)
|
||||
for (int i = 0; i < this->get_size(); i++)
|
||||
{
|
||||
if (x_end - x_start == 1)
|
||||
{
|
||||
for (int c = c_start; c < c_end; c++)
|
||||
printf("%7d", c);
|
||||
printf("\n");
|
||||
|
||||
for (int c = c_start; c < c_end; c++)
|
||||
printf("%7d", this->get_element_value({y_start, x_start, c}, with_padding));
|
||||
printf("\n");
|
||||
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c_end - c_start == 1)
|
||||
{
|
||||
for (int x = x_start; x < x_end; x++)
|
||||
printf("%7d", x);
|
||||
printf("\n");
|
||||
|
||||
for (int x = x_start; x < x_end; x++)
|
||||
printf("%7d", this->get_element_value({y_start, x, c_start}, with_padding));
|
||||
printf("\n");
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
std::cout << this->element[i] << " ";
|
||||
}
|
||||
else
|
||||
{
|
||||
if (x_end - x_start == 1)
|
||||
{
|
||||
if (c_end - c_start == 1)
|
||||
{
|
||||
for (int y = y_start; y < y_end; y++)
|
||||
printf("%7d", y);
|
||||
printf("\n");
|
||||
|
||||
for (int y = y_start; y < y_end; y++)
|
||||
printf("%7d", this->get_element_value({y, x_start, c_start}, with_padding));
|
||||
printf("\n");
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (y_end - y_start == 1)
|
||||
axis = 0;
|
||||
|
||||
if (x_end - x_start == 1)
|
||||
axis = 1;
|
||||
|
||||
if (c_end - c_start == 1)
|
||||
axis = 2;
|
||||
|
||||
if (axis == 0)
|
||||
{
|
||||
// ______c
|
||||
// |
|
||||
// |
|
||||
// x
|
||||
//
|
||||
for (int y = y_start; y < y_end; y++)
|
||||
{
|
||||
printf("y = %d\n ", y);
|
||||
|
||||
for (int c = c_start; c < c_end; c++)
|
||||
printf("%7d", c);
|
||||
printf("\n");
|
||||
|
||||
for (int x = x_start; x < x_end; x++)
|
||||
{
|
||||
printf("%5d", x);
|
||||
for (int c = c_start; c < c_end; c++)
|
||||
printf("%7d", this->get_element_value({y, x, c}, with_padding));
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
else if (axis == 1)
|
||||
{
|
||||
// ______c
|
||||
// |
|
||||
// |
|
||||
// y
|
||||
//
|
||||
for (int x = x_start; x < x_end; x++)
|
||||
{
|
||||
printf("x = %d\n ", x);
|
||||
|
||||
for (int c = c_start; c < c_end; c++)
|
||||
printf("%7d", c);
|
||||
printf("\n");
|
||||
|
||||
for (int y = y_start; y < y_end; y++)
|
||||
{
|
||||
printf("%5d", y);
|
||||
for (int c = c_start; c < c_end; c++)
|
||||
printf("%7d", this->get_element_value({y, x, c}, with_padding));
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// ______x
|
||||
// |
|
||||
// |
|
||||
// y
|
||||
//
|
||||
for (int c = c_start; c < c_end; c++)
|
||||
{
|
||||
printf("c = %d\n ", c);
|
||||
|
||||
for (int x = x_start; x < x_end; x++)
|
||||
printf("%7d", x);
|
||||
printf("\n");
|
||||
|
||||
for (int y = y_start; y < y_end; y++)
|
||||
{
|
||||
printf("%5d", y);
|
||||
for (int x = x_start; x < x_end; x++)
|
||||
printf("%7d", this->get_element_value({y, x, c}, with_padding));
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief print all the element of the Tensor.
|
||||
* @brief Get the index of each dims
|
||||
*
|
||||
* @param message to print
|
||||
* @param with_padding one of true or false,
|
||||
* - true: the padding element will also be printed
|
||||
* - false: the padding element will not be printed
|
||||
* @param element_index the index of the element
|
||||
* @return std::vector<int> the index of each dims
|
||||
*/
|
||||
void print_all(const char *message, const bool with_padding = false)
|
||||
{
|
||||
int y_end;
|
||||
int x_end;
|
||||
int c_end;
|
||||
if (with_padding)
|
||||
{
|
||||
y_end = this->shape_with_padding[0];
|
||||
x_end = this->shape_with_padding[1];
|
||||
c_end = this->shape_with_padding[2];
|
||||
}
|
||||
else
|
||||
{
|
||||
y_end = this->shape[0];
|
||||
x_end = this->shape[1];
|
||||
c_end = this->shape[2];
|
||||
}
|
||||
std::vector<int> get_axis_index(int element_index);
|
||||
|
||||
printf("\n%s | ", message);
|
||||
this->print_shape();
|
||||
|
||||
for (int y = 0; y < y_end; y++)
|
||||
{
|
||||
for (int x = 0; x < x_end; x++)
|
||||
{
|
||||
for (int c = 0; c < c_end; c++)
|
||||
printf("%d ", this->get_element_value({y, x, c}, with_padding));
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
return;
|
||||
}
|
||||
/**
|
||||
* @brief Get the index of element
|
||||
*
|
||||
* @param axis_index the index of each dims
|
||||
* @return int the index of element
|
||||
*/
|
||||
int get_element_index(const std::vector<int> axis_index);
|
||||
|
||||
/**
|
||||
* @brief Check the element value with input ground-truth.
|
||||
@ -638,35 +386,39 @@ namespace dl
|
||||
* @param gt_element ground-truth value of element
|
||||
* @param bias permissible error
|
||||
* @param info one of true or false
|
||||
* - true: print shape and result
|
||||
* - true: shape and result
|
||||
* - false: do not
|
||||
* @param failed_number maximum number of wrong element that will be printed
|
||||
*
|
||||
* @return
|
||||
* - true: in permissible error
|
||||
* - false: not
|
||||
*/
|
||||
bool check_element(T *gt_element, int bias = 2, bool info = true)
|
||||
bool check_element(T *gt_element, int bias = 2, bool info = true, int failed_number = 0)
|
||||
{
|
||||
int count = 0;
|
||||
if (info)
|
||||
this->print_shape();
|
||||
int i = 0;
|
||||
for (int y = 0; y < this->shape[0]; y++)
|
||||
int size = this->get_size();
|
||||
for (int i = 0; i < size; i++)
|
||||
{
|
||||
for (int x = 0; x < this->shape[1]; x++)
|
||||
if (DL_ABS(this->element[i] - gt_element[i]) > bias)
|
||||
{
|
||||
for (int c = 0; c < this->shape[2]; c++)
|
||||
std::vector<int> index = get_axis_index(i);
|
||||
std::cout << "element[";
|
||||
for (int j = 0; j < index.size() - 1; j++)
|
||||
{
|
||||
int a = this->get_element_value({y, x, c});
|
||||
int b = gt_element[i];
|
||||
int offset = DL_ABS(a - b);
|
||||
if (offset > bias)
|
||||
{
|
||||
printf("element[%d, %d, %d]: %d v.s. %d\n", y, x, c, a, b);
|
||||
return false;
|
||||
}
|
||||
i++;
|
||||
std::cout << index[j] << ", ";
|
||||
}
|
||||
std::cout << index.back() << "]: ";
|
||||
std::cout << +this->element[i] << " v.s. " << +gt_element[i] << "\n";
|
||||
count++;
|
||||
if (count > failed_number)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (count)
|
||||
return false;
|
||||
|
||||
if (info)
|
||||
printf("PASS\n");
|
||||
@ -700,35 +452,44 @@ namespace dl
|
||||
|
||||
Tensor<T> &operator=(const Tensor<T> &input)
|
||||
{
|
||||
this->size = input.size;
|
||||
this->auto_free = input.auto_free;
|
||||
this->exponent = input.exponent;
|
||||
this->shape = input.shape;
|
||||
this->padding = input.padding;
|
||||
int size_real_tmp = this->shape_with_padding.size() ? this->shape_with_padding[0] * this->shape_with_padding[1] * this->shape_with_padding[2] : 0;
|
||||
int size_input_real = input.shape_with_padding.size() ? input.shape_with_padding[0] * input.shape_with_padding[1] * input.shape_with_padding[2] : 0;
|
||||
this->shape_with_padding = input.shape_with_padding;
|
||||
if (this->element)
|
||||
int size_real_tmp = this->size;
|
||||
int size_input_real = input.size;
|
||||
this->set_shape(input.shape);
|
||||
if (input.element)
|
||||
{
|
||||
if (size_real_tmp != size_input_real)
|
||||
if (this->element)
|
||||
{
|
||||
tool::free_aligned(this->element);
|
||||
T *new_element = (T *)tool::calloc_aligned(size_input_real, sizeof(T), 16);
|
||||
tool::copy_memory(new_element, input.element, size_input_real * sizeof(T));
|
||||
this->element = new_element;
|
||||
if (size_real_tmp != size_input_real)
|
||||
{
|
||||
tool::free_aligned_prefer(this->element);
|
||||
T *new_element = (T *)tool::malloc_aligned_prefer(size_input_real, sizeof(T), 16);
|
||||
tool::copy_memory(new_element, input.element, size_input_real * sizeof(T));
|
||||
this->element = new_element;
|
||||
}
|
||||
else
|
||||
{
|
||||
tool::copy_memory(this->element, input.element, size_input_real * sizeof(T));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
tool::copy_memory(this->element, input.element, size_input_real * sizeof(T));
|
||||
T *new_element = (T *)tool::malloc_aligned_prefer(size_input_real, sizeof(T), 16);
|
||||
tool::copy_memory(new_element, input.element, size_input_real * sizeof(T));
|
||||
this->element = new_element;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
else
|
||||
{
|
||||
T *new_element = (T *)tool::calloc_aligned(size_input_real, sizeof(T), 16);
|
||||
tool::copy_memory(new_element, input.element, size_input_real * sizeof(T));
|
||||
this->element = new_element;
|
||||
if (this->element)
|
||||
{
|
||||
tool::free_aligned_prefer(this->element);
|
||||
this->element = NULL;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
} // namespace dl
|
Reference in New Issue
Block a user