forked from espressif/arduino-esp32
esp-dsp: master 6b25cbb esp-face: master d141502 esp-rainmaker: f1b82c7 esp32-camera: master 61400bc esp_littlefs: master 3c29afc
492 lines
24 KiB
C++
492 lines
24 KiB
C++
#pragma once
|
||
|
||
#include <stdint.h>
|
||
#include <stdlib.h>
|
||
#include <math.h>
|
||
#include <vector>
|
||
#include "dl_define.hpp"
|
||
#include "dl_variable.hpp"
|
||
#include "dl_math_matrix.hpp"
|
||
|
||
namespace dl
|
||
{
|
||
namespace image
|
||
{
|
||
/**
 * @brief Selects how pixels are sampled when an image is resized.
 */
typedef enum
{
    /** Resize image by taking bilinear of four pixels */
    IMAGE_RESIZE_BILINEAR = 0,
    /** Resize image by taking mean of four pixels */
    IMAGE_RESIZE_MEAN = 1,
    /** Resize image by taking the nearest pixel */
    IMAGE_RESIZE_NEAREST = 2
} resize_type_t;
|
||
|
||
/**
|
||
* @brief Convert RGB888 pixel to Gray.
|
||
*
|
||
* @param red red value
|
||
* @param green green value
|
||
* @param blue blue value
|
||
* @return gray value
|
||
*/
|
||
inline uint8_t convert_pixel_rgb888_to_gray(int red, int green, int blue)
|
||
{
|
||
int temp = (red * 38 + green * 75 + blue * 15) >> 7;
|
||
return DL_CLIP(temp, 0, 255);
|
||
}
|
||
|
||
/**
 * @brief Expand one RGB565 pixel into three 8-bit channel values.
 *
 * The channels are written in the order blue, green, red:
 *   - output[0] = blue,  taken from bits 12..8 of the input
 *   - output[1] = green, assembled from bits 2..0 (upper part) and bits 15..13 (lower part) of the input
 *   - output[2] = red,   taken from bits 7..3 of the input
 * Each value is left-aligned in its byte; the unused low bits are zero.
 *
 * @tparam T supports all integer types
 * @param input  pixel value in RGB565
 * @param output pointer to at least three elements receiving the converted pixel
 */
template <typename T>
inline void convert_pixel_rgb565_to_rgb888(uint16_t input, T *output)
{
    const int blue = (input & 0x1F00) >> 5;
    const int green_high = (input & 0x7) << 5;
    const int green_low = (input & 0xE000) >> 11;
    const int red = input & 0xF8;

    output[0] = blue;
    output[1] = green_high | green_low;
    output[2] = red;
}
|
||
|
||
/**
|
||
* @brief Convert RGB565 image to RGB888 image.
|
||
*
|
||
* @param image ptr of RGB565 image
|
||
* @param image_shape shape of the input image
|
||
* @return Tensor<uint8_t>* output RGB88 image
|
||
*/
|
||
Tensor<uint8_t> *convert_image_rgb565_to_rgb888(uint16_t *image, std::vector<int> &image_shape);
|
||
|
||
/**
|
||
* @brief Convert RGB565 pixel to Gray.
|
||
*
|
||
* @param input pixel value in RGB565
|
||
* @return pixel value in Gray
|
||
*/
|
||
inline uint8_t convert_pixel_rgb565_to_gray(uint16_t input)
|
||
{
|
||
int blue = (input & 0x1F00) >> 5; // blue
|
||
int green = ((input & 0x7) << 5) | ((input & 0xE000) >> 11); // green
|
||
int red = input & 0xF8; // red
|
||
|
||
return convert_pixel_rgb888_to_gray(red, green, blue);
|
||
}
|
||
|
||
/**
|
||
* @brief Crop a patch from image and resize and store to destination image.
|
||
* If the cropping box is out of image, destination image will be padded with edge.
|
||
*
|
||
* The outer rectangle is the entire output image.
|
||
* The inner rectangle is where the resized image will be stored.
|
||
* In other world, this function could help you do padding while resize image.
|
||
* ___________________________(dst_w)__________________
|
||
* | ___________________________ |
|
||
* | |(x_start, y_start) | |
|
||
* | | | |
|
||
* | | | |
|
||
* (dst_h)| | | |
|
||
* | | | |
|
||
* | | | |
|
||
* | |___________________________|(x_end, y_end) |
|
||
* |____________________________________________________|
|
||
*
|
||
* @tparam T suppot all integer types
|
||
* @param dst_image pointer of destination(output) image
|
||
* @param dst_width destination image width
|
||
* @param dst_channel destination image channel number
|
||
* @param dst_y_start start y of resized image in destination image
|
||
* @param dst_y_end end y of resized image in destination image
|
||
* @param dst_x_start start x of resized image in destination image
|
||
* @param dst_x_end end x of resized image in destination image
|
||
* @param src_image pointer of source image
|
||
* @param src_height source image height
|
||
* @param src_width source image width
|
||
* @param src_channel source image channel
|
||
* @param src_y_start start y of resized image in source image
|
||
* @param src_y_end end y of resized image in source image
|
||
* @param src_x_start start x of resized image in source image
|
||
* @param src_x_end end x of resized image in source image
|
||
* @param resize_type one of IMAGE_RESIZE_BILINEAR or IMAGE_RESIZE_MEAN or IMAGE_RESIZE_NEAREST
|
||
* @param shift_left bit left shift number implemented on output
|
||
*/
|
||
template <typename T>
|
||
void crop_and_resize(T *dst_image,
|
||
int dst_width,
|
||
int dst_channel,
|
||
int dst_y_start, int dst_y_end,
|
||
int dst_x_start, int dst_x_end,
|
||
uint16_t *src_image,
|
||
int src_height,
|
||
int src_width,
|
||
int src_channel,
|
||
int src_y_start, int src_y_end,
|
||
int src_x_start, int src_x_end,
|
||
resize_type_t resize_type = IMAGE_RESIZE_NEAREST,
|
||
int shift_left = 0);
|
||
|
||
/**
|
||
* @brief Crop a patch from image and resize and store to destination image.
|
||
* If the cropping box is out of image, destination image will be padded with edge.
|
||
*
|
||
* The outer rectangle is the entire output image.
|
||
* The inner rectangle is where the resized image will be stored.
|
||
* In other world, this function could help you do padding while resize image.
|
||
* ___________________________(dst_w)__________________
|
||
* | ___________________________ |
|
||
* | |(x_start, y_start) | |
|
||
* | | | |
|
||
* | | | |
|
||
* (dst_h)| | | |
|
||
* | | | |
|
||
* | | | |
|
||
* | |___________________________|(x_end, y_end) |
|
||
* |____________________________________________________|
|
||
*
|
||
* @tparam T suppot all integer types
|
||
* @param dst_image pointer of destination(output) image
|
||
* @param dst_width destination image width
|
||
* @param dst_channel destination image channel number
|
||
* @param dst_y_start start y of resized image in destination image
|
||
* @param dst_y_end end y of resized image in destination image
|
||
* @param dst_x_start start x of resized image in destination image
|
||
* @param dst_x_end end x of resized image in destination image
|
||
* @param src_image pointer of source image
|
||
* @param src_height source image height
|
||
* @param src_width source image width
|
||
* @param src_channel source image channel
|
||
* @param src_y_start start y of resized image in source image
|
||
* @param src_y_end end y of resized image in source image
|
||
* @param src_x_start start x of resized image in source image
|
||
* @param src_x_end end x of resized image in source image
|
||
* @param resize_type one of IMAGE_RESIZE_BILINEAR or IMAGE_RESIZE_MEAN or IMAGE_RESIZE_NEAREST
|
||
* @param shift_left bit left shift number implemented on output
|
||
*/
|
||
template <typename T>
|
||
void crop_and_resize(T *dst_image,
|
||
int dst_width,
|
||
int dst_channel,
|
||
int dst_y_start, int dst_y_end,
|
||
int dst_x_start, int dst_x_end,
|
||
uint8_t *src_image,
|
||
int src_height,
|
||
int src_width,
|
||
int src_channel,
|
||
int src_y_start, int src_y_end,
|
||
int src_x_start, int src_x_end,
|
||
resize_type_t resize_type = IMAGE_RESIZE_NEAREST,
|
||
int shift_left = 0);
|
||
|
||
/**
 * @brief Draw a filled rectangle on an RGB888 image.
 *
 * @param image        pointer of input image
 * @param image_height height of input image
 * @param image_width  width of input image
 * @param x1           left up corner x
 * @param y1           left up corner y
 * @param x2           right bottom corner x
 * @param y2           right bottom corner y
 * @param color        0x    00|       00|       00|       00
 *                     reserved|channel 0|channel 1|channel 2
 */
void draw_filled_rectangle(
    uint8_t *image,
    const uint32_t image_height,
    const uint32_t image_width,
    uint32_t x1,
    uint32_t y1,
    uint32_t x2,
    uint32_t y2,
    const uint32_t color = 0x00FF0000);
|
||
|
||
/**
 * @brief Draw a filled rectangle on an RGB565 image.
 *
 * @param image        pointer of input image
 * @param image_height height of input image
 * @param image_width  width of input image
 * @param x1           left up corner x
 * @param y1           left up corner y
 * @param x2           right bottom corner x
 * @param y2           right bottom corner y
 * @param color        0b         000|    00000|    00000|           000
 *                     channel 1[2:0]|channel 0|channel 2|channel 1[5:3]
 */
void draw_filled_rectangle(
    uint16_t *image,
    const uint32_t image_height,
    const uint32_t image_width,
    uint32_t x1,
    uint32_t y1,
    uint32_t x2,
    uint32_t y2,
    const uint16_t color = 0b0001111100000000);
|
||
|
||
/**
 * @brief Draw a point on an RGB888 image.
 *
 * @param image        pointer of input image
 * @param image_height height of input image
 * @param image_width  width of input image
 * @param x            point x
 * @param y            point y
 * @param size         size of point
 * @param color        0x    00|       00|       00|       00
 *                     reserved|channel 0|channel 1|channel 2
 */
void draw_point(
    uint8_t *image,
    const uint32_t image_height,
    const uint32_t image_width,
    const uint32_t x,
    const uint32_t y,
    const uint32_t size,
    const uint32_t color = 0x00FF0000);
|
||
|
||
/**
 * @brief Draw a point on an RGB565 image.
 *
 * @param image        pointer of input image
 * @param image_height height of input image
 * @param image_width  width of input image
 * @param x            point x
 * @param y            point y
 * @param size         size of point
 * @param color        0b         000|    00000|    00000|           000
 *                     channel 1[2:0]|channel 0|channel 2|channel 1[5:3]
 */
void draw_point(
    uint16_t *image,
    const uint32_t image_height,
    const uint32_t image_width,
    const uint32_t x,
    const uint32_t y,
    const uint32_t size,
    uint16_t color = 0b0001111100000000);
|
||
|
||
/**
 * @brief Draw a hollow rectangle on an RGB888 image.
 *
 * @param image        pointer of input image
 * @param image_height height of input image
 * @param image_width  width of input image
 * @param x1           left up corner x
 * @param y1           left up corner y
 * @param x2           right bottom corner x
 * @param y2           right bottom corner y
 * @param color        0x    00|       00|       00|       00
 *                     reserved|channel 0|channel 1|channel 2
 */
void draw_hollow_rectangle(
    uint8_t *image,
    const uint32_t image_height,
    const uint32_t image_width,
    uint32_t x1,
    uint32_t y1,
    uint32_t x2,
    uint32_t y2,
    uint32_t color = 0x00FF0000);
|
||
|
||
/**
 * @brief Draw a hollow rectangle on an RGB565 image.
 *
 * @param image        pointer of input image
 * @param image_height height of input image
 * @param image_width  width of input image
 * @param x1           left up corner x
 * @param y1           left up corner y
 * @param x2           right bottom corner x
 * @param y2           right bottom corner y
 * @param color        0b         000|    00000|    00000|           000
 *                     channel 1[2:0]|channel 0|channel 2|channel 1[5:3]
 */
void draw_hollow_rectangle(
    uint16_t *image,
    const uint32_t image_height,
    const uint32_t image_width,
    uint32_t x1,
    uint32_t y1,
    uint32_t x2,
    uint32_t y2,
    const uint16_t color = 0b0001111100000000);
|
||
|
||
/**
 * @brief Detect target movement between two RGB565 frames by counting activated detection points.
 *        Each cross in the figure below is a detection point.
 *        Once abs(frame_1_detection_point[i] - frame_2_detection_point[i]) > threshold, that detection point is activated.
 *        This function returns the number of activated detection points.
 *
 *         __stride__________________________
 *         |        |        |        |   |
 *  stride |        |        |        |   |
 *         |        |        |        |   |
 *         |________|________|________|   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |________|________|________| height
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |________|________|________|   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |________|________|________|___|___
 *         |                          |
 *         |__________width___________|
 *         |                          |
 *
 * Time consumption:
 * Frame shape = (240, 240)
 * Both frames are in PSRAM
 * On ESP32-S3 with CPU 240MHz, QSPI 80MHz
 *
 *      stride  latency
 *           1  28316us
 *           2   8770us
 *           4   3622us
 *           8   1990us
 *          16    880us
 *          32    260us
 *
 * In an application, outside this function, a threshold on the activated detection point number is needed.
 * Once the activated detection point number > number_threshold, the two frames are judged as "target moved".
 * How to determine the number_threshold?
 * Let's assume that the minimum shape of the target is (target_min_height, target_max_width).
 * Then, the number_threshold = [target_min_height / stride] * [target_max_width / stride] * ratio,
 * where ratio is in (0, 1); the smaller the ratio is, the more sensitive the detector is and the more false detections occur.
 *
 * @param f1        one frame in RGB565
 * @param f2        another frame in RGB565
 * @param height    height of frame
 * @param width     width of frame
 * @param stride    stride of detection point, the smaller the stride is, the more reliable the detector is.
 * @param threshold activation threshold of each detection point
 * @return activated detection point number
 */
uint32_t get_moving_point_number(
    uint16_t *f1,
    uint16_t *f2,
    const uint32_t height,
    const uint32_t width,
    const uint32_t stride,
    const uint32_t threshold = 5);
|
||
|
||
/**
 * @brief Detect target movement between two RGB888 frames by counting activated detection points.
 *        Each cross in the figure below is a detection point.
 *        Once abs(frame_1_detection_point[i] - frame_2_detection_point[i]) > threshold, that detection point is activated.
 *        This function returns the number of activated detection points.
 *
 *         __stride__________________________
 *         |        |        |        |   |
 *  stride |        |        |        |   |
 *         |        |        |        |   |
 *         |________|________|________|   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |________|________|________| height
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |________|________|________|   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |        |        |        |   |
 *         |________|________|________|___|___
 *         |                          |
 *         |__________width___________|
 *         |                          |
 *
 * In an application, outside this function, a threshold on the activated detection point number is needed.
 * Once the activated detection point number > number_threshold, the two frames are judged as "target moved".
 * How to determine the number_threshold?
 * Let's assume that the minimum shape of the target is (target_min_height, target_max_width).
 * Then, the number_threshold = [target_min_height / stride] * [target_max_width / stride] * ratio,
 * where ratio is in (0, 1); the smaller the ratio is, the more sensitive the detector is and the more false detections occur.
 *
 * @param f1        one frame in RGB888
 * @param f2        another frame in RGB888
 * @param height    height of frame
 * @param width     width of frame
 * @param stride    stride of detection point, the smaller the stride is, the more reliable the detector is.
 * @param threshold activation threshold of each detection point
 * @return activated detection point number
 */
uint32_t get_moving_point_number(
    uint8_t *f1,
    uint8_t *f2,
    const uint32_t height,
    const uint32_t width,
    const uint32_t stride,
    const uint32_t threshold = 5);
|
||
|
||
/**
|
||
* @brief Apply an affine transformation to an image.
|
||
*
|
||
* @tparam T
|
||
* @param input the input image.
|
||
* @param output the output image.
|
||
* @param M_inv the inverse transformation matrix.
|
||
*/
|
||
template <typename T>
|
||
void warp_affine(dl::Tensor<T> *input, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv);
|
||
|
||
/**
|
||
* @brief Apply an affine transformation to an image.
|
||
*
|
||
* @tparam T
|
||
* @param input the pointer of the input image.
|
||
* @param shape the shape of the input image.
|
||
* @param output the output image.
|
||
* @param M_inv the inverse transformation matrix.
|
||
*/
|
||
template <typename T>
|
||
void warp_affine(uint16_t *input, std::vector<int> shape, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv);
|
||
|
||
/**
|
||
* @brief Get the otsu thresh object.
|
||
*
|
||
* @param image the gray image.
|
||
* @return uint8_t the otsu thresh.
|
||
*/
|
||
uint8_t get_otsu_thresh(Tensor<uint8_t> &image);
|
||
|
||
/**
|
||
* @brief Convert RGB image to gray image
|
||
*
|
||
* @param image input image
|
||
* @param bgr true: the image is in BGR format
|
||
* false: the image is in RGB format
|
||
* @return Tensor<uint8_t>* output image in gray format
|
||
*/
|
||
Tensor<uint8_t> *rgb2gray(Tensor<uint8_t> &image, bool bgr = false);
|
||
|
||
/**
|
||
* @brief Convert RGB image to LAB image
|
||
*
|
||
* @param image input image
|
||
* @param bgr true: the image is in BGR format
|
||
* false: the image is in RGB format
|
||
* @param fast true: use the fast alogrithm, but the accuracy will be reduced
|
||
* false: do not use the fast alogrithm
|
||
* @return Tensor<uint8_t>* output image in LAB foramt
|
||
*/
|
||
Tensor<uint8_t> *rgb2lab(Tensor<uint8_t> &image, bool bgr = false, bool fast = true);
|
||
|
||
/**
|
||
* @brief Convert RGB image to HSV image
|
||
*
|
||
* @param image input image
|
||
* @param bgr true: the image is in BGR format
|
||
* false: the image is in RGB format
|
||
* @param fast true: use the fast alogrithm, but the accuracy will be reduced
|
||
* false: do not use the fast alogrithm
|
||
* @return Tensor<uint8_t>* output image in HSV format
|
||
*/
|
||
Tensor<uint8_t> *rgb2hsv(Tensor<uint8_t> &image, bool bgr = false, bool fast = true);
|
||
|
||
/**
|
||
* @brief resize an image to the target shape.
|
||
*
|
||
* @param image the input image Tensor
|
||
* @param target_shape the target shape of the resized image.
|
||
* @param resize_type one of IMAGE_RESIZE_BILINEAR or IMAGE_RESIZE_MEAN or IMAGE_RESIZE_NEAREST
|
||
* @return Tensor<uint8_t>* the pointer of the resized image Tensor
|
||
*/
|
||
Tensor<uint8_t> *resize_image(Tensor<uint8_t> &image, std::vector<int> target_shape, resize_type_t resize_type);
|
||
|
||
/**
|
||
* @brief resize an image to the target shape.
|
||
*
|
||
* @param image the input image Tensor
|
||
* @param resized_image the resized image Tensor
|
||
* @param resize_type one of IMAGE_RESIZE_BILINEAR or IMAGE_RESIZE_MEAN or IMAGE_RESIZE_NEAREST
|
||
*/
|
||
void resize_image(Tensor<uint8_t> &image, Tensor<uint8_t> &resized_image, resize_type_t resize_type);
|
||
|
||
/**
 * @brief Resize an image to the target shape with the nearest method and return the result.
 *
 * @tparam T           element type of the image buffers
 * @param image        the pointer of the input image
 * @param input_shape  the input shape of the image
 * @param target_shape the target shape of the resized image
 * @return T* the pointer of the resized image
 */
template <typename T>
T *resize_image_nearest(
    T *image,
    std::vector<int> input_shape,
    std::vector<int> target_shape);
|
||
|
||
/**
 * @brief Resize an image to the target shape with the nearest method, writing into a caller-provided buffer.
 *
 * @tparam T            element type of the image buffers
 * @param image         the pointer of the input image
 * @param input_shape   the input shape of the image
 * @param resized_image the pointer of the resized image
 * @param target_shape  the target shape of the resized image
 */
template <typename T>
void resize_image_nearest(
    T *image,
    std::vector<int> input_shape,
    T *resized_image,
    std::vector<int> target_shape);
|
||
|
||
} // namespace image
|
||
} // namespace dl
|