The intent of the outputCfgs argument to the configure() function of converter classes and the softISP is to allow the passed in stream-configs to not be changed. But only the vector is const, the references inside the vector are not const, which allows modifying the stream-configs, as can be seen inside DebayerEGL::configure() which was using a non-const reference outputCfg helper variable. Fix this by making the references inside the vector const. Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com> Reviewed-by: Barnabás Pőcze <barnabas.pocze@ideasonboard.com> Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
1078 lines
32 KiB
C++
1078 lines
32 KiB
C++
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
|
/*
|
|
* Copyright (C) 2023, Linaro Ltd
|
|
* Copyright (C) 2023-2026 Red Hat Inc.
|
|
*
|
|
* Authors:
|
|
* Hans de Goede <hdegoede@redhat.com>
|
|
*
|
|
* CPU based debayering class
|
|
*/
|
|
|
|
#include "debayer_cpu.h"
|
|
|
|
#include <algorithm>
|
|
#include <stdlib.h>
|
|
#include <sys/ioctl.h>
|
|
#include <utility>
|
|
|
|
#include <linux/dma-buf.h>
|
|
|
|
#include <libcamera/base/thread.h>
|
|
|
|
#include <libcamera/formats.h>
|
|
|
|
#include "libcamera/internal/bayer_format.h"
|
|
#include "libcamera/internal/framebuffer.h"
|
|
#include "libcamera/internal/global_configuration.h"
|
|
#include "libcamera/internal/mapped_framebuffer.h"
|
|
|
|
namespace libcamera {
|
|
|
|
/**
|
|
* \brief Class representing one CPU debayering thread
|
|
*
|
|
* Implementation for CPU based debayering threads.
|
|
*/
|
|
class DebayerCpuThread : public Thread, public Object
|
|
{
|
|
public:
|
|
DebayerCpuThread(DebayerCpu *debayer, unsigned int threadIndex,
|
|
bool enableInputMemcpy);
|
|
|
|
void configure(unsigned int yStart, unsigned int yEnd);
|
|
void process(uint32_t frame, const uint8_t *src, uint8_t *dst);
|
|
|
|
private:
|
|
void setupInputMemcpy(const uint8_t *linePointers[]);
|
|
void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);
|
|
void memcpyNextLine(const uint8_t *linePointers[]);
|
|
void process2(uint32_t frame, const uint8_t *src, uint8_t *dst);
|
|
void process4(uint32_t frame, const uint8_t *src, uint8_t *dst);
|
|
|
|
/* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */
|
|
static constexpr unsigned int kMaxLineBuffers = 5;
|
|
|
|
DebayerCpu *debayer_;
|
|
unsigned int threadIndex_;
|
|
unsigned int yStart_;
|
|
unsigned int yEnd_;
|
|
unsigned int lineBufferLength_;
|
|
unsigned int lineBufferPadding_;
|
|
unsigned int lineBufferIndex_;
|
|
std::vector<uint8_t> lineBuffers_[kMaxLineBuffers];
|
|
bool enableInputMemcpy_;
|
|
};
|
|
|
|
/**
|
|
* \brief Construct a DebayerCpuThread object
|
|
* \param[in] debayer pointer back to the DebayerCpuObject this thread belongs to
|
|
* \param[in] threadIndex 0 .. n thread-index value for the thread
|
|
* \param[in] enableInputMemcpy when set copy input data to a heap buffer before use
|
|
*/
|
|
DebayerCpuThread::DebayerCpuThread(DebayerCpu *debayer, unsigned int threadIndex,
|
|
bool enableInputMemcpy)
|
|
: Thread("DebayerCpu:" + std::to_string(threadIndex)),
|
|
debayer_(debayer), threadIndex_(threadIndex),
|
|
enableInputMemcpy_(enableInputMemcpy)
|
|
{
|
|
moveToThread(this);
|
|
}
|
|
|
|
/**
|
|
* \class DebayerCpu
|
|
* \brief Class for debayering on the CPU
|
|
*
|
|
* Implementation for CPU based debayering
|
|
*/
|
|
|
|
/**
|
|
* \brief Constructs a DebayerCpu object
|
|
* \param[in] stats Pointer to the stats object to use
|
|
* \param[in] configuration The global configuration
|
|
*/
|
|
DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats, const GlobalConfiguration &configuration)
|
|
: Debayer(configuration), stats_(std::move(stats))
|
|
{
|
|
/*
|
|
* Reading from uncached buffers may be very slow.
|
|
* In such a case, it's better to copy input buffer data to normal memory.
|
|
* But in case of cached buffers, copying the data is unnecessary overhead.
|
|
* enable_input_memcpy_ makes this behavior configurable. At the moment, we
|
|
* always set it to true as the safer choice but this should be changed in
|
|
* future.
|
|
*
|
|
* \todo Make memcpy automatic based on runtime detection of platform
|
|
* capabilities.
|
|
*/
|
|
bool enableInputMemcpy =
|
|
configuration.option<bool>({ "software_isp", "copy_input_buffer" }).value_or(true);
|
|
|
|
unsigned int threadCount =
|
|
configuration.option<unsigned int>({ "software_isp", "threads" }).value_or(kDefaultThreads);
|
|
threadCount = std::clamp(threadCount, kMinThreads, kMaxThreads);
|
|
threads_.resize(threadCount);
|
|
|
|
for (unsigned int i = 0; i < threads_.size(); i++)
|
|
threads_[i] = std::make_unique<DebayerCpuThread>(this, i, enableInputMemcpy);
|
|
|
|
LOG(Debayer, Debug) << "Thread count " << threadCount;
|
|
}
|
|
|
|
/*
 * Defaulted out of line, in the file that defines DebayerCpuThread.
 * NOTE(review): presumably required because the header only forward-declares
 * DebayerCpuThread - confirm against debayer_cpu.h.
 */
DebayerCpu::~DebayerCpu() = default;
|
|
|
|
/*
 * Declare prev, curr and next as pixel_t views of src[0], src[1] and src[2],
 * each advanced by xShift_ pixels. Expects src and xShift_ to be in scope.
 */
#define DECLARE_SRC_POINTERS(pixel_t) \
	const pixel_t *prev = reinterpret_cast<const pixel_t *>(src[0]) + xShift_; \
	const pixel_t *curr = reinterpret_cast<const pixel_t *>(src[1]) + xShift_; \
	const pixel_t *next = reinterpret_cast<const pixel_t *>(src[2]) + xShift_;
|
|
|
|
/*
 * Store gammaLut_[value] at *dst and advance dst, with value clamped to the
 * valid index range of gammaLut_.
 */
#define GAMMA(value) \
	*dst++ = gammaLut_[std::clamp((value), 0, static_cast<int>(gammaLut_.size()) - 1)]
|
|
|
|
/*
 * Write one output pixel (3 bytes, plus a 255 alpha byte when addAlphaByte
 * is set) for the given blue, green and red lookup indices, then advance x.
 * Without CCM the per-channel lookup tables are used directly; with CCM the
 * three per-channel contributions are summed and passed through GAMMA().
 */
#define STORE_PIXEL(b_, g_, r_) \
	if constexpr (!ccmEnabled) { \
		*dst++ = blue_[b_]; \
		*dst++ = green_[g_]; \
		*dst++ = red_[r_]; \
	} else { \
		const CcmColumn &bCol = blueCcm_[b_]; \
		const CcmColumn &gCol = greenCcm_[g_]; \
		const CcmColumn &rCol = redCcm_[r_]; \
		GAMMA(bCol.b + gCol.b + rCol.b); \
		GAMMA(bCol.g + gCol.g + rCol.g); \
		GAMMA(bCol.r + gCol.r + rCol.r); \
	} \
	if constexpr (addAlphaByte) \
		*dst++ = 255; \
	++x;
|
|
|
|
/*
 * Pixel at the centre (B) of:
 * RGR
 * GBG
 * RGR
 *
 * p and n are the offsets to the previous and next column, div scales the
 * samples down to lookup-table indices.
 */
#define BGGR_BGR888(p, n, div) \
	STORE_PIXEL( \
		curr[x] / (div), \
		(prev[x] + curr[x - (p)] + curr[x + (n)] + next[x]) / (4 * (div)), \
		(prev[x - (p)] + prev[x + (n)] + next[x - (p)] + next[x + (n)]) / (4 * (div)))
|
|
|
|
/*
 * Pixel at the centre (G, with R to its left and right) of:
 * GBG
 * RGR
 * GBG
 *
 * p and n are the offsets to the previous and next column, div scales the
 * samples down to lookup-table indices.
 */
#define GRBG_BGR888(p, n, div) \
	STORE_PIXEL( \
		(prev[x] + next[x]) / (2 * (div)), \
		curr[x] / (div), \
		(curr[x - (p)] + curr[x + (n)]) / (2 * (div)))
|
|
|
|
/*
 * Pixel at the centre (G, with B to its left and right) of:
 * GRG
 * BGB
 * GRG
 *
 * p and n are the offsets to the previous and next column, div scales the
 * samples down to lookup-table indices.
 */
#define GBRG_BGR888(p, n, div) \
	STORE_PIXEL( \
		(curr[x - (p)] + curr[x + (n)]) / (2 * (div)), \
		curr[x] / (div), \
		(prev[x] + next[x]) / (2 * (div)))
|
|
|
|
/*
 * Pixel at the centre (R) of:
 * BGB
 * GRG
 * BGB
 *
 * p and n are the offsets to the previous and next column, div scales the
 * samples down to lookup-table indices.
 */
#define RGGB_BGR888(p, n, div) \
	STORE_PIXEL( \
		(prev[x - (p)] + prev[x + (n)] + next[x - (p)] + next[x + (n)]) / (4 * (div)), \
		(prev[x] + curr[x - (p)] + curr[x + (n)] + next[x]) / (4 * (div)), \
		curr[x] / (div))
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
DECLARE_SRC_POINTERS(uint8_t)
|
|
|
|
for (int x = 0; x < (int)window_.width;) {
|
|
BGGR_BGR888(1, 1, 1)
|
|
GBRG_BGR888(1, 1, 1)
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
DECLARE_SRC_POINTERS(uint8_t)
|
|
|
|
for (int x = 0; x < (int)window_.width;) {
|
|
GRBG_BGR888(1, 1, 1)
|
|
RGGB_BGR888(1, 1, 1)
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
DECLARE_SRC_POINTERS(uint16_t)
|
|
|
|
for (int x = 0; x < (int)window_.width;) {
|
|
/* divide values by 4 for 10 -> 8 bpp value */
|
|
BGGR_BGR888(1, 1, 4)
|
|
GBRG_BGR888(1, 1, 4)
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
DECLARE_SRC_POINTERS(uint16_t)
|
|
|
|
for (int x = 0; x < (int)window_.width;) {
|
|
/* divide values by 4 for 10 -> 8 bpp value */
|
|
GRBG_BGR888(1, 1, 4)
|
|
RGGB_BGR888(1, 1, 4)
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
DECLARE_SRC_POINTERS(uint16_t)
|
|
|
|
for (int x = 0; x < (int)window_.width;) {
|
|
/* divide values by 16 for 12 -> 8 bpp value */
|
|
BGGR_BGR888(1, 1, 16)
|
|
GBRG_BGR888(1, 1, 16)
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
DECLARE_SRC_POINTERS(uint16_t)
|
|
|
|
for (int x = 0; x < (int)window_.width;) {
|
|
/* divide values by 16 for 12 -> 8 bpp value */
|
|
GRBG_BGR888(1, 1, 16)
|
|
RGGB_BGR888(1, 1, 16)
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
const int widthInBytes = window_.width * 5 / 4;
|
|
const uint8_t *prev = src[0];
|
|
const uint8_t *curr = src[1];
|
|
const uint8_t *next = src[2];
|
|
|
|
/*
|
|
* For the first pixel getting a pixel from the previous column uses
|
|
* x - 2 to skip the 5th byte with least-significant bits for 4 pixels.
|
|
* Same for last pixel (uses x + 2) and looking at the next column.
|
|
*/
|
|
for (int x = 0; x < widthInBytes;) {
|
|
/* First pixel */
|
|
BGGR_BGR888(2, 1, 1)
|
|
/* Second pixel BGGR -> GBRG */
|
|
GBRG_BGR888(1, 1, 1)
|
|
/* Same thing for third and fourth pixels */
|
|
BGGR_BGR888(1, 1, 1)
|
|
GBRG_BGR888(1, 2, 1)
|
|
/* Skip 5th src byte with 4 x 2 least-significant-bits */
|
|
x++;
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
const int widthInBytes = window_.width * 5 / 4;
|
|
const uint8_t *prev = src[0];
|
|
const uint8_t *curr = src[1];
|
|
const uint8_t *next = src[2];
|
|
|
|
for (int x = 0; x < widthInBytes;) {
|
|
/* First pixel */
|
|
GRBG_BGR888(2, 1, 1)
|
|
/* Second pixel GRBG -> RGGB */
|
|
RGGB_BGR888(1, 1, 1)
|
|
/* Same thing for third and fourth pixels */
|
|
GRBG_BGR888(1, 1, 1)
|
|
RGGB_BGR888(1, 2, 1)
|
|
/* Skip 5th src byte with 4 x 2 least-significant-bits */
|
|
x++;
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
const int widthInBytes = window_.width * 5 / 4;
|
|
const uint8_t *prev = src[0];
|
|
const uint8_t *curr = src[1];
|
|
const uint8_t *next = src[2];
|
|
|
|
for (int x = 0; x < widthInBytes;) {
|
|
/* Even pixel */
|
|
GBRG_BGR888(2, 1, 1)
|
|
/* Odd pixel GBGR -> BGGR */
|
|
BGGR_BGR888(1, 1, 1)
|
|
/* Same thing for next 2 pixels */
|
|
GBRG_BGR888(1, 1, 1)
|
|
BGGR_BGR888(1, 2, 1)
|
|
/* Skip 5th src byte with 4 x 2 least-significant-bits */
|
|
x++;
|
|
}
|
|
}
|
|
|
|
template<bool addAlphaByte, bool ccmEnabled>
|
|
void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
|
|
{
|
|
const int widthInBytes = window_.width * 5 / 4;
|
|
const uint8_t *prev = src[0];
|
|
const uint8_t *curr = src[1];
|
|
const uint8_t *next = src[2];
|
|
|
|
for (int x = 0; x < widthInBytes;) {
|
|
/* Even pixel */
|
|
RGGB_BGR888(2, 1, 1)
|
|
/* Odd pixel RGGB -> GRBG */
|
|
GRBG_BGR888(1, 1, 1)
|
|
/* Same thing for next 2 pixels */
|
|
RGGB_BGR888(1, 1, 1)
|
|
GRBG_BGR888(1, 2, 1)
|
|
/* Skip 5th src byte with 4 x 2 least-significant-bits */
|
|
x++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Setup the Debayer object according to the passed in parameters.
|
|
* Return 0 on success, a negative errno value on failure
|
|
* (unsupported parameters).
|
|
*/
|
|
int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
|
|
{
|
|
BayerFormat bayerFormat =
|
|
BayerFormat::fromPixelFormat(inputFormat);
|
|
|
|
if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10 || bayerFormat.bitDepth == 12) &&
|
|
bayerFormat.packing == BayerFormat::Packing::None &&
|
|
isStandardBayerOrder(bayerFormat.order)) {
|
|
config.bpp = (bayerFormat.bitDepth + 7) & ~7;
|
|
config.patternSize.width = 2;
|
|
config.patternSize.height = 2;
|
|
config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
|
|
formats::XRGB8888,
|
|
formats::ARGB8888,
|
|
formats::BGR888,
|
|
formats::XBGR8888,
|
|
formats::ABGR8888 });
|
|
return 0;
|
|
}
|
|
|
|
if (bayerFormat.bitDepth == 10 &&
|
|
bayerFormat.packing == BayerFormat::Packing::CSI2 &&
|
|
isStandardBayerOrder(bayerFormat.order)) {
|
|
config.bpp = 10;
|
|
config.patternSize.width = 4; /* 5 bytes per *4* pixels */
|
|
config.patternSize.height = 2;
|
|
config.outputFormats = std::vector<PixelFormat>({ formats::RGB888,
|
|
formats::XRGB8888,
|
|
formats::ARGB8888,
|
|
formats::BGR888,
|
|
formats::XBGR8888,
|
|
formats::ABGR8888 });
|
|
return 0;
|
|
}
|
|
|
|
LOG(Debayer, Info)
|
|
<< "Unsupported input format " << inputFormat.toString();
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
 * Set config.bpp for \a outputFormat: 24 for the 3 bytes per pixel formats,
 * 32 for the formats with a 4th (X or A) byte.
 * Return 0 on success or -EINVAL for an unsupported output format.
 */
int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
{
	const bool is24bpp = outputFormat == formats::RGB888 ||
			     outputFormat == formats::BGR888;
	const bool is32bpp = outputFormat == formats::XRGB8888 ||
			     outputFormat == formats::ARGB8888 ||
			     outputFormat == formats::XBGR8888 ||
			     outputFormat == formats::ABGR8888;

	if (!is24bpp && !is32bpp) {
		LOG(Debayer, Info)
			<< "Unsupported output format " << outputFormat.toString();
		return -EINVAL;
	}

	config.bpp = is24bpp ? 24 : 32;
	return 0;
}
|
|
|
|
/*
|
|
* Check for standard Bayer orders and set xShift_ and swap debayer0/1, so that
|
|
* a single pair of BGGR debayer functions can be used for all 4 standard orders.
|
|
*/
|
|
int DebayerCpu::setupStandardBayerOrder(BayerFormat::Order order)
|
|
{
|
|
switch (order) {
|
|
case BayerFormat::BGGR:
|
|
break;
|
|
case BayerFormat::GBRG:
|
|
xShift_ = 1; /* BGGR -> GBRG */
|
|
break;
|
|
case BayerFormat::GRBG:
|
|
std::swap(debayer0_, debayer1_); /* BGGR -> GRBG */
|
|
break;
|
|
case BayerFormat::RGGB:
|
|
xShift_ = 1; /* BGGR -> GBRG */
|
|
std::swap(debayer0_, debayer1_); /* GBRG -> RGGB */
|
|
break;
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Point debayer0_ and debayer1_ at the instantiations of method0 and method1
 * matching the runtime values of addAlphaByte and ccmEnabled, which must be
 * in scope where the macro is used.
 */
#define SET_DEBAYER_METHODS(method0, method1) \
	if (addAlphaByte) { \
		debayer0_ = ccmEnabled ? &DebayerCpu::method0<true, true> \
				       : &DebayerCpu::method0<true, false>; \
		debayer1_ = ccmEnabled ? &DebayerCpu::method1<true, true> \
				       : &DebayerCpu::method1<true, false>; \
	} else { \
		debayer0_ = ccmEnabled ? &DebayerCpu::method0<false, true> \
				       : &DebayerCpu::method0<false, false>; \
		debayer1_ = ccmEnabled ? &DebayerCpu::method1<false, true> \
				       : &DebayerCpu::method1<false, false>; \
	}
|
|
|
|
/*
 * Select the debayer0_/debayer1_ line functions, xShift_ and
 * swapRedBlueGains_ for the given input/output format combination.
 * Return 0 on success or -EINVAL when the combination is not supported.
 */
int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat,
				    PixelFormat outputFormat,
				    bool ccmEnabled)
{
	BayerFormat bayer = BayerFormat::fromPixelFormat(inputFormat);
	bool addAlphaByte = false;
	bool redBlueSwapped = false;

	xShift_ = 0;
	swapRedBlueGains_ = false;

	auto unsupported = []() -> int {
		LOG(Debayer, Error) << "Unsupported input output format combination";
		return -EINVAL;
	};

	switch (outputFormat) {
	case formats::XRGB8888:
	case formats::ARGB8888:
		addAlphaByte = true;
		break;
	case formats::RGB888:
		break;
	case formats::XBGR8888:
	case formats::ABGR8888:
		addAlphaByte = true;
		redBlueSwapped = true;
		break;
	case formats::BGR888:
		redBlueSwapped = true;
		break;
	default:
		return unsupported();
	}

	if (redBlueSwapped) {
		/* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
		swapRedBlueGains_ = true;

		switch (bayer.order) {
		case BayerFormat::BGGR:
			bayer.order = BayerFormat::RGGB;
			break;
		case BayerFormat::GBRG:
			bayer.order = BayerFormat::GRBG;
			break;
		case BayerFormat::GRBG:
			bayer.order = BayerFormat::GBRG;
			break;
		case BayerFormat::RGGB:
			bayer.order = BayerFormat::BGGR;
			break;
		default:
			return unsupported();
		}
	}

	/* Unpacked formats: one BGGR function pair per bit depth. */
	const bool unpackedDepth = bayer.bitDepth == 8 || bayer.bitDepth == 10 ||
				   bayer.bitDepth == 12;
	if (unpackedDepth &&
	    bayer.packing == BayerFormat::Packing::None &&
	    isStandardBayerOrder(bayer.order)) {
		if (bayer.bitDepth == 8) {
			SET_DEBAYER_METHODS(debayer8_BGBG_BGR888, debayer8_GRGR_BGR888)
		} else if (bayer.bitDepth == 10) {
			SET_DEBAYER_METHODS(debayer10_BGBG_BGR888, debayer10_GRGR_BGR888)
		} else {
			SET_DEBAYER_METHODS(debayer12_BGBG_BGR888, debayer12_GRGR_BGR888)
		}

		setupStandardBayerOrder(bayer.order);
		return 0;
	}

	/* Only CSI-2 packed 10 bits is left, with a function pair per order. */
	if (bayer.bitDepth != 10 || bayer.packing != BayerFormat::Packing::CSI2)
		return unsupported();

	switch (bayer.order) {
	case BayerFormat::BGGR:
		SET_DEBAYER_METHODS(debayer10P_BGBG_BGR888, debayer10P_GRGR_BGR888)
		break;
	case BayerFormat::GBRG:
		SET_DEBAYER_METHODS(debayer10P_GBGB_BGR888, debayer10P_RGRG_BGR888)
		break;
	case BayerFormat::GRBG:
		SET_DEBAYER_METHODS(debayer10P_GRGR_BGR888, debayer10P_BGBG_BGR888)
		break;
	case BayerFormat::RGGB:
		SET_DEBAYER_METHODS(debayer10P_RGRG_BGR888, debayer10P_GBGB_BGR888)
		break;
	default:
		return unsupported();
	}

	return 0;
}
|
|
|
|
int DebayerCpu::configure(const StreamConfiguration &inputCfg,
|
|
const std::vector<std::reference_wrapper<const StreamConfiguration>> &outputCfgs,
|
|
bool ccmEnabled)
|
|
{
|
|
if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
|
|
return -EINVAL;
|
|
|
|
if (stats_->configure(inputCfg, threads_.size()) != 0)
|
|
return -EINVAL;
|
|
|
|
const Size &statsPatternSize = stats_->patternSize();
|
|
if (inputConfig_.patternSize.width != statsPatternSize.width ||
|
|
inputConfig_.patternSize.height != statsPatternSize.height) {
|
|
LOG(Debayer, Error)
|
|
<< "mismatching stats and debayer pattern sizes for "
|
|
<< inputCfg.pixelFormat.toString();
|
|
return -EINVAL;
|
|
}
|
|
|
|
inputConfig_.stride = inputCfg.stride;
|
|
|
|
if (outputCfgs.size() != 1) {
|
|
LOG(Debayer, Error)
|
|
<< "Unsupported number of output streams: "
|
|
<< outputCfgs.size();
|
|
return -EINVAL;
|
|
}
|
|
|
|
const StreamConfiguration &outputCfg = outputCfgs[0];
|
|
SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
|
|
std::tie(outputConfig_.stride, outputConfig_.frameSize) =
|
|
strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);
|
|
|
|
if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
|
|
LOG(Debayer, Error)
|
|
<< "Invalid output size/stride: "
|
|
<< "\n " << outputCfg.size << " (" << outSizeRange << ")"
|
|
<< "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")";
|
|
return -EINVAL;
|
|
}
|
|
|
|
int ret = setDebayerFunctions(inputCfg.pixelFormat,
|
|
outputCfg.pixelFormat,
|
|
ccmEnabled);
|
|
if (ret != 0)
|
|
return -EINVAL;
|
|
|
|
ccmEnabled_ = ccmEnabled;
|
|
|
|
/*
|
|
* Lookup tables must be initialized because the initial value is used for
|
|
* the first two frames, i.e. until stats processing starts providing its
|
|
* own parameters. Let's enforce recomputing lookup tables by setting the
|
|
* stored last used gamma to an out-of-range value.
|
|
*/
|
|
params_.gamma = 1.0;
|
|
|
|
window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
|
|
~(inputConfig_.patternSize.width - 1);
|
|
window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
|
|
~(inputConfig_.patternSize.height - 1);
|
|
window_.width = outputCfg.size.width;
|
|
window_.height = outputCfg.size.height;
|
|
|
|
/*
|
|
* Set the stats window to the whole processed window. Its coordinates are
|
|
* relative to the debayered area since debayering passes only the part of
|
|
* data to be processed to the stats; see SwStatsCpu::setWindow.
|
|
*/
|
|
stats_->setWindow(Rectangle(window_.size()));
|
|
|
|
unsigned int yStart = 0;
|
|
unsigned int linesPerThread = (window_.height / threads_.size()) &
|
|
~(inputConfig_.patternSize.height - 1);
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < (threads_.size() - 1); i++) {
|
|
threads_[i]->configure(yStart, yStart + linesPerThread);
|
|
yStart += linesPerThread;
|
|
}
|
|
threads_[i]->configure(yStart, window_.height);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* \brief Configure thread to process a specific part of the image
|
|
* \param[in] yStart y coordinate of first line to process
|
|
* \param[in] yEnd y coordinate of the line at which to stop processing
|
|
*
|
|
* Configure the thread to process lines from yStart to yEnd - 1.
|
|
*/
|
|
void DebayerCpuThread::configure(unsigned int yStart, unsigned int yEnd)
|
|
{
|
|
Debayer::DebayerInputConfig &inputConfig = debayer_->inputConfig_;
|
|
|
|
yStart_ = yStart;
|
|
yEnd_ = yEnd;
|
|
|
|
/* pad with patternSize.Width on both left and right side */
|
|
lineBufferPadding_ = inputConfig.patternSize.width * inputConfig.bpp / 8;
|
|
lineBufferLength_ = debayer_->window_.width * inputConfig.bpp / 8 +
|
|
2 * lineBufferPadding_;
|
|
|
|
if (enableInputMemcpy_) {
|
|
for (unsigned int i = 0; i <= inputConfig.patternSize.height; i++)
|
|
lineBuffers_[i].resize(lineBufferLength_);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Get width and height at which the bayer-pattern repeats.
|
|
* Return pattern-size or an empty Size for an unsupported inputFormat.
|
|
*/
|
|
Size DebayerCpu::patternSize(PixelFormat inputFormat)
|
|
{
|
|
DebayerCpu::DebayerInputConfig config;
|
|
|
|
if (getInputConfig(inputFormat, config) != 0)
|
|
return {};
|
|
|
|
return config.patternSize;
|
|
}
|
|
|
|
/*
 * Get the output formats supported for \a inputFormat.
 * Return an empty vector for an unsupported inputFormat.
 */
std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)
{
	DebayerCpu::DebayerInputConfig inputConfig;

	const bool supported = getInputConfig(inputFormat, inputConfig) == 0;
	if (supported)
		return inputConfig.outputFormats;

	return std::vector<PixelFormat>();
}
|
|
|
|
std::tuple<unsigned int, unsigned int>
|
|
DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
|
|
{
|
|
DebayerCpu::DebayerOutputConfig config;
|
|
|
|
if (getOutputConfig(outputFormat, config) != 0)
|
|
return std::make_tuple(0, 0);
|
|
|
|
/* round up to multiple of 8 for 64 bits alignment */
|
|
unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;
|
|
|
|
return std::make_tuple(stride, stride * size.height);
|
|
}
|
|
|
|
void DebayerCpuThread::setupInputMemcpy(const uint8_t *linePointers[])
|
|
{
|
|
const unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;
|
|
|
|
if (!enableInputMemcpy_)
|
|
return;
|
|
|
|
for (unsigned int i = 0; i < patternHeight; i++) {
|
|
memcpy(lineBuffers_[i].data(),
|
|
linePointers[i + 1] - lineBufferPadding_,
|
|
lineBufferLength_);
|
|
linePointers[i + 1] = lineBuffers_[i].data() + lineBufferPadding_;
|
|
}
|
|
|
|
/* Point lineBufferIndex_ to first unused lineBuffer */
|
|
lineBufferIndex_ = patternHeight;
|
|
}
|
|
|
|
void DebayerCpuThread::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)
|
|
{
|
|
const unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;
|
|
|
|
for (unsigned int i = 0; i < patternHeight; i++)
|
|
linePointers[i] = linePointers[i + 1];
|
|
|
|
linePointers[patternHeight] =
|
|
src + (patternHeight / 2) * (int)debayer_->inputConfig_.stride;
|
|
}
|
|
|
|
void DebayerCpuThread::memcpyNextLine(const uint8_t *linePointers[])
|
|
{
|
|
const unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;
|
|
|
|
if (!enableInputMemcpy_)
|
|
return;
|
|
|
|
memcpy(lineBuffers_[lineBufferIndex_].data(),
|
|
linePointers[patternHeight] - lineBufferPadding_,
|
|
lineBufferLength_);
|
|
linePointers[patternHeight] = lineBuffers_[lineBufferIndex_].data() + lineBufferPadding_;
|
|
|
|
lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);
|
|
}
|
|
|
|
/**
|
|
* \brief Process part of the image assigned to this debayer thread
|
|
* \param[in] frame The frame number
|
|
* \param[in] src The source buffer
|
|
* \param[in] dst The destination buffer
|
|
*/
|
|
void DebayerCpuThread::process(uint32_t frame, const uint8_t *src, uint8_t *dst)
|
|
{
|
|
Rectangle &window = debayer_->window_;
|
|
|
|
/* Adjust src to top left corner of the window */
|
|
src += (window.y + yStart_) * debayer_->inputConfig_.stride +
|
|
window.x * debayer_->inputConfig_.bpp / 8;
|
|
/* Adjust dst for yStart_ */
|
|
dst += yStart_ * debayer_->outputConfig_.stride;
|
|
|
|
if (debayer_->inputConfig_.patternSize.height == 2)
|
|
process2(frame, src, dst);
|
|
else
|
|
process4(frame, src, dst);
|
|
|
|
debayer_->workPendingMutex_.lock();
|
|
debayer_->workPending_ &= ~(1 << threadIndex_);
|
|
debayer_->workPendingMutex_.unlock();
|
|
debayer_->workPendingCv_.notify_one();
|
|
}
|
|
|
|
void DebayerCpuThread::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)
|
|
{
|
|
unsigned int outputStride = debayer_->outputConfig_.stride;
|
|
unsigned int inputStride = debayer_->inputConfig_.stride;
|
|
Rectangle &window = debayer_->window_;
|
|
unsigned int yEnd = yEnd_;
|
|
/* Holds [0] previous- [1] current- [2] next-line */
|
|
const uint8_t *linePointers[3];
|
|
|
|
/* [x] becomes [x - 1] after initial shiftLinePointers() call */
|
|
if (window.y + yStart_) {
|
|
linePointers[1] = src - inputStride; /* previous-line */
|
|
linePointers[2] = src;
|
|
} else {
|
|
/* Top line, use the next line as prev line */
|
|
linePointers[1] = src + inputStride;
|
|
linePointers[2] = src;
|
|
}
|
|
|
|
if (window.y == 0 && yEnd_ == window.height) {
|
|
/*
|
|
* Last 2 lines also need special handling.
|
|
* (And configure() ensures that yEnd >= 2.)
|
|
*/
|
|
yEnd -= 2;
|
|
}
|
|
|
|
setupInputMemcpy(linePointers);
|
|
|
|
/*
|
|
* Note y is the line-number *inside* the window, since stats_' window
|
|
* is the stats window inside/relative to the debayer window. IOW for
|
|
* single thread rendering y goes from 0 to window.height.
|
|
*/
|
|
for (unsigned int y = yStart_; y < yEnd; y += 2) {
|
|
shiftLinePointers(linePointers, src);
|
|
memcpyNextLine(linePointers);
|
|
debayer_->stats_->processLine0(frame, y, linePointers, threadIndex_);
|
|
debayer_->debayer0(dst, linePointers);
|
|
src += inputStride;
|
|
dst += outputStride;
|
|
|
|
shiftLinePointers(linePointers, src);
|
|
memcpyNextLine(linePointers);
|
|
debayer_->debayer1(dst, linePointers);
|
|
src += inputStride;
|
|
dst += outputStride;
|
|
}
|
|
|
|
if (window.y == 0 && yEnd_ == window.height) {
|
|
shiftLinePointers(linePointers, src);
|
|
memcpyNextLine(linePointers);
|
|
debayer_->stats_->processLine0(frame, yEnd, linePointers, threadIndex_);
|
|
debayer_->debayer0(dst, linePointers);
|
|
src += inputStride;
|
|
dst += outputStride;
|
|
|
|
shiftLinePointers(linePointers, src);
|
|
/* next line may point outside of src, use prev. */
|
|
linePointers[2] = linePointers[0];
|
|
debayer_->debayer1(dst, linePointers);
|
|
src += inputStride;
|
|
dst += outputStride;
|
|
}
|
|
}
|
|
|
|
void DebayerCpuThread::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)
|
|
{
|
|
unsigned int outputStride = debayer_->outputConfig_.stride;
|
|
unsigned int inputStride = debayer_->inputConfig_.stride;
|
|
|
|
/*
|
|
* This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line
|
|
* [3] 1-line-down [4] 2-lines-down.
|
|
*/
|
|
const uint8_t *linePointers[5];
|
|
|
|
/* [x] becomes [x - 1] after initial shiftLinePointers() call */
|
|
linePointers[1] = src - 2 * inputStride;
|
|
linePointers[2] = src - inputStride;
|
|
linePointers[3] = src;
|
|
linePointers[4] = src + inputStride;
|
|
|
|
setupInputMemcpy(linePointers);
|
|
|
|
/*
|
|
* Note y is the line-number *inside* the window, since stats_' window
|
|
* is the stats window inside/relative to the debayer window. IOW for
|
|
* single thread rendering y goes from 0 to window.height.
|
|
*/
|
|
for (unsigned int y = yStart_; y < yEnd_; y += 4) {
|
|
shiftLinePointers(linePointers, src);
|
|
memcpyNextLine(linePointers);
|
|
debayer_->stats_->processLine0(frame, y, linePointers, threadIndex_);
|
|
debayer_->debayer0(dst, linePointers);
|
|
src += inputStride;
|
|
dst += outputStride;
|
|
|
|
shiftLinePointers(linePointers, src);
|
|
memcpyNextLine(linePointers);
|
|
debayer_->debayer1(dst, linePointers);
|
|
src += inputStride;
|
|
dst += outputStride;
|
|
|
|
shiftLinePointers(linePointers, src);
|
|
memcpyNextLine(linePointers);
|
|
debayer_->stats_->processLine2(frame, y, linePointers, threadIndex_);
|
|
debayer_->debayer2(dst, linePointers);
|
|
src += inputStride;
|
|
dst += outputStride;
|
|
|
|
shiftLinePointers(linePointers, src);
|
|
memcpyNextLine(linePointers);
|
|
debayer_->debayer3(dst, linePointers);
|
|
src += inputStride;
|
|
dst += outputStride;
|
|
}
|
|
}
|
|
|
|
void DebayerCpu::updateGammaTable(const DebayerParams ¶ms)
|
|
{
|
|
const RGB<float> blackLevel = params.blackLevel;
|
|
/* Take let's say the green channel black level */
|
|
const unsigned int blackIndex = blackLevel[1] * gammaTable_.size();
|
|
const float gamma = params.gamma;
|
|
const float contrastExp = params.contrastExp;
|
|
|
|
const float divisor = gammaTable_.size() - blackIndex - 1.0;
|
|
for (unsigned int i = blackIndex; i < gammaTable_.size(); i++) {
|
|
float normalized = (i - blackIndex) / divisor;
|
|
/* Convert 0..2 to 0..infinity; avoid actual inifinity at tan(pi/2) */
|
|
/* Apply simple S-curve */
|
|
if (normalized < 0.5)
|
|
normalized = 0.5 * std::pow(normalized / 0.5, contrastExp);
|
|
else
|
|
normalized = 1.0 - 0.5 * std::pow((1.0 - normalized) / 0.5, contrastExp);
|
|
gammaTable_[i] = UINT8_MAX *
|
|
std::pow(normalized, gamma);
|
|
}
|
|
/*
|
|
* Due to CCM operations, the table lookup may reach indices below the black
|
|
* level. Let's set the table values below black level to the minimum
|
|
* non-black value to prevent problems when the minimum value is
|
|
* significantly non-zero (for example, when the image should be all grey).
|
|
*/
|
|
std::fill(gammaTable_.begin(), gammaTable_.begin() + blackIndex,
|
|
gammaTable_[blackIndex]);
|
|
}
|
|
|
|
void DebayerCpu::updateLookupTables(const DebayerParams ¶ms)
|
|
{
|
|
const bool gammaUpdateNeeded =
|
|
params.gamma != params_.gamma ||
|
|
params.blackLevel != params_.blackLevel ||
|
|
params.contrastExp != params_.contrastExp;
|
|
if (gammaUpdateNeeded)
|
|
updateGammaTable(params);
|
|
|
|
auto matrixChanged = [](const Matrix<float, 3, 3> &m1, const Matrix<float, 3, 3> &m2) -> bool {
|
|
return !std::equal(m1.data().begin(), m1.data().end(), m2.data().begin());
|
|
};
|
|
const unsigned int gammaTableSize = gammaTable_.size();
|
|
const double div = static_cast<double>(kRGBLookupSize) / gammaTableSize;
|
|
if (ccmEnabled_) {
|
|
if (gammaUpdateNeeded ||
|
|
matrixChanged(params.combinedMatrix, params_.combinedMatrix)) {
|
|
auto &red = swapRedBlueGains_ ? blueCcm_ : redCcm_;
|
|
auto &green = greenCcm_;
|
|
auto &blue = swapRedBlueGains_ ? redCcm_ : blueCcm_;
|
|
const unsigned int redIndex = swapRedBlueGains_ ? 2 : 0;
|
|
const unsigned int greenIndex = 1;
|
|
const unsigned int blueIndex = swapRedBlueGains_ ? 0 : 2;
|
|
for (unsigned int i = 0; i < kRGBLookupSize; i++) {
|
|
red[i].r = std::round(i * params.combinedMatrix[redIndex][0]);
|
|
red[i].g = std::round(i * params.combinedMatrix[greenIndex][0]);
|
|
red[i].b = std::round(i * params.combinedMatrix[blueIndex][0]);
|
|
green[i].r = std::round(i * params.combinedMatrix[redIndex][1]);
|
|
green[i].g = std::round(i * params.combinedMatrix[greenIndex][1]);
|
|
green[i].b = std::round(i * params.combinedMatrix[blueIndex][1]);
|
|
blue[i].r = std::round(i * params.combinedMatrix[redIndex][2]);
|
|
blue[i].g = std::round(i * params.combinedMatrix[greenIndex][2]);
|
|
blue[i].b = std::round(i * params.combinedMatrix[blueIndex][2]);
|
|
gammaLut_[i] = gammaTable_[i / div];
|
|
}
|
|
}
|
|
} else {
|
|
if (gammaUpdateNeeded || params.gains != params_.gains) {
|
|
auto &gains = params.gains;
|
|
auto &red = swapRedBlueGains_ ? blue_ : red_;
|
|
auto &green = green_;
|
|
auto &blue = swapRedBlueGains_ ? red_ : blue_;
|
|
for (unsigned int i = 0; i < kRGBLookupSize; i++) {
|
|
/* Apply gamma after gain! */
|
|
const RGB<float> lutGains = (gains * i / div).min(gammaTableSize - 1);
|
|
red[i] = gammaTable_[static_cast<unsigned int>(lutGains.r())];
|
|
green[i] = gammaTable_[static_cast<unsigned int>(lutGains.g())];
|
|
blue[i] = gammaTable_[static_cast<unsigned int>(lutGains.b())];
|
|
}
|
|
}
|
|
}
|
|
|
|
LOG(Debayer, Debug)
|
|
<< "Debayer parameters: blackLevel=" << params.blackLevel
|
|
<< "; gamma=" << params.gamma
|
|
<< "; contrastExp=" << params.contrastExp
|
|
<< "; gains=" << params.gains
|
|
<< "; matrix=" << params.combinedMatrix;
|
|
|
|
params_ = params;
|
|
}
|
|
|
|
void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, const DebayerParams ¶ms)
|
|
{
|
|
bench_.startFrame();
|
|
|
|
std::vector<DmaSyncer> dmaSyncers;
|
|
|
|
dmaSyncBegin(dmaSyncers, input, output);
|
|
|
|
updateLookupTables(params);
|
|
|
|
/* Copy metadata from the input buffer */
|
|
FrameMetadata &metadata = output->_d()->metadata();
|
|
metadata.status = input->metadata().status;
|
|
metadata.sequence = input->metadata().sequence;
|
|
metadata.timestamp = input->metadata().timestamp;
|
|
|
|
MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
|
|
MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);
|
|
if (!in.isValid() || !out.isValid()) {
|
|
LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
|
|
metadata.status = FrameMetadata::FrameError;
|
|
return;
|
|
}
|
|
|
|
stats_->startFrame(frame);
|
|
|
|
workPendingMutex_.lock();
|
|
workPending_ = (1 << threads_.size()) - 1;
|
|
workPendingMutex_.unlock();
|
|
|
|
for (auto &thread : threads_)
|
|
thread->invokeMethod(&DebayerCpuThread::process,
|
|
ConnectionTypeQueued, frame,
|
|
in.planes()[0].data(), out.planes()[0].data());
|
|
|
|
{
|
|
MutexLocker locker(workPendingMutex_);
|
|
workPendingCv_.wait(locker, [&]() LIBCAMERA_TSA_REQUIRES(workPendingMutex_) {
|
|
return workPending_ == 0;
|
|
});
|
|
}
|
|
|
|
metadata.planes()[0].bytesused = out.planes()[0].size();
|
|
|
|
dmaSyncers.clear();
|
|
|
|
/* Measure before emitting signals */
|
|
bench_.finishFrame();
|
|
|
|
/*
|
|
* Buffer ids are currently not used, so pass zeros as its parameter.
|
|
*
|
|
* \todo Pass real bufferId once stats buffer passing is changed.
|
|
*/
|
|
stats_->finishFrame(frame, 0);
|
|
outputBufferReady.emit(output);
|
|
inputBufferReady.emit(input);
|
|
}
|
|
|
|
int DebayerCpu::start()
|
|
{
|
|
for (auto &thread : threads_)
|
|
thread->start();
|
|
|
|
return 0;
|
|
}
|
|
|
|
void DebayerCpu::stop()
|
|
{
|
|
for (auto &thread : threads_)
|
|
thread->exit();
|
|
|
|
for (auto &thread : threads_)
|
|
thread->wait();
|
|
}
|
|
|
|
/*
 * Get the range of output sizes supported for \a inputFormat at
 * \a inputSize. Return an empty SizeRange when the input format is
 * unsupported or the input size is too small.
 */
SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)
{
	const Size pattern = patternSize(inputFormat);

	if (pattern.isNull())
		return {};

	/* No need for top/bottom border with a pattern height of 2 */
	const unsigned int borderHeight = pattern.height == 2 ? 0 : pattern.height;

	/*
	 * For debayer interpolation a border is kept around the entire image
	 * and the minimum output size is pattern-height x pattern-width.
	 */
	const unsigned int minInputWidth = 3 * pattern.width;
	const unsigned int minInputHeight = 2 * borderHeight + pattern.height;
	if (inputSize.width < minInputWidth || inputSize.height < minInputHeight) {
		LOG(Debayer, Warning)
			<< "Input format size too small: " << inputSize.toString();
		return {};
	}

	/* Largest output: input minus the borders, aligned down to the pattern */
	const Size minSize(pattern.width, pattern.height);
	const Size maxSize((inputSize.width - 2 * pattern.width) & ~(pattern.width - 1),
			   (inputSize.height - 2 * borderHeight) & ~(pattern.height - 1));

	return SizeRange(minSize, maxSize, pattern.width, pattern.height);
}
|
|
|
|
} /* namespace libcamera */
|