/* SPDX-License-Identifier: LGPL-2.1-or-later */
/*
 * Copyright (C) 2023, Linaro Ltd
 * Copyright (C) 2023-2026 Red Hat Inc.
 *
 * Authors:
 * Hans de Goede
 *
 * CPU based debayering class
 */

#include "debayer_cpu.h"

#include #include #include #include #include #include #include
#include "libcamera/internal/bayer_format.h"
#include "libcamera/internal/framebuffer.h"
#include "libcamera/internal/global_configuration.h"
#include "libcamera/internal/mapped_framebuffer.h"

namespace libcamera {

/**
 * \brief Class representing one CPU debayering thread
 *
 * Implementation for CPU based debayering threads.
 *
 * Each instance handles the window lines from yStart_ up to (not including)
 * yEnd_, as set through configure(). When process() has finished, it clears
 * the bit for threadIndex_ in the owning DebayerCpu's workPending_ mask and
 * notifies workPendingCv_.
 */
class DebayerCpuThread : public Thread, public Object
{
public:
	DebayerCpuThread(DebayerCpu *debayer, unsigned int threadIndex,
			 bool enableInputMemcpy);

	/* Store the line range of this thread and size its line buffers */
	void configure(unsigned int yStart, unsigned int yEnd);
	/* Debayer the lines assigned to this thread for one frame */
	void process(uint32_t frame, const uint8_t *src, uint8_t *dst);

private:
	/* Copy the initial lines into lineBuffers_ (no-op without input memcpy) */
	void setupInputMemcpy(const uint8_t *linePointers[]);
	/* Move every line pointer one slot down and append the next source line */
	void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);
	/* Copy the newest line into the next line buffer (no-op without input memcpy) */
	void memcpyNextLine(const uint8_t *linePointers[]);
	/* Line loops for a Bayer pattern height of 2 respectively 4 */
	void process2(uint32_t frame, const uint8_t *src, uint8_t *dst);
	void process4(uint32_t frame, const uint8_t *src, uint8_t *dst);

	/* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */
	static constexpr unsigned int kMaxLineBuffers = 5;

	/* The DebayerCpu this thread belongs to */
	DebayerCpu *debayer_;
	/* Bit position of this thread in workPending_, also passed to the stats */
	unsigned int threadIndex_;
	/* First line to process and line at which to stop, relative to the window */
	unsigned int yStart_;
	unsigned int yEnd_;
	/* Bytes copied per line, including the padding on both sides */
	unsigned int lineBufferLength_;
	/* Bytes of padding kept on the left and on the right of a copied line */
	unsigned int lineBufferPadding_;
	/* Index of the line buffer that the next memcpyNextLine() call overwrites */
	unsigned int lineBufferIndex_;
	std::vector lineBuffers_[kMaxLineBuffers];
	/* When set, input lines are copied to lineBuffers_ before being read */
	bool enableInputMemcpy_;
};

/**
 * \brief Construct a DebayerCpuThread object
 * \param[in] debayer pointer back to the DebayerCpuObject this thread belongs to
 * \param[in] threadIndex 0 ..
n thread-index value for the thread
 * \param[in] enableInputMemcpy when set copy input data to a heap buffer before use
 */
DebayerCpuThread::DebayerCpuThread(DebayerCpu *debayer, unsigned int threadIndex, bool enableInputMemcpy)
	: Thread("DebayerCpu:" + std::to_string(threadIndex)),
	  debayer_(debayer), threadIndex_(threadIndex),
	  enableInputMemcpy_(enableInputMemcpy)
{
	/*
	 * Presumably needed so that the queued invokeMethod() calls issued by
	 * DebayerCpu::process() run on this thread - confirm against the
	 * Object/Thread documentation.
	 */
	moveToThread(this);
}

/**
 * \class DebayerCpu
 * \brief Class for debayering on the CPU
 *
 * Implementation for CPU based debayering
 */

/**
 * \brief Constructs a DebayerCpu object
 * \param[in] stats Pointer to the stats object to use
 * \param[in] configuration The global configuration
 *
 * Reads the "software_isp" options "copy_input_buffer" and "threads" from
 * \a configuration and creates one DebayerCpuThread per configured thread.
 */
DebayerCpu::DebayerCpu(std::unique_ptr stats, const GlobalConfiguration &configuration)
	: Debayer(configuration), stats_(std::move(stats))
{
	/*
	 * Reading from uncached buffers may be very slow.
	 * In such a case, it's better to copy input buffer data to normal memory.
	 * But in case of cached buffers, copying the data is unnecessary overhead.
	 * The software_isp/copy_input_buffer option makes this behavior
	 * configurable. It defaults to true as the safer choice but this should
	 * be changed in future.
	 *
	 * \todo Make memcpy automatic based on runtime detection of platform
	 * capabilities.
	 */
	bool enableInputMemcpy =
		configuration.option({ "software_isp", "copy_input_buffer" }).value_or(true);

	/* The requested thread count is limited to kMinThreads .. kMaxThreads */
	unsigned int threadCount =
		configuration.option({ "software_isp", "threads" }).value_or(kDefaultThreads);
	threadCount = std::clamp(threadCount, kMinThreads, kMaxThreads);
	threads_.resize(threadCount);

	for (unsigned int i = 0; i < threads_.size(); i++)
		threads_[i] = std::make_unique(this, i, enableInputMemcpy);

	LOG(Debayer, Debug) << "Thread count " << threadCount;
}

DebayerCpu::~DebayerCpu() = default;

/*
 * Declare prev, curr and next as pixel_t pointers to the previous, current
 * and next input line (src[0], src[1], src[2]), each advanced by xShift_
 * pixels.
 */
#define DECLARE_SRC_POINTERS(pixel_t)                            \
	const pixel_t *prev = (const pixel_t *)src[0] + xShift_; \
	const pixel_t *curr = (const pixel_t *)src[1] + xShift_; \
	const pixel_t *next = (const pixel_t *)src[2] + xShift_;

/*
 * Write one output byte looked up in gammaLut_, with the index clamped to
 * the valid range of the table.
 */
#define GAMMA(value) \
	*dst++ = gammaLut_[std::clamp(value, 0, static_cast(gammaLut_.size()) - 1)]

/*
 * Store one output pixel in blue, green, red byte order and advance x.
 *
 * With ccmEnabled the three inputs select one column each from blueCcm_,
 * greenCcm_ and redCcm_; the per-channel sums of those columns are passed
 * through GAMMA(). Otherwise the inputs index the blue_, green_ and red_
 * lookup tables directly. With addAlphaByte a fourth byte of 255 is appended.
 */
#define STORE_PIXEL(b_, g_, r_)                         \
	if constexpr (ccmEnabled) {                     \
		const CcmColumn &blue = blueCcm_[b_];   \
		const CcmColumn &green = greenCcm_[g_]; \
		const CcmColumn &red = redCcm_[r_];     \
		GAMMA(blue.b + green.b + red.b);        \
		GAMMA(blue.g + green.g + red.g);        \
		GAMMA(blue.r + green.r + red.r);        \
	} else {                                        \
		*dst++ = blue_[b_];                     \
		*dst++ = green_[g_];                    \
		*dst++ = red_[r_];                      \
	}                                               \
	if constexpr (addAlphaByte)                     \
		*dst++ = 255;                           \
	x++;

/*
 * The four macros below take the same arguments: p and n are the distances
 * (in array elements) to the sample in the previous and in the next column,
 * div is the divisor applied to the averaged samples. The 3x3 diagrams show
 * the colours surrounding the current sample.
 */

/*
 * RGR
 * GBG
 * RGR
 */
#define BGGR_BGR888(p, n, div)                                                         \
	STORE_PIXEL(                                                                   \
		curr[x] / (div),                                                       \
		(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div)),         \
		(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div)))

/*
 * GBG
 * RGR
 * GBG
 */
#define GRBG_BGR888(p, n, div)                             \
	STORE_PIXEL(                                       \
		(prev[x] + next[x]) / (2 * (div)),         \
		curr[x] / (div),                           \
		(curr[x - p] + curr[x + n]) / (2 * (div)))

/*
 * GRG
 * BGB
 * GRG
 */
#define GBRG_BGR888(p, n, div)                             \
	STORE_PIXEL(                                       \
		(curr[x - p] + curr[x + n]) / (2 * (div)), \
		curr[x] / (div),                           \
		(prev[x] + next[x]) / (2 * (div)))

/*
 * BGB
 * GRG
 * BGB
 */
#define RGGB_BGR888(p, n, div)                                                         \
	STORE_PIXEL(                                                                   \
		(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div)), \
		(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div)),         \
		curr[x] / (div))

/*
 * Debayer one line of 8 bit samples where even x holds blue and odd x holds
 * green. Each macro invocation stores one pixel and advances x, so every loop
 * iteration produces two output pixels.
 */
template void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
{
	DECLARE_SRC_POINTERS(uint8_t)

	for (int x = 0; x < (int)window_.width;) {
		BGGR_BGR888(1, 1, 1)
		GBRG_BGR888(1, 1, 1)
	}
}

/*
 * Debayer one line of 8 bit samples where even x holds green and odd x holds
 * red. Two output pixels are produced per loop iteration.
 */
template void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
{
	DECLARE_SRC_POINTERS(uint8_t)

	for (int x = 0; x < (int)window_.width;) {
		GRBG_BGR888(1, 1, 1)
		RGGB_BGR888(1, 1, 1)
	}
}

/* Same as debayer8_BGBG_BGR888() for 10 bit samples stored in uint16_t */
template void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
{
	DECLARE_SRC_POINTERS(uint16_t)

	for (int x = 0; x < (int)window_.width;) {
		/* divide values by 4 for 10 -> 8 bpp value */
		BGGR_BGR888(1, 1, 4)
		GBRG_BGR888(1, 1, 4)
	}
}

/* Same as debayer8_GRGR_BGR888() for 10 bit samples stored in uint16_t */
template void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
{
	DECLARE_SRC_POINTERS(uint16_t)

	for (int x = 0; x < (int)window_.width;) {
		/* divide values by 4 for 10 -> 8 bpp value */
		GRBG_BGR888(1, 1, 4)
		RGGB_BGR888(1, 1, 4)
	}
}

/* Same as debayer8_BGBG_BGR888() for 12 bit samples stored in uint16_t */
template void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
{
	DECLARE_SRC_POINTERS(uint16_t)

	for (int x = 0; x < (int)window_.width;) {
		/* divide values by 16 for 12 -> 8 bpp value */
		BGGR_BGR888(1, 1, 16)
		GBRG_BGR888(1, 1, 16)
	}
}

/* Same as debayer8_GRGR_BGR888() for 12 bit samples stored in uint16_t */
template void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
{
	DECLARE_SRC_POINTERS(uint16_t)

	for (int x = 0; x < (int)window_.width;) {
		/* divide values by 16 for 12 -> 8 bpp value */
		GRBG_BGR888(1, 1, 16)
		RGGB_BGR888(1, 1, 16)
	}
}

/*
 * Debayer one line of packed 10 bit data (5 bytes per 4 pixels) where the
 * first pixel of each group is blue. Only the first four bytes of every
 * group are read as samples; the fifth byte is skipped. xShift_ is not
 * applied here, the line pointers are used as passed in.
 */
template void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
{
	const int widthInBytes = window_.width * 5 / 4;
	const uint8_t *prev = src[0];
	const uint8_t *curr = src[1];
	const uint8_t *next = src[2];

	/*
	 * For the first pixel getting a pixel from the previous column uses
	 * x - 2 to skip the 5th byte with least-significant bits for 4 pixels.
	 * Same for last pixel (uses x + 2) and looking at the next column.
	 */
	for (int x = 0; x < widthInBytes;) {
		/* First pixel */
		BGGR_BGR888(2, 1, 1)
		/* Second pixel BGGR -> GBRG */
		GBRG_BGR888(1, 1, 1)
		/* Same thing for third and fourth pixels */
		BGGR_BGR888(1, 1, 1)
		GBRG_BGR888(1, 2, 1)
		/* Skip 5th src byte with 4 x 2 least-significant-bits */
		x++;
	}
}

/*
 * Packed 10 bit variant for a line whose first pixel of each group is green,
 * followed by red. See debayer10P_BGBG_BGR888() for the x - 2 / x + 2 offsets.
 */
template void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
{
	const int widthInBytes = window_.width * 5 / 4;
	const uint8_t *prev = src[0];
	const uint8_t *curr = src[1];
	const uint8_t *next = src[2];

	for (int x = 0; x < widthInBytes;) {
		/* First pixel */
		GRBG_BGR888(2, 1, 1)
		/* Second pixel GRBG -> RGGB */
		RGGB_BGR888(1, 1, 1)
		/* Same thing for third and fourth pixels */
		GRBG_BGR888(1, 1, 1)
		RGGB_BGR888(1, 2, 1)
		/* Skip 5th src byte with 4 x 2 least-significant-bits */
		x++;
	}
}

/*
 * Packed 10 bit variant for a line whose first pixel of each group is green,
 * followed by blue.
 */
template void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
{
	const int widthInBytes = window_.width * 5 / 4;
	const uint8_t *prev = src[0];
	const uint8_t *curr = src[1];
	const uint8_t *next = src[2];

	for (int x = 0; x < widthInBytes;) {
		/* Even pixel */
		GBRG_BGR888(2, 1, 1)
		/* Odd pixel GBGR -> BGGR */
		BGGR_BGR888(1, 1, 1)
		/* Same thing for next 2 pixels */
		GBRG_BGR888(1, 1, 1)
		BGGR_BGR888(1, 2, 1)
		/* Skip 5th src byte with 4 x 2 least-significant-bits */
		x++;
	}
}

/*
 * Packed 10 bit variant for a line whose first pixel of each group is red,
 * followed by green.
 */
template void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
{
	const int widthInBytes = window_.width * 5 / 4;
	const uint8_t *prev = src[0];
	const uint8_t *curr = src[1];
	const uint8_t *next = src[2];

	for (int x = 0; x < widthInBytes;) {
		/* Even pixel */
		RGGB_BGR888(2, 1, 1)
		/* Odd pixel RGGB -> GRBG */
		GRBG_BGR888(1, 1, 1)
		/* Same thing for next 2 pixels */
		RGGB_BGR888(1, 1, 1)
		GRBG_BGR888(1, 2, 1)
		/* Skip 5th src byte with 4 x 2 least-significant-bits */
		x++;
	}
}

/*
 * Setup the Debayer object according to the passed in parameters.
 * Return 0 on success, a negative errno value on failure
 * (unsupported parameters).
*/
int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
{
	BayerFormat bayerFormat = BayerFormat::fromPixelFormat(inputFormat);

	/* Unpacked 8, 10 or 12 bit data in one of the standard Bayer orders */
	if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10 || bayerFormat.bitDepth == 12) &&
	    bayerFormat.packing == BayerFormat::Packing::None &&
	    isStandardBayerOrder(bayerFormat.order)) {
		/* Round the bit depth up to a multiple of 8: 8 -> 8, 10 and 12 -> 16 */
		config.bpp = (bayerFormat.bitDepth + 7) & ~7;
		config.patternSize.width = 2;
		config.patternSize.height = 2;
		config.outputFormats = std::vector({ formats::RGB888,
						     formats::XRGB8888,
						     formats::ARGB8888,
						     formats::BGR888,
						     formats::XBGR8888,
						     formats::ABGR8888 });
		return 0;
	}

	/* CSI-2 packed 10 bit data in one of the standard Bayer orders */
	if (bayerFormat.bitDepth == 10 &&
	    bayerFormat.packing == BayerFormat::Packing::CSI2 &&
	    isStandardBayerOrder(bayerFormat.order)) {
		config.bpp = 10;
		config.patternSize.width = 4; /* 5 bytes per *4* pixels */
		config.patternSize.height = 2;
		config.outputFormats = std::vector({ formats::RGB888,
						     formats::XRGB8888,
						     formats::ARGB8888,
						     formats::BGR888,
						     formats::XBGR8888,
						     formats::ABGR8888 });
		return 0;
	}

	LOG(Debayer, Info)
		<< "Unsupported input format " << inputFormat.toString();
	return -EINVAL;
}

/*
 * Fill in the bits per pixel of the given output format: 24 for the three
 * byte formats, 32 for the formats carrying a fourth (X or A) byte.
 * Return 0 on success, -EINVAL for any other output format.
 */
int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
{
	if (outputFormat == formats::RGB888 || outputFormat == formats::BGR888) {
		config.bpp = 24;
		return 0;
	}

	if (outputFormat == formats::XRGB8888 || outputFormat == formats::ARGB8888 ||
	    outputFormat == formats::XBGR8888 || outputFormat == formats::ABGR8888) {
		config.bpp = 32;
		return 0;
	}

	LOG(Debayer, Info)
		<< "Unsupported output format " << outputFormat.toString();
	return -EINVAL;
}

/*
 * Check for standard Bayer orders and set xShift_ and swap debayer0/1, so that
 * a single pair of BGGR debayer functions can be used for all 4 standard orders.
*/
int DebayerCpu::setupStandardBayerOrder(BayerFormat::Order order)
{
	switch (order) {
	case BayerFormat::BGGR:
		/* Nothing to adjust, the functions are set up for this order */
		break;
	case BayerFormat::GBRG:
		xShift_ = 1; /* BGGR -> GBRG */
		break;
	case BayerFormat::GRBG:
		std::swap(debayer0_, debayer1_); /* BGGR -> GRBG */
		break;
	case BayerFormat::RGGB:
		xShift_ = 1; /* BGGR -> GBRG */
		std::swap(debayer0_, debayer1_); /* GBRG -> RGGB */
		break;
	default:
		/* Any other order is rejected */
		return -EINVAL;
	}

	return 0;
}

/*
 * Assign debayer0_ from method0 and debayer1_ from method1. The macro must be
 * expanded where addAlphaByte and ccmEnabled are in scope; these two flags
 * decide which branch of the nested conditionals is taken.
 */
#define SET_DEBAYER_METHODS(method0, method1)                                  \
	debayer0_ = addAlphaByte                                               \
		? (ccmEnabled ? &DebayerCpu::method0 : &DebayerCpu::method0)   \
		: (ccmEnabled ? &DebayerCpu::method0 : &DebayerCpu::method0);  \
	debayer1_ = addAlphaByte                                               \
		? (ccmEnabled ? &DebayerCpu::method1 : &DebayerCpu::method1)   \
		: (ccmEnabled ? &DebayerCpu::method1 : &DebayerCpu::method1);

/*
 * Select the per-line debayer functions for the given input and output
 * formats. Resets xShift_ and swapRedBlueGains_ before doing so.
 * Return 0 on success, -EINVAL for an unsupported combination.
 */
int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat,
				    PixelFormat outputFormat,
				    bool ccmEnabled)
{
	BayerFormat bayerFormat = BayerFormat::fromPixelFormat(inputFormat);
	bool addAlphaByte = false;

	xShift_ = 0;
	swapRedBlueGains_ = false;

	/* Shared error path: log once and hand back -EINVAL */
	auto invalidFmt = []() -> int {
		LOG(Debayer, Error) << "Unsupported input output format combination";
		return -EINVAL;
	};

	/*
	 * The four byte output formats need the extra 255 byte; the BGR family
	 * additionally gets red and blue exchanged.
	 */
	switch (outputFormat) {
	case formats::XRGB8888:
	case formats::ARGB8888:
		addAlphaByte = true;
		[[fallthrough]];
	case formats::RGB888:
		break;
	case formats::XBGR8888:
	case formats::ABGR8888:
		addAlphaByte = true;
		[[fallthrough]];
	case formats::BGR888:
		/* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
		swapRedBlueGains_ = true;

		switch (bayerFormat.order) {
		case BayerFormat::BGGR:
			bayerFormat.order = BayerFormat::RGGB;
			break;
		case BayerFormat::GBRG:
			bayerFormat.order = BayerFormat::GRBG;
			break;
		case BayerFormat::GRBG:
			bayerFormat.order = BayerFormat::GBRG;
			break;
		case BayerFormat::RGGB:
			bayerFormat.order = BayerFormat::BGGR;
			break;
		default:
			return invalidFmt();
		}
		break;
	default:
		return invalidFmt();
	}

	/* Unpacked formats: one function pair per bit depth, order handled below */
	if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10 || bayerFormat.bitDepth == 12) &&
	    bayerFormat.packing == BayerFormat::Packing::None &&
	    isStandardBayerOrder(bayerFormat.order)) {
		switch (bayerFormat.bitDepth) {
		case 8:
			SET_DEBAYER_METHODS(debayer8_BGBG_BGR888, debayer8_GRGR_BGR888)
			break;
		case 10:
			SET_DEBAYER_METHODS(debayer10_BGBG_BGR888, debayer10_GRGR_BGR888)
			break;
		case 12:
			SET_DEBAYER_METHODS(debayer12_BGBG_BGR888, debayer12_GRGR_BGR888)
			break;
		}
		/*
		 * The return value is not checked; the order was already
		 * validated by isStandardBayerOrder() above.
		 */
		setupStandardBayerOrder(bayerFormat.order);
		return 0;
	}

	/* CSI-2 packed 10 bit: a dedicated function pair for each Bayer order */
	if (bayerFormat.bitDepth == 10 &&
	    bayerFormat.packing == BayerFormat::Packing::CSI2) {
		switch (bayerFormat.order) {
		case BayerFormat::BGGR:
			SET_DEBAYER_METHODS(debayer10P_BGBG_BGR888, debayer10P_GRGR_BGR888)
			return 0;
		case BayerFormat::GBRG:
			SET_DEBAYER_METHODS(debayer10P_GBGB_BGR888, debayer10P_RGRG_BGR888)
			return 0;
		case BayerFormat::GRBG:
			SET_DEBAYER_METHODS(debayer10P_GRGR_BGR888, debayer10P_BGBG_BGR888)
			return 0;
		case BayerFormat::RGGB:
			SET_DEBAYER_METHODS(debayer10P_RGRG_BGR888, debayer10P_GBGB_BGR888)
			return 0;
		default:
			break;
		}
	}

	return invalidFmt();
}

/*
 * Configure the debayer for one input and exactly one output stream.
 *
 * Validates the formats, sizes and strides, selects the debayer functions,
 * computes the processing window inside the input frame and distributes the
 * window lines over the threads. Return 0 on success, -EINVAL on any failure.
 */
int DebayerCpu::configure(const StreamConfiguration &inputCfg,
			  const std::vector> &outputCfgs,
			  bool ccmEnabled)
{
	if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
		return -EINVAL;

	if (stats_->configure(inputCfg, threads_.size()) != 0)
		return -EINVAL;

	/* The stats object and the debayer must agree on the pattern size */
	const Size &statsPatternSize = stats_->patternSize();
	if (inputConfig_.patternSize.width != statsPatternSize.width ||
	    inputConfig_.patternSize.height != statsPatternSize.height) {
		LOG(Debayer, Error)
			<< "mismatching stats and debayer pattern sizes for "
			<< inputCfg.pixelFormat.toString();
		return -EINVAL;
	}

	inputConfig_.stride = inputCfg.stride;

	if (outputCfgs.size() != 1) {
		LOG(Debayer, Error)
			<< "Unsupported number of output streams: "
			<< outputCfgs.size();
		return -EINVAL;
	}

	const StreamConfiguration &outputCfg = outputCfgs[0];
	SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
	std::tie(outputConfig_.stride, outputConfig_.frameSize) =
		strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);

	/* The requested size must be supported and the stride must match ours */
	if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
		LOG(Debayer, Error)
			<< "Invalid output size/stride: "
			<< "\n " << outputCfg.size << " (" << outSizeRange << ")"
			<< "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")";
		return -EINVAL;
	}

	int ret = setDebayerFunctions(inputCfg.pixelFormat,
				      outputCfg.pixelFormat,
				      ccmEnabled);
	if (ret != 0)
		return -EINVAL;

	ccmEnabled_ = ccmEnabled;

	/*
	 * Lookup tables must be initialized because the initial value is used for
	 * the first two frames, i.e. until stats processing starts providing its
	 * own parameters. Let's enforce recomputing lookup tables by setting the
	 * stored last used gamma to an out-of-range value.
	 */
	params_.gamma = 1.0;

	/*
	 * Center the output window in the input frame, with the offsets rounded
	 * down to a multiple of the pattern size.
	 */
	window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
		    ~(inputConfig_.patternSize.width - 1);
	window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
		    ~(inputConfig_.patternSize.height - 1);
	window_.width = outputCfg.size.width;
	window_.height = outputCfg.size.height;

	/*
	 * Set the stats window to the whole processed window. Its coordinates are
	 * relative to the debayered area since debayering passes only the part of
	 * data to be processed to the stats; see SwStatsCpu::setWindow.
	 */
	stats_->setWindow(Rectangle(window_.size()));

	/*
	 * Give every thread but the last the same number of lines, rounded down
	 * to a multiple of the pattern height. The last thread takes all the
	 * remaining lines up to the window height.
	 */
	unsigned int yStart = 0;
	unsigned int linesPerThread = (window_.height / threads_.size()) &
				      ~(inputConfig_.patternSize.height - 1);
	unsigned int i;
	for (i = 0; i < (threads_.size() - 1); i++) {
		threads_[i]->configure(yStart, yStart + linesPerThread);
		yStart += linesPerThread;
	}
	threads_[i]->configure(yStart, window_.height);

	return 0;
}

/**
 * \brief Configure thread to process a specific part of the image
 * \param[in] yStart y coordinate of first line to process
 * \param[in] yEnd y coordinate of the line at which to stop processing
 *
 * Configure the thread to process lines from yStart to yEnd - 1.
*/
void DebayerCpuThread::configure(unsigned int yStart, unsigned int yEnd)
{
	Debayer::DebayerInputConfig &inputConfig = debayer_->inputConfig_;

	yStart_ = yStart;
	yEnd_ = yEnd;

	/* pad with patternSize.Width on both left and right side */
	lineBufferPadding_ = inputConfig.patternSize.width * inputConfig.bpp / 8;
	lineBufferLength_ = debayer_->window_.width * inputConfig.bpp / 8 +
			    2 * lineBufferPadding_;

	/* patternSize.height + 1 line buffers are used, so "<=" is intended */
	if (enableInputMemcpy_) {
		for (unsigned int i = 0; i <= inputConfig.patternSize.height; i++)
			lineBuffers_[i].resize(lineBufferLength_);
	}
}

/*
 * Get width and height at which the bayer-pattern repeats.
 * Return pattern-size or an empty Size for an unsupported inputFormat.
 */
Size DebayerCpu::patternSize(PixelFormat inputFormat)
{
	DebayerCpu::DebayerInputConfig config;

	if (getInputConfig(inputFormat, config) != 0)
		return {};

	return config.patternSize;
}

/*
 * Get the output formats available for the given input format.
 * Return an empty vector for an unsupported inputFormat.
 */
std::vector DebayerCpu::formats(PixelFormat inputFormat)
{
	DebayerCpu::DebayerInputConfig config;

	if (getInputConfig(inputFormat, config) != 0)
		return std::vector();

	return config.outputFormats;
}

/*
 * Compute the output stride and frame size in bytes for the given output
 * format and size. Return (0, 0) for an unsupported outputFormat.
 */
std::tuple DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
{
	DebayerCpu::DebayerOutputConfig config;

	if (getOutputConfig(outputFormat, config) != 0)
		return std::make_tuple(0, 0);

	/* round up to multiple of 8 for 64 bits alignment */
	unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;

	return std::make_tuple(stride, stride * size.height);
}

/*
 * Copy the lines referenced by linePointers[1] .. linePointers[patternHeight]
 * into the first patternHeight line buffers, including the padding on both
 * sides, and redirect those pointers to the copies. Does nothing when input
 * memcpy is disabled.
 */
void DebayerCpuThread::setupInputMemcpy(const uint8_t *linePointers[])
{
	const unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;

	if (!enableInputMemcpy_)
		return;

	for (unsigned int i = 0; i < patternHeight; i++) {
		memcpy(lineBuffers_[i].data(),
		       linePointers[i + 1] - lineBufferPadding_,
		       lineBufferLength_);
		linePointers[i + 1] = lineBuffers_[i].data() + lineBufferPadding_;
	}

	/* Point lineBufferIndex_ to first unused lineBuffer */
	lineBufferIndex_ = patternHeight;
}

/*
 * Move every line pointer one slot towards index 0 and set the last slot to
 * the line patternHeight / 2 lines below src.
 */
void DebayerCpuThread::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)
{
	const unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;

	for (unsigned int i = 0; i < patternHeight; i++)
		linePointers[i] = linePointers[i + 1];

	linePointers[patternHeight] = src +
				      (patternHeight / 2) * (int)debayer_->inputConfig_.stride;
}

/*
 * Copy the line in the last slot of linePointers into the line buffer
 * selected by lineBufferIndex_ and redirect the slot to the copy. The index
 * then advances, wrapping around after patternHeight + 1 buffers. Does
 * nothing when input memcpy is disabled.
 */
void DebayerCpuThread::memcpyNextLine(const uint8_t *linePointers[])
{
	const unsigned int patternHeight = debayer_->inputConfig_.patternSize.height;

	if (!enableInputMemcpy_)
		return;

	memcpy(lineBuffers_[lineBufferIndex_].data(),
	       linePointers[patternHeight] - lineBufferPadding_,
	       lineBufferLength_);
	linePointers[patternHeight] = lineBuffers_[lineBufferIndex_].data() + lineBufferPadding_;

	lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);
}

/**
 * \brief Process part of the image assigned to this debayer thread
 * \param[in] frame The frame number
 * \param[in] src The source buffer
 * \param[in] dst The destination buffer
 *
 * Once the lines have been processed, the bit of this thread is cleared in
 * the workPending_ mask of the DebayerCpu and workPendingCv_ is notified.
 */
void DebayerCpuThread::process(uint32_t frame, const uint8_t *src, uint8_t *dst)
{
	Rectangle &window = debayer_->window_;

	/* Adjust src to top left corner of the window */
	src += (window.y + yStart_) * debayer_->inputConfig_.stride +
	       window.x * debayer_->inputConfig_.bpp / 8;

	/* Adjust dst for yStart_ */
	dst += yStart_ * debayer_->outputConfig_.stride;

	if (debayer_->inputConfig_.patternSize.height == 2)
		process2(frame, src, dst);
	else
		process4(frame, src, dst);

	/* Mark this thread as done and wake up the waiter in DebayerCpu::process() */
	debayer_->workPendingMutex_.lock();
	debayer_->workPending_ &= ~(1 << threadIndex_);
	debayer_->workPendingMutex_.unlock();
	debayer_->workPendingCv_.notify_one();
}

/*
 * Process the lines of this thread for a Bayer pattern height of 2. Lines are
 * handled in pairs: debayer0 and the stats for the first line of a pair,
 * debayer1 for the second one.
 */
void DebayerCpuThread::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)
{
	unsigned int outputStride = debayer_->outputConfig_.stride;
	unsigned int inputStride = debayer_->inputConfig_.stride;
	Rectangle &window = debayer_->window_;
	unsigned int yEnd = yEnd_;
	/* Holds [0] previous- [1] current- [2] next-line */
	const uint8_t *linePointers[3];

	/* [x] becomes [x - 1] after initial shiftLinePointers() call */
	if (window.y + yStart_) {
		linePointers[1] = src - inputStride; /* previous-line */
		linePointers[2] = src;
	} else {
		/* Top line, use the next line as prev line */
		linePointers[1] = src + inputStride;
		linePointers[2] = src;
	}

	if (window.y == 0 && yEnd_ == window.height) {
		/*
		 * Last 2 lines also need special handling.
		 * (And configure() ensures that yEnd >= 2.)
		 */
		yEnd -= 2;
	}

	setupInputMemcpy(linePointers);

	/*
	 * Note y is the line-number *inside* the window, since stats_' window
	 * is the stats window inside/relative to the debayer window. IOW for
	 * single thread rendering y goes from 0 to window.height.
	 */
	for (unsigned int y = yStart_; y < yEnd; y += 2) {
		shiftLinePointers(linePointers, src);
		memcpyNextLine(linePointers);
		debayer_->stats_->processLine0(frame, y, linePointers, threadIndex_);
		debayer_->debayer0(dst, linePointers);
		src += inputStride;
		dst += outputStride;

		shiftLinePointers(linePointers, src);
		memcpyNextLine(linePointers);
		debayer_->debayer1(dst, linePointers);
		src += inputStride;
		dst += outputStride;
	}

	/* The two lines held back above, handled without reading a next line */
	if (window.y == 0 && yEnd_ == window.height) {
		shiftLinePointers(linePointers, src);
		memcpyNextLine(linePointers);
		debayer_->stats_->processLine0(frame, yEnd, linePointers, threadIndex_);
		debayer_->debayer0(dst, linePointers);
		src += inputStride;
		dst += outputStride;

		shiftLinePointers(linePointers, src);
		/* next line may point outside of src, use prev. */
		linePointers[2] = linePointers[0];
		debayer_->debayer1(dst, linePointers);
		src += inputStride;
		dst += outputStride;
	}
}

/*
 * Process the lines of this thread for a Bayer pattern height of 4. Lines are
 * handled in groups of four using debayer0 .. debayer3; the stats are fed on
 * the first (processLine0) and third (processLine2) line of each group.
 */
void DebayerCpuThread::process4(uint32_t frame, const uint8_t *src, uint8_t *dst)
{
	unsigned int outputStride = debayer_->outputConfig_.stride;
	unsigned int inputStride = debayer_->inputConfig_.stride;

	/*
	 * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line
	 * [3] 1-line-down [4] 2-lines-down.
	 */
	const uint8_t *linePointers[5];

	/* [x] becomes [x - 1] after initial shiftLinePointers() call */
	linePointers[1] = src - 2 * inputStride;
	linePointers[2] = src - inputStride;
	linePointers[3] = src;
	linePointers[4] = src + inputStride;

	setupInputMemcpy(linePointers);

	/*
	 * Note y is the line-number *inside* the window, since stats_' window
	 * is the stats window inside/relative to the debayer window. IOW for
	 * single thread rendering y goes from 0 to window.height.
	 */
	for (unsigned int y = yStart_; y < yEnd_; y += 4) {
		shiftLinePointers(linePointers, src);
		memcpyNextLine(linePointers);
		debayer_->stats_->processLine0(frame, y, linePointers, threadIndex_);
		debayer_->debayer0(dst, linePointers);
		src += inputStride;
		dst += outputStride;

		shiftLinePointers(linePointers, src);
		memcpyNextLine(linePointers);
		debayer_->debayer1(dst, linePointers);
		src += inputStride;
		dst += outputStride;

		shiftLinePointers(linePointers, src);
		memcpyNextLine(linePointers);
		debayer_->stats_->processLine2(frame, y, linePointers, threadIndex_);
		debayer_->debayer2(dst, linePointers);
		src += inputStride;
		dst += outputStride;

		shiftLinePointers(linePointers, src);
		memcpyNextLine(linePointers);
		debayer_->debayer3(dst, linePointers);
		src += inputStride;
		dst += outputStride;
	}
}

/*
 * Recompute gammaTable_ from the black level, gamma and contrast exponent in
 * params. Entries from the black level index upwards get the S-curve and the
 * gamma applied; entries below it are filled with the value at that index.
 */
void DebayerCpu::updateGammaTable(const DebayerParams ¶ms)
{
	const RGB blackLevel = params.blackLevel;
	/* Take let's say the green channel black level */
	/*
	 * NOTE(review): blackIndex is used unchecked below; this assumes the
	 * green black level is below 1.0 - confirm against the producer of
	 * params.
	 */
	const unsigned int blackIndex = blackLevel[1] * gammaTable_.size();
	const float gamma = params.gamma;
	const float contrastExp = params.contrastExp;

	const float divisor = gammaTable_.size() - blackIndex - 1.0;
	for (unsigned int i = blackIndex; i < gammaTable_.size(); i++) {
		/* Position of i between the black level (0.0) and the table end (1.0) */
		float normalized = (i - blackIndex) / divisor;
		/* contrastExp is used as provided in params, no conversion is done here */
		/* Apply simple S-curve */
		if (normalized < 0.5)
			normalized = 0.5 * std::pow(normalized / 0.5, contrastExp);
		else
			normalized = 1.0 - 0.5 * std::pow((1.0 - normalized) / 0.5, contrastExp);
		gammaTable_[i] = UINT8_MAX * std::pow(normalized, gamma);
	}
	/*
	 * Due to CCM operations, the table lookup may reach indices below the black
	 * level. Let's set the table values below black level to the minimum
	 * non-black value to prevent problems when the minimum value is
	 * significantly non-zero (for example, when the image should be all grey).
	 */
	std::fill(gammaTable_.begin(), gammaTable_.begin() + blackIndex,
		  gammaTable_[blackIndex]);
}

/*
 * Bring the lookup tables in line with params and remember params as the last
 * used parameters. The gamma table is recomputed when gamma, black level or
 * contrast exponent changed. Depending on ccmEnabled_, either the CCM column
 * tables and gammaLut_ or the per-channel red_/green_/blue_ tables are
 * rebuilt, and only when their inputs changed.
 */
void DebayerCpu::updateLookupTables(const DebayerParams ¶ms)
{
	const bool gammaUpdateNeeded =
		params.gamma != params_.gamma ||
		params.blackLevel != params_.blackLevel ||
		params.contrastExp != params_.contrastExp;
	if (gammaUpdateNeeded)
		updateGammaTable(params);

	/* Element-wise comparison of two matrices */
	auto matrixChanged = [](const Matrix &m1, const Matrix &m2) -> bool {
		return !std::equal(m1.data().begin(), m1.data().end(), m2.data().begin());
	};
	const unsigned int gammaTableSize = gammaTable_.size();
	/* Ratio between the size of the RGB lookup tables and the gamma table */
	const double div = static_cast(kRGBLookupSize) / gammaTableSize;
	if (ccmEnabled_) {
		if (gammaUpdateNeeded ||
		    matrixChanged(params.combinedMatrix, params_.combinedMatrix)) {
			/* With swapRedBlueGains_ the red and blue tables and matrix rows trade places */
			auto &red = swapRedBlueGains_ ? blueCcm_ : redCcm_;
			auto &green = greenCcm_;
			auto &blue = swapRedBlueGains_ ? redCcm_ : blueCcm_;
			const unsigned int redIndex = swapRedBlueGains_ ? 2 : 0;
			const unsigned int greenIndex = 1;
			const unsigned int blueIndex = swapRedBlueGains_ ? 0 : 2;
			for (unsigned int i = 0; i < kRGBLookupSize; i++) {
				/* Contribution of an input value i on each channel to the outputs */
				red[i].r = std::round(i * params.combinedMatrix[redIndex][0]);
				red[i].g = std::round(i * params.combinedMatrix[greenIndex][0]);
				red[i].b = std::round(i * params.combinedMatrix[blueIndex][0]);
				green[i].r = std::round(i * params.combinedMatrix[redIndex][1]);
				green[i].g = std::round(i * params.combinedMatrix[greenIndex][1]);
				green[i].b = std::round(i * params.combinedMatrix[blueIndex][1]);
				blue[i].r = std::round(i * params.combinedMatrix[redIndex][2]);
				blue[i].g = std::round(i * params.combinedMatrix[greenIndex][2]);
				blue[i].b = std::round(i * params.combinedMatrix[blueIndex][2]);
				gammaLut_[i] = gammaTable_[i / div];
			}
		}
	} else {
		if (gammaUpdateNeeded || params.gains != params_.gains) {
			auto &gains = params.gains;
			auto &red = swapRedBlueGains_ ? blue_ : red_;
			auto &green = green_;
			auto &blue = swapRedBlueGains_ ? red_ : blue_;
			for (unsigned int i = 0; i < kRGBLookupSize; i++) {
				/* Apply gamma after gain! */
				/* The gained index is capped at the last gamma table entry */
				const RGB lutGains = (gains * i / div).min(gammaTableSize - 1);
				red[i] = gammaTable_[static_cast(lutGains.r())];
				green[i] = gammaTable_[static_cast(lutGains.g())];
				blue[i] = gammaTable_[static_cast(lutGains.b())];
			}
		}
	}

	LOG(Debayer, Debug)
		<< "Debayer parameters: blackLevel=" << params.blackLevel
		<< "; gamma=" << params.gamma
		<< "; contrastExp=" << params.contrastExp
		<< "; gains=" << params.gains
		<< "; matrix=" << params.combinedMatrix;

	params_ = params;
}

/*
 * Debayer one frame from input into output using all threads.
 *
 * Updates the lookup tables from params, copies the metadata, queues
 * DebayerCpuThread::process() on every thread and waits until all of them
 * have cleared their bit in workPending_. Afterwards the stats are finished
 * and outputBufferReady and inputBufferReady are emitted.
 */
void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, const DebayerParams ¶ms)
{
	bench_.startFrame();

	std::vector dmaSyncers;

	dmaSyncBegin(dmaSyncers, input, output);

	updateLookupTables(params);

	/* Copy metadata from the input buffer */
	FrameMetadata &metadata = output->_d()->metadata();
	metadata.status = input->metadata().status;
	metadata.sequence = input->metadata().sequence;
	metadata.timestamp = input->metadata().timestamp;

	MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
	MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);
	if (!in.isValid() || !out.isValid()) {
		/*
		 * NOTE(review): this early return leaves without emitting
		 * outputBufferReady or inputBufferReady - confirm that callers
		 * cope with that.
		 */
		LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
		metadata.status = FrameMetadata::FrameError;
		return;
	}

	stats_->startFrame(frame);

	/* One pending bit per thread; each thread clears its own bit when done */
	workPendingMutex_.lock();
	workPending_ = (1 << threads_.size()) - 1;
	workPendingMutex_.unlock();

	for (auto &thread : threads_)
		thread->invokeMethod(&DebayerCpuThread::process,
				     ConnectionTypeQueued, frame,
				     in.planes()[0].data(), out.planes()[0].data());

	/* Block until every thread has reported completion */
	{
		MutexLocker locker(workPendingMutex_);
		workPendingCv_.wait(locker, [&]() LIBCAMERA_TSA_REQUIRES(workPendingMutex_) {
			return workPending_ == 0;
		});
	}

	metadata.planes()[0].bytesused = out.planes()[0].size();

	dmaSyncers.clear();

	/* Measure before emitting signals */
	bench_.finishFrame();

	/*
	 * Buffer ids are currently not used, so pass zeros as its parameter.
	 *
	 * \todo Pass real bufferId once stats buffer passing is changed.
	 */
	stats_->finishFrame(frame, 0);
	outputBufferReady.emit(output);
	inputBufferReady.emit(input);
}

/* Start all debayer threads. Always returns 0. */
int DebayerCpu::start()
{
	for (auto &thread : threads_)
		thread->start();

	return 0;
}

/* Ask all debayer threads to exit, then wait for each of them */
void DebayerCpu::stop()
{
	for (auto &thread : threads_)
		thread->exit();

	for (auto &thread : threads_)
		thread->wait();
}

/*
 * Get the range of output sizes supported for the given input format and
 * size. Return an empty SizeRange when the input format is unsupported or the
 * input size is too small.
 */
SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)
{
	Size patternSize = this->patternSize(inputFormat);
	unsigned int borderHeight = patternSize.height;

	if (patternSize.isNull())
		return {};

	/* No need for top/bottom border with a pattern height of 2 */
	if (patternSize.height == 2)
		borderHeight = 0;

	/*
	 * For debayer interpolation a border is kept around the entire image
	 * and the minimum output size is pattern-height x pattern-width.
	 */
	if (inputSize.width < (3 * patternSize.width) ||
	    inputSize.height < (2 * borderHeight + patternSize.height)) {
		LOG(Debayer, Warning)
			<< "Input format size too small: " << inputSize.toString();
		return {};
	}

	/*
	 * The maximum is the input size minus the borders, rounded down to a
	 * multiple of the pattern size; the pattern size is also the step.
	 */
	return SizeRange(Size(patternSize.width, patternSize.height),
			 Size((inputSize.width - 2 * patternSize.width) & ~(patternSize.width - 1),
			      (inputSize.height - 2 * borderHeight) & ~(patternSize.height - 1)),
			 patternSize.width, patternSize.height);
}

} /* namespace libcamera */