software_isp: swstats_cpu: Prepare for multi-threading support
Make the storage used to accumulate the RGB sums and the Y histogram values a vector of SwIspStats objects instead of a single object, so that when using multi-threading every thread can use its own storage to collect intermediate stats and avoid cache-line bouncing. Benchmarking with the GPU-ISP, which benchmarks swstats separately, on the Arduino Uno-Q, whose weak CPU makes it well suited for performance testing, shows 20 ms to generate stats for a 3272x2464 frame both before and after this change. Reviewed-by: Milan Zamazal <mzamazal@redhat.com> Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
This commit is contained in:
committed by
Kieran Bingham
parent
2a1c75504c
commit
6e53e72e96
@@ -12,6 +12,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include <libcamera/base/signal.h>
|
||||
|
||||
@@ -51,13 +52,13 @@ public:
|
||||
|
||||
const Size &patternSize() { return patternSize_; }
|
||||
|
||||
int configure(const StreamConfiguration &inputCfg);
|
||||
int configure(const StreamConfiguration &inputCfg, unsigned int statsBufferCount = 1);
|
||||
void setWindow(const Rectangle &window);
|
||||
void startFrame(uint32_t frame);
|
||||
void finishFrame(uint32_t frame, uint32_t bufferId);
|
||||
void processFrame(uint32_t frame, uint32_t bufferId, FrameBuffer *input);
|
||||
|
||||
void processLine0(uint32_t frame, unsigned int y, const uint8_t *src[])
|
||||
void processLine0(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)
|
||||
{
|
||||
if (frame % kStatPerNumFrames)
|
||||
return;
|
||||
@@ -66,10 +67,10 @@ public:
|
||||
y >= (window_.y + window_.height))
|
||||
return;
|
||||
|
||||
(this->*stats0_)(src);
|
||||
(this->*stats0_)(src, stats_[statsBufferIndex]);
|
||||
}
|
||||
|
||||
void processLine2(uint32_t frame, unsigned int y, const uint8_t *src[])
|
||||
void processLine2(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)
|
||||
{
|
||||
if (frame % kStatPerNumFrames)
|
||||
return;
|
||||
@@ -78,25 +79,25 @@ public:
|
||||
y >= (window_.y + window_.height))
|
||||
return;
|
||||
|
||||
(this->*stats2_)(src);
|
||||
(this->*stats2_)(src, stats_[statsBufferIndex]);
|
||||
}
|
||||
|
||||
Signal<uint32_t, uint32_t> statsReady;
|
||||
|
||||
private:
|
||||
using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]);
|
||||
using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[], SwIspStats &stats);
|
||||
using processFrameFn = void (SwStatsCpu::*)(MappedFrameBuffer &in);
|
||||
|
||||
int setupStandardBayerOrder(BayerFormat::Order order);
|
||||
/* Bayer 8 bpp unpacked */
|
||||
void statsBGGR8Line0(const uint8_t *src[]);
|
||||
void statsBGGR8Line0(const uint8_t *src[], SwIspStats &stats);
|
||||
/* Bayer 10 bpp unpacked */
|
||||
void statsBGGR10Line0(const uint8_t *src[]);
|
||||
void statsBGGR10Line0(const uint8_t *src[], SwIspStats &stats);
|
||||
/* Bayer 12 bpp unpacked */
|
||||
void statsBGGR12Line0(const uint8_t *src[]);
|
||||
void statsBGGR12Line0(const uint8_t *src[], SwIspStats &stats);
|
||||
/* Bayer 10 bpp packed */
|
||||
void statsBGGR10PLine0(const uint8_t *src[]);
|
||||
void statsGBRG10PLine0(const uint8_t *src[]);
|
||||
void statsBGGR10PLine0(const uint8_t *src[], SwIspStats &stats);
|
||||
void statsGBRG10PLine0(const uint8_t *src[], SwIspStats &stats);
|
||||
|
||||
void processBayerFrame2(MappedFrameBuffer &in);
|
||||
|
||||
@@ -116,8 +117,8 @@ private:
|
||||
unsigned int xShift_;
|
||||
unsigned int stride_;
|
||||
|
||||
std::vector<SwIspStats> stats_;
|
||||
SharedMemObject<SwIspStats> sharedStats_;
|
||||
SwIspStats stats_;
|
||||
Benchmark bench_;
|
||||
};
|
||||
|
||||
|
||||
@@ -74,11 +74,12 @@ namespace libcamera {
|
||||
*/
|
||||
|
||||
/**
|
||||
* \fn void SwStatsCpu::processLine0(uint32_t frame, unsigned int y, const uint8_t *src[])
|
||||
* \fn void SwStatsCpu::processLine0(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)
|
||||
* \brief Process line 0
|
||||
* \param[in] frame The frame number
|
||||
* \param[in] y The y coordinate.
|
||||
* \param[in] src The input data.
|
||||
* \param[in] y The y coordinate
|
||||
* \param[in] src The input data
|
||||
* \param[in] statsBufferIndex Index of stats buffer to use for multi-threading
|
||||
*
|
||||
* This function processes line 0 for input formats with
|
||||
* patternSize height == 1.
|
||||
@@ -97,14 +98,18 @@ namespace libcamera {
|
||||
* to the line in plane 0, etc.
|
||||
*
|
||||
* For non Bayer single plane input data only a single src pointer is required.
|
||||
*
|
||||
* The statsBufferIndex value must be less than the statsBufferCount value passed
|
||||
* to configure().
|
||||
*/
|
||||
|
||||
/**
|
||||
* \fn void SwStatsCpu::processLine2(uint32_t frame, unsigned int y, const uint8_t *src[])
|
||||
* \fn void SwStatsCpu::processLine2(uint32_t frame, unsigned int y, const uint8_t *src[], unsigned int statsBufferIndex = 0)
|
||||
* \brief Process line 2 and 3
|
||||
* \param[in] frame The frame number
|
||||
* \param[in] y The y coordinate.
|
||||
* \param[in] src The input data.
|
||||
* \param[in] y The y coordinate
|
||||
* \param[in] src The input data
|
||||
* \param[in] statsBufferIndex Index of stats buffer to use for multi-threading
|
||||
*
|
||||
* This function processes line 2 and 3 for input formats with
|
||||
* patternSize height == 4.
|
||||
@@ -182,14 +187,14 @@ static constexpr unsigned int kBlueYMul = 29; /* 0.114 * 256 */
|
||||
yVal = r * kRedYMul; \
|
||||
yVal += g * kGreenYMul; \
|
||||
yVal += b * kBlueYMul; \
|
||||
stats_.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;
|
||||
stats.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;
|
||||
|
||||
#define SWSTATS_FINISH_LINE_STATS() \
|
||||
stats_.sum_.r() += sumR; \
|
||||
stats_.sum_.g() += sumG; \
|
||||
stats_.sum_.b() += sumB;
|
||||
stats.sum_.r() += sumR; \
|
||||
stats.sum_.g() += sumG; \
|
||||
stats.sum_.b() += sumB;
|
||||
|
||||
void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])
|
||||
void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[], SwIspStats &stats)
|
||||
{
|
||||
const uint8_t *src0 = src[1] + window_.x;
|
||||
const uint8_t *src1 = src[2] + window_.x;
|
||||
@@ -214,7 +219,7 @@ void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])
|
||||
SWSTATS_FINISH_LINE_STATS()
|
||||
}
|
||||
|
||||
void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])
|
||||
void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[], SwIspStats &stats)
|
||||
{
|
||||
const uint16_t *src0 = (const uint16_t *)src[1] + window_.x;
|
||||
const uint16_t *src1 = (const uint16_t *)src[2] + window_.x;
|
||||
@@ -240,7 +245,7 @@ void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])
|
||||
SWSTATS_FINISH_LINE_STATS()
|
||||
}
|
||||
|
||||
void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])
|
||||
void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[], SwIspStats &stats)
|
||||
{
|
||||
const uint16_t *src0 = (const uint16_t *)src[1] + window_.x;
|
||||
const uint16_t *src1 = (const uint16_t *)src[2] + window_.x;
|
||||
@@ -266,7 +271,7 @@ void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])
|
||||
SWSTATS_FINISH_LINE_STATS()
|
||||
}
|
||||
|
||||
void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])
|
||||
void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[], SwIspStats &stats)
|
||||
{
|
||||
const uint8_t *src0 = src[1] + window_.x * 5 / 4;
|
||||
const uint8_t *src1 = src[2] + window_.x * 5 / 4;
|
||||
@@ -292,7 +297,7 @@ void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])
|
||||
SWSTATS_FINISH_LINE_STATS()
|
||||
}
|
||||
|
||||
void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])
|
||||
void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[], SwIspStats &stats)
|
||||
{
|
||||
const uint8_t *src0 = src[1] + window_.x * 5 / 4;
|
||||
const uint8_t *src1 = src[2] + window_.x * 5 / 4;
|
||||
@@ -332,8 +337,10 @@ void SwStatsCpu::startFrame(uint32_t frame)
|
||||
if (window_.width == 0)
|
||||
LOG(SwStatsCpu, Error) << "Calling startFrame() without setWindow()";
|
||||
|
||||
stats_.sum_ = RGB<uint64_t>({ 0, 0, 0 });
|
||||
stats_.yHistogram.fill(0);
|
||||
for (auto &s : stats_) {
|
||||
s.sum_ = RGB<uint64_t>({ 0, 0, 0 });
|
||||
s.yHistogram.fill(0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -345,8 +352,19 @@ void SwStatsCpu::startFrame(uint32_t frame)
|
||||
*/
|
||||
void SwStatsCpu::finishFrame(uint32_t frame, uint32_t bufferId)
|
||||
{
|
||||
stats_.valid = frame % kStatPerNumFrames == 0;
|
||||
*sharedStats_ = stats_;
|
||||
bool valid = frame % kStatPerNumFrames == 0;
|
||||
|
||||
if (valid) {
|
||||
sharedStats_->sum_ = RGB<uint64_t>({ 0, 0, 0 });
|
||||
sharedStats_->yHistogram.fill(0);
|
||||
for (const auto &s : stats_) {
|
||||
sharedStats_->sum_ += s.sum_;
|
||||
for (unsigned int j = 0; j < SwIspStats::kYHistogramSize; j++)
|
||||
sharedStats_->yHistogram[j] += s.yHistogram[j];
|
||||
}
|
||||
}
|
||||
|
||||
sharedStats_->valid = valid;
|
||||
statsReady.emit(frame, bufferId);
|
||||
}
|
||||
|
||||
@@ -389,12 +407,14 @@ int SwStatsCpu::setupStandardBayerOrder(BayerFormat::Order order)
|
||||
/**
|
||||
* \brief Configure the statistics object for the passed in input format
|
||||
* \param[in] inputCfg The input format
|
||||
* \param[in] statsBufferCount Number of internal stats buffers to use for multi-threading
|
||||
*
|
||||
* \return 0 on success, a negative errno value on failure
|
||||
*/
|
||||
int SwStatsCpu::configure(const StreamConfiguration &inputCfg)
|
||||
int SwStatsCpu::configure(const StreamConfiguration &inputCfg, unsigned int statsBufferCount)
|
||||
{
|
||||
stride_ = inputCfg.stride;
|
||||
stats_.resize(statsBufferCount);
|
||||
|
||||
BayerFormat bayerFormat =
|
||||
BayerFormat::fromPixelFormat(inputCfg.pixelFormat);
|
||||
@@ -504,7 +524,7 @@ void SwStatsCpu::processBayerFrame2(MappedFrameBuffer &in)
|
||||
/* linePointers[0] is not used by any stats0_ functions */
|
||||
linePointers[1] = src;
|
||||
linePointers[2] = src + stride_;
|
||||
(this->*stats0_)(linePointers);
|
||||
(this->*stats0_)(linePointers, stats_[0]);
|
||||
src += stride_ * 2;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user