software_isp: debayer_cpu: Add multi-threading support
Add CPU soft ISP multi-threading support.

Benchmark results for the Arduino Uno-Q with a weak CPU which is good for
performance testing, all numbers with an IMX219 running at
3280x2464 -> 3272x2464:

1 thread : 147ms / frame, ~6.5 fps
2 threads: 80ms / frame, ~12.5 fps
3 threads: 65ms / frame, ~15 fps

Adding a 4th thread does not improve performance.

Tested-by: Barnabás Pőcze <barnabas.pocze@ideasonboard.com> # ThinkPad X1 Yoga Gen 7 + ov2740
Reviewed-by: Milan Zamazal <mzamazal@redhat.com>
Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
This commit is contained in:
committed by
Kieran Bingham
parent
41d17f8211
commit
e948ada380
@@ -51,6 +51,7 @@ file structure:
|
||||
measure:
|
||||
skip: # non-negative integer, frames to skip initially
|
||||
number: # non-negative integer, frames to measure
|
||||
threads: # integer between 1 and 8, number of render threads to use, default 2
|
||||
|
||||
Configuration file example
|
||||
--------------------------
|
||||
@@ -84,6 +85,7 @@ Configuration file example
|
||||
measure:
|
||||
skip: 50
|
||||
number: 30
|
||||
threads: 2
|
||||
|
||||
List of variables and configuration options
|
||||
-------------------------------------------
|
||||
@@ -167,6 +169,12 @@ software_isp.measure.skip, software_isp.measure.number
|
||||
|
||||
Example `number` value: ``30``
|
||||
|
||||
software_isp.threads
|
||||
Number of render threads the software ISP uses when using the CPU.
|
||||
This must be between 1 and 8 and the default is 2.
|
||||
|
||||
Example value: ``2``
|
||||
|
||||
Further details
|
||||
---------------
|
||||
|
||||
|
||||
@@ -76,6 +76,7 @@ DebayerCpuThread::DebayerCpuThread(DebayerCpu *debayer, unsigned int threadIndex
|
||||
debayer_(debayer), threadIndex_(threadIndex),
|
||||
enableInputMemcpy_(enableInputMemcpy)
|
||||
{
|
||||
moveToThread(this);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -107,11 +108,15 @@ DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats, const GlobalConfigurat
|
||||
bool enableInputMemcpy =
|
||||
configuration.option<bool>({ "software_isp", "copy_input_buffer" }).value_or(true);
|
||||
|
||||
/* Just one thread object for now, which will be called inline rather than async */
|
||||
threads_.resize(1);
|
||||
unsigned int threadCount =
|
||||
configuration.option<unsigned int>({ "software_isp", "threads" }).value_or(kDefaultThreads);
|
||||
threadCount = std::clamp(threadCount, kMinThreads, kMaxThreads);
|
||||
threads_.resize(threadCount);
|
||||
|
||||
for (unsigned int i = 0; i < threads_.size(); i++)
|
||||
threads_[i] = std::make_unique<DebayerCpuThread>(this, i, enableInputMemcpy);
|
||||
|
||||
LOG(Debayer, Debug) << "Thread count " << threadCount;
|
||||
}
|
||||
|
||||
DebayerCpu::~DebayerCpu() = default;
|
||||
@@ -746,6 +751,11 @@ void DebayerCpuThread::process(uint32_t frame, const uint8_t *src, uint8_t *dst)
|
||||
process2(frame, src, dst);
|
||||
else
|
||||
process4(frame, src, dst);
|
||||
|
||||
debayer_->workPendingMutex_.lock();
|
||||
debayer_->workPending_ &= ~(1 << threadIndex_);
|
||||
debayer_->workPendingMutex_.unlock();
|
||||
debayer_->workPendingCv_.notify_one();
|
||||
}
|
||||
|
||||
void DebayerCpuThread::process2(uint32_t frame, const uint8_t *src, uint8_t *dst)
|
||||
@@ -985,7 +995,21 @@ void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
|
||||
|
||||
stats_->startFrame(frame);
|
||||
|
||||
threads_[0]->process(frame, in.planes()[0].data(), out.planes()[0].data());
|
||||
workPendingMutex_.lock();
|
||||
workPending_ = (1 << threads_.size()) - 1;
|
||||
workPendingMutex_.unlock();
|
||||
|
||||
for (auto &thread : threads_)
|
||||
thread->invokeMethod(&DebayerCpuThread::process,
|
||||
ConnectionTypeQueued, frame,
|
||||
in.planes()[0].data(), out.planes()[0].data());
|
||||
|
||||
{
|
||||
MutexLocker locker(workPendingMutex_);
|
||||
workPendingCv_.wait(locker, [&]() LIBCAMERA_TSA_REQUIRES(workPendingMutex_) {
|
||||
return workPending_ == 0;
|
||||
});
|
||||
}
|
||||
|
||||
metadata.planes()[0].bytesused = out.planes()[0].size();
|
||||
|
||||
@@ -1004,6 +1028,23 @@ void DebayerCpu::process(uint32_t frame, FrameBuffer *input, FrameBuffer *output
|
||||
inputBufferReady.emit(input);
|
||||
}
|
||||
|
||||
/*
 * Start every DebayerCpuThread held in threads_.
 *
 * Returns 0 unconditionally; the result of the individual thread->start()
 * calls is not inspected here.
 */
int DebayerCpu::start()
|
||||
{
|
||||
for (auto &thread : threads_)
|
||||
thread->start();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Stop every DebayerCpuThread held in threads_.
 *
 * This is done in two passes: exit() is called on all threads first, and
 * only then is wait() called on each of them.
 */
void DebayerCpu::stop()
|
||||
{
|
||||
/*
 * NOTE(review): issuing all exit() calls before the first wait() presumably
 * lets the threads wind down concurrently instead of one after another -
 * confirm against the Thread class semantics.
 */
for (auto &thread : threads_)
|
||||
thread->exit();
|
||||
|
||||
/* Block until each thread has finished. */
for (auto &thread : threads_)
|
||||
thread->wait();
|
||||
}
|
||||
|
||||
SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)
|
||||
{
|
||||
Size patternSize = this->patternSize(inputFormat);
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include <libcamera/base/mutex.h>
|
||||
#include <libcamera/base/object.h>
|
||||
|
||||
#include "libcamera/internal/bayer_format.h"
|
||||
@@ -41,6 +42,8 @@ public:
|
||||
std::tuple<unsigned int, unsigned int>
|
||||
strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
|
||||
void process(uint32_t frame, FrameBuffer *input, FrameBuffer *output, const DebayerParams ¶ms);
|
||||
int start();
|
||||
void stop();
|
||||
SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);
|
||||
const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
|
||||
|
||||
@@ -144,6 +147,13 @@ private:
|
||||
std::unique_ptr<SwStatsCpu> stats_;
|
||||
unsigned int xShift_; /* Offset of 0/1 applied to window_.x */
|
||||
|
||||
static constexpr unsigned int kMinThreads = 1;
|
||||
static constexpr unsigned int kMaxThreads = 8;
|
||||
static constexpr unsigned int kDefaultThreads = 2;
|
||||
|
||||
unsigned int workPending_ LIBCAMERA_TSA_GUARDED_BY(workPendingMutex_);
|
||||
Mutex workPendingMutex_;
|
||||
ConditionVariable workPendingCv_;
|
||||
std::vector<std::unique_ptr<DebayerCpuThread>> threads_;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user