darktable-org · LebedevRI · Apr 13, 2024 · Apr 12, 2024 · Apr 12, 2024 · Apr 12, 2024
@@ -61,7 +61,7 @@ template <typename C> void BM_Broadcast(benchmark::State& state) {
   int64_t numBytes = state.range(0);
   const int bytesPerChunk = sizeof(T);
   const auto numChunks =
-      implicit_cast<int>(roundUpDivision(numBytes, bytesPerChunk));
+      implicit_cast<int>(roundUpDivisionSafe(numBytes, bytesPerChunk));
   numBytes = bytesPerChunk * numChunks;
 
   std::vector<T, DefaultInitAllocatorAdaptor<T, std::allocator<T>>> output;
@@ -105,7 +105,7 @@ template <typename C> void BM_Copy(benchmark::State& state) {
   int64_t numBytes = state.range(0);
   const int bytesPerChunk = sizeof(T);
   const auto numChunks =
-      implicit_cast<int>(roundUpDivision(numBytes, bytesPerChunk));
+      implicit_cast<int>(roundUpDivisionSafe(numBytes, bytesPerChunk));
   numBytes = bytesPerChunk * numChunks;
 
   std::vector<uint8_t,

@@ -93,7 +93,7 @@ __attribute__((noinline)) __attribute__((visibility("default")))
 JPEGStuffedByteStreamGenerator::JPEGStuffedByteStreamGenerator(
     const int64_t numBytesMax, bool AppendStuffingByte) {
   invariant(numBytesMax > 0);
-  const auto expectedOverhead = roundUpDivision(numBytesMax, 100); // <=1%
+  const auto expectedOverhead = roundUpDivisionSafe(numBytesMax, 100); // <=1%
   dataStorage.reserve(implicit_cast<size_t>(numBytesMax + expectedOverhead));
 
   // Here we only need to differentiate between a normal byte,

@@ -103,7 +103,7 @@ struct BitVectorLengthsGenerator final {
     std::random_device rd;
     std::mt19937_64 gen(rd());
 
-    for (int64_t numBits = 0; implicit_cast<int64_t>(roundUpDivision(
+    for (int64_t numBits = 0; implicit_cast<int64_t>(roundUpDivisionSafe(
                                   numBits, CHAR_BIT)) < maxBytes;) {
       int len = dist(gen);
       numBitsToProduce += len;
@@ -148,7 +148,7 @@ template <typename T, typename C> void BM(benchmark::State& state) {
               DefaultInitAllocatorAdaptor<OutputChunkType,
                                           std::allocator<OutputChunkType>>>
       output;
-  output.reserve(implicit_cast<size_t>(roundUpDivision(
+  output.reserve(implicit_cast<size_t>(roundUpDivisionSafe(
       gen.numBitsToProduce, CHAR_BIT * sizeof(OutputChunkType))));
 
   for (auto _ : state) {

@@ -106,7 +106,7 @@ void BM(benchmark::State& state, bool Stuffed) {
                                           std::allocator<OutputChunkType>>>
       output;
   output.reserve(implicit_cast<size_t>(
-      roundUpDivision(input->size(), sizeof(OutputChunkType))));
+      roundUpDivisionSafe(input->size(), sizeof(OutputChunkType))));
 
   for (auto _ : state) {
     output.clear();

@@ -81,7 +81,7 @@ struct BitStreamCacheLeftInRightOut final : BitStreamCacheBase {
     establishClassInvariants();
     invariant(count >= 0);
     // `count` *could* be larger than `MaxGetBits`.
-    invariant(count != 0);
+    // `count` could be zero.
     invariant(count <= Size);
     invariant(count <= fillLevel);
     cache >>= count;

@@ -48,13 +48,9 @@ ByteStreamPosition<bo> getAsByteStreamPosition(BitStreamPosition<bo> state) {
   invariant(state.pos % MinByteStepMultiple == 0);
   invariant(state.fillLevel >= 0);
 
-  auto numBytesRemainingInCache =
-      implicit_cast<int>(roundUpDivision(state.fillLevel, CHAR_BIT));
-  invariant(numBytesRemainingInCache >= 0);
-  invariant(numBytesRemainingInCache <= state.pos);
-
   auto numBytesToBacktrack = implicit_cast<int>(
-      roundUp(numBytesRemainingInCache, MinByteStepMultiple));
+      MinByteStepMultiple *
+      roundUpDivision(state.fillLevel, CHAR_BIT * MinByteStepMultiple));
   invariant(numBytesToBacktrack >= 0);
   invariant(numBytesToBacktrack <= state.pos);
   invariant(numBytesToBacktrack % MinByteStepMultiple == 0);
@@ -67,6 +63,8 @@ ByteStreamPosition<bo> getAsByteStreamPosition(BitStreamPosition<bo> state) {
   res.bytePos = state.pos - numBytesToBacktrack;
   invariant(numBitsToBacktrack >= state.fillLevel);
   res.numBitsToSkip = numBitsToBacktrack - state.fillLevel;
+  invariant(res.numBitsToSkip >= 0);
+  invariant(res.numBitsToSkip < CHAR_BIT * MinByteStepMultiple);
 
   invariant(res.bytePos >= 0);
   invariant(res.bytePos <= state.pos);

@@ -28,6 +28,7 @@
 #include "adt/Invariant.h"
 #include "adt/VariableLengthLoad.h"
 #include "bitstreams/BitStream.h"
+#include "bitstreams/BitStreamPosition.h"
 #include "io/Endianness.h"
 #include "io/IOException.h"
 #include <array>
@@ -192,6 +193,22 @@ class BitStreamer {
     establishClassInvariants();
   }
 
+  void reload() {
+    establishClassInvariants();
+
+    BitStreamPosition<Traits::Tag> state;
+    state.pos = getInputPosition();
+    state.fillLevel = getFillLevel();
+    const auto bsPos = getAsByteStreamPosition(state);
+
+    auto replacement = BitStreamer(replenisher.input);
+    if (bsPos.bytePos != 0)
+      replacement.replenisher.markNumBytesAsConsumed(bsPos.bytePos);
+    replacement.fill();
+    replacement.skipBitsNoFill(bsPos.numBitsToSkip);
+    *this = std::move(replacement);
+  }
+
   void fill(int nbits = Cache::MaxGetBits) {
     establishClassInvariants();
     invariant(nbits >= 0);
@@ -204,6 +221,7 @@ class BitStreamer {
     const auto input = replenisher.getInput();
     const auto numBytes = static_cast<Derived*>(this)->fillCache(input);
     replenisher.markNumBytesAsConsumed(numBytes);
+    invariant(cache.fillLevel >= nbits);
   }
 
   // these methods might be specialized by implementations that support it

@@ -138,6 +138,12 @@ constexpr uint64_t RAWSPEED_READNONE roundUp(uint64_t value,
 
 constexpr uint64_t RAWSPEED_READNONE roundUpDivision(uint64_t value,
                                                      uint64_t div) {
+  invariant(div != 0);
+  return roundUp(value, div) / div;
+}
+
+constexpr uint64_t RAWSPEED_READNONE roundUpDivisionSafe(uint64_t value,
+                                                         uint64_t div) {
   return (value != 0) ? (1 + ((value - 1) / div)) : 0;
 }
 

@@ -393,8 +393,8 @@ class DngOpcodes::PixelOpcode : public ROIOpcode {
     int cpp = ri->getCpp();
     const iRectangle2D& ROI = getRoi();
     const iPoint2D numAffected(
-        implicit_cast<int>(roundUpDivision(getRoi().dim.x, colPitch)),
-        implicit_cast<int>(roundUpDivision(getRoi().dim.y, rowPitch)));
+        implicit_cast<int>(roundUpDivisionSafe(getRoi().dim.x, colPitch)),
+        implicit_cast<int>(roundUpDivisionSafe(getRoi().dim.y, rowPitch)));
     for (int y = 0; y < numAffected.y; ++y) {
       for (int x = 0; x < numAffected.x; ++x) {
         for (auto p = 0U; p < planes; ++p) {
@@ -568,7 +568,7 @@ class DngOpcodes::DeltaRowOrCol : public DeltaRowOrColBase {
     // See PixelOpcode::applyOP(). We will access deltaF/deltaI up to (excl.)
     // either ROI.getWidth() or ROI.getHeight() index. Thus, we need to have
     // either ROI.getRight() or ROI.getBottom() elements in there.
-    if (const auto expectedSize = roundUpDivision(
+    if (const auto expectedSize = roundUpDivisionSafe(
             S::select(getRoi().getWidth(), getRoi().getHeight()),
             S::select(getPitch().x, getPitch().y));
         expectedSize != deltaF_count) {

@@ -201,8 +201,8 @@ void RawImageData::subFrame(iRectangle2D crop) {
 void RawImageData::createBadPixelMap() {
   if (!isAllocated())
     ThrowRDE("(internal) Bad pixel map cannot be allocated before image.");
-  mBadPixelMapPitch =
-      implicit_cast<uint32_t>(roundUp(roundUpDivision(uncropped_dim.x, 8), 16));
+  mBadPixelMapPitch = implicit_cast<uint32_t>(
+      roundUp(roundUpDivisionSafe(uncropped_dim.x, 8), 16));
   assert(mBadPixelMap.empty());
   mBadPixelMap.resize(static_cast<size_t>(mBadPixelMapPitch) * uncropped_dim.y,
                       uint8_t(0));

@@ -328,13 +328,13 @@ void ArwDecoder::DecodeLJpeg(const TiffIFD* raw) {
 
   assert(tilew > 0);
   const auto tilesX =
-      implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.x, tilew));
+      implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.x, tilew));
   if (!tilesX)
     ThrowRDE("Zero tiles horizontally");
 
   assert(tileh > 0);
   const auto tilesY =
-      implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.y, tileh));
+      implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.y, tileh));
   if (!tilesY)
     ThrowRDE("Zero tiles vertically");
 

@@ -308,13 +308,13 @@ DngDecoder::getTilingDescription(const TiffIFD* raw) const {
 
     assert(tilew > 0);
     const auto tilesX =
-        implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.x, tilew));
+        implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.x, tilew));
     if (!tilesX)
       ThrowRDE("Zero tiles horizontally");
 
     assert(tileh > 0);
     const auto tilesY =
-        implicit_cast<uint32_t>(roundUpDivision(mRaw->dim.y, tileh));
+        implicit_cast<uint32_t>(roundUpDivisionSafe(mRaw->dim.y, tileh));
     if (!tilesY)
       ThrowRDE("Zero tiles vertically");
 
@@ -350,7 +350,7 @@ DngDecoder::getTilingDescription(const TiffIFD* raw) const {
                            : mRaw->dim.y;
 
   if (yPerSlice == 0 ||
-      roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
+      roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
     ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
              yPerSlice, counts->count, mRaw->dim.y);
   }

@@ -427,8 +427,8 @@ void IiqDecoder::PhaseOneFlatField(ByteStream data, IiqCorr corr) const {
   if (head[2] == 0 || head[3] == 0 || head[4] == 0 || head[5] == 0)
     return;
 
-  auto wide = implicit_cast<int>(roundUpDivision(head[2], head[4]));
-  auto high = implicit_cast<int>(roundUpDivision(head[3], head[5]));
+  auto wide = implicit_cast<int>(roundUpDivisionSafe(head[2], head[4]));
+  auto high = implicit_cast<int>(roundUpDivisionSafe(head[3], head[5]));
 
   std::vector<float> mrow_storage;
   Array2DRef<float> mrow = Array2DRef<float>::create(

@@ -182,7 +182,7 @@ bool NefDecoder::NEFIsUncompressed(const TiffIFD* raw) {
   // We can't just accept this. Some *compressed* NEF's also pass this check :(
   // Thus, let's accept *some* *small* padding.
   const auto requiredInputBits = bitPerPixel * requiredPixels;
-  const auto requiredInputBytes = roundUpDivision(requiredInputBits, 8);
+  const auto requiredInputBytes = roundUpDivisionSafe(requiredInputBits, 8);
   // While we might have more *pixels* than needed, it does not nessesairly mean
   // that we have more input *bytes*. We might be off by a few pixels, and with
   // small image dimensions and bpp, we might still be in the same byte.
@@ -229,7 +229,7 @@ void NefDecoder::DecodeUncompressed() const {
   }
 
   if (yPerSlice == 0 || yPerSlice > static_cast<uint32_t>(mRaw->dim.y) ||
-      roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
+      roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
     ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
              yPerSlice, counts->count, mRaw->dim.y);
   }

@@ -164,7 +164,7 @@ void OrfDecoder::decodeUncompressedInterleaved(ByteStream s, uint32_t w,
 
   int inputPitchBytes = inputPitchBits / 8;
 
-  const auto numEvenLines = implicit_cast<int>(roundUpDivision(h, 2));
+  const auto numEvenLines = implicit_cast<int>(roundUpDivisionSafe(h, 2));
   const auto evenLinesInput = s.getStream(numEvenLines, inputPitchBytes)
                                   .peekRemainingBuffer()
                                   .getAsArray1DRef();

@@ -74,7 +74,7 @@ void RawDecoder::decodeUncompressed(const TiffIFD* rawIFD,
   }
 
   if (yPerSlice == 0 || yPerSlice > static_cast<uint32_t>(mRaw->dim.y) ||
-      roundUpDivision(mRaw->dim.y, yPerSlice) != counts->count) {
+      roundUpDivisionSafe(mRaw->dim.y, yPerSlice) != counts->count) {
     ThrowRDE("Invalid y per slice %u or strip count %u (height = %u)",
              yPerSlice, counts->count, mRaw->dim.y);
   }

@@ -55,8 +55,8 @@ struct DngTilingDescription final {
 
   DngTilingDescription(const iPoint2D& dim_, uint32_t tileW_, uint32_t tileH_)
       : dim(dim_), tileW(tileW_), tileH(tileH_),
-        tilesX(implicit_cast<uint32_t>(roundUpDivision(dim.x, tileW))),
-        tilesY(implicit_cast<uint32_t>(roundUpDivision(dim.y, tileH))),
+        tilesX(implicit_cast<uint32_t>(roundUpDivisionSafe(dim.x, tileW))),
+        tilesY(implicit_cast<uint32_t>(roundUpDivisionSafe(dim.y, tileH))),
         numTiles(tilesX * tilesY) {
     invariant(dim.area() > 0);
     invariant(tileW > 0);

@@ -927,7 +927,7 @@ FujiDecompressor::FujiHeader::operator bool() const {
        raw_rounded_width % block_size ||
        raw_rounded_width - raw_width >= block_size || blocks_in_row > 0x10 ||
        blocks_in_row == 0 || blocks_in_row != raw_rounded_width / block_size ||
-       blocks_in_row != roundUpDivision(raw_width, block_size) ||
+       blocks_in_row != roundUpDivisionSafe(raw_width, block_size) ||
        total_lines > 0x800 || total_lines == 0 ||
        total_lines != raw_height / FujiStrip::lineHeight() ||
        (raw_bits != 12 && raw_bits != 14 && raw_bits != 16) ||

@@ -135,8 +135,8 @@ LJpegDecompressor::LJpegDecompressor(RawImage img, iRectangle2D imgFrame_,
       static_cast<int>(mRaw->getCpp()) * imgFrame.dim.x;
 
   // How many full pixel MCUs do we need to consume for that?
-  if (const auto mcusToConsume =
-          implicit_cast<int>(roundUpDivision(tileRequiredWidth, frame.mcu.x));
+  if (const auto mcusToConsume = implicit_cast<int>(
+          roundUpDivisionSafe(tileRequiredWidth, frame.mcu.x));
       frame.dim.x < mcusToConsume ||
       frame.mcu.y * frame.dim.y < imgFrame.dim.y ||
       frame.mcu.x * frame.dim.x < tileRequiredWidth) {
@@ -274,7 +274,7 @@ ByteStream::size_type LJpegDecompressor::decodeN() const {
   // the raw image buffer. The excessive content has to be ignored.
 
   invariant(imgFrame.dim.y % frame.mcu.y == 0);
-  const auto numRestartIntervals = implicit_cast<int>(roundUpDivision(
+  const auto numRestartIntervals = implicit_cast<int>(roundUpDivisionSafe(
       imgFrame.dim.y / frame.mcu.y, numLJpegRowsPerRestartInterval));
   invariant(numRestartIntervals >= 0);
   invariant(numRestartIntervals != 0);

@@ -91,7 +91,8 @@ void PanasonicV4Decompressor::chopInputIntoBlocks() {
   };
 
   // If section_split_offset == 0, last block may not be full.
-  const auto blocksTotal = roundUpDivision(input.getRemainSize(), BlockSize);
+  const auto blocksTotal =
+      roundUpDivisionSafe(input.getRemainSize(), BlockSize);
   invariant(blocksTotal > 0);
   invariant(blocksTotal * PixelsPerBlock >= mRaw->dim.area());
   assert(blocksTotal <= std::numeric_limits<uint32_t>::max());

@@ -98,7 +98,7 @@ PanasonicV5Decompressor::PanasonicV5Decompressor(RawImage img,
   invariant(numPackets > 0);
 
   // And how many blocks that would be? Last block may not be full, pad it.
-  numBlocks = roundUpDivision(numPackets, PacketsPerBlock);
+  numBlocks = roundUpDivisionSafe(numPackets, PacketsPerBlock);
   invariant(numBlocks > 0);
 
   // Does the input contain enough blocks?

@@ -208,8 +208,8 @@ VC5Decompressor::BandData VC5Decompressor::Wavelet::reconstructPass(
 #pragma GCC diagnostic ignored "-Wshorten-64-to-32"
 #ifdef HAVE_OPENMP
 #pragma omp taskloop default(none) firstprivate(dst, process)                  \
-    num_tasks(roundUpDivision(rawspeed_get_number_of_processor_cores(),        \
-                                  numChannels))
+    num_tasks(roundUpDivisionSafe(rawspeed_get_number_of_processor_cores(),    \
+                                      numChannels))
 #endif
   for (int row = 0; row < dst.height() / 2; ++row) {
 #pragma GCC diagnostic pop
@@ -267,9 +267,8 @@ VC5Decompressor::BandData VC5Decompressor::Wavelet::combineLowHighPass(
 #pragma GCC diagnostic ignored "-Wshorten-64-to-32"
 #ifdef HAVE_OPENMP
 #pragma omp taskloop if (finalWavelet) default(none)                           \
-    firstprivate(dst, process)                                                 \
-    num_tasks(roundUpDivision(rawspeed_get_number_of_processor_cores(), 2))    \
-    mergeable
+    firstprivate(dst, process) num_tasks(roundUpDivisionSafe(                  \
+            rawspeed_get_number_of_processor_cores(), 2)) mergeable
 #endif
   for (int row = 0; row < dst.height(); ++row) {
 #pragma GCC diagnostic pop
@@ -411,7 +410,8 @@ VC5Decompressor::VC5Decompressor(ByteStream bs, const RawImage& img)
     for (Wavelet& wavelet : channel.wavelets) {
       // Pad dimensions as necessary and divide them by two for the next wavelet
       for (auto* dimension : {&waveletWidth, &waveletHeight})
-        *dimension = implicit_cast<uint16_t>(roundUpDivision(*dimension, 2));
+        *dimension =
+            implicit_cast<uint16_t>(roundUpDivisionSafe(*dimension, 2));
       wavelet.width = waveletWidth;
       wavelet.height = waveletHeight;
 
@@ -658,7 +658,7 @@ VC5Decompressor::Wavelet::LowPassBand::LowPassBand(Wavelet& wavelet_,
   const auto bitsTotal = waveletArea * lowpassPrecision;
   constexpr int bytesPerChunk = 8; // FIXME: or is it 4?
   constexpr int bitsPerChunk = 8 * bytesPerChunk;
-  const auto chunksTotal = roundUpDivision(bitsTotal, bitsPerChunk);
+  const auto chunksTotal = roundUpDivisionSafe(bitsTotal, bitsPerChunk);
   const auto bytesTotal = bytesPerChunk * chunksTotal;
   // And clamp the size / verify sufficient input while we are at it.
   // NOTE: this might fail (and should throw, not assert).

@@ -67,7 +67,7 @@ template <typename CoalescedType, typename PartType>
 auto coalesceElts(Array1DRef<const PartType> input) {
   std::vector<CoalescedType> outputStorage;
   {
-    outputStorage.reserve(implicit_cast<size_t>(roundUpDivision(
+    outputStorage.reserve(implicit_cast<size_t>(roundUpDivisionSafe(
         sizeof(PartType) * input.size(), sizeof(CoalescedType))));
     auto subIter = std::back_inserter(outputStorage);
     auto iter = CoalescingOutputIterator<decltype(subIter), PartType>(subIter);