diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp index 2a782d52bbab7b5..198637e5d32ee0e 100644 --- a/be/src/vec/exec/scan/scanner_context.cpp +++ b/be/src/vec/exec/scan/scanner_context.cpp @@ -172,8 +172,8 @@ Status ScannerContext::init() { _free_blocks_capacity = _max_thread_num * _block_per_scanner; auto block = get_free_block(); _estimated_block_bytes = std::max(block->allocated_bytes(), (size_t)16); - int min_blocks = (_estimated_block_bytes + config::min_bytes_in_scanner_queue - 1) / - config::min_bytes_in_scanner_queue; + int min_blocks = (config::min_bytes_in_scanner_queue + _estimated_block_bytes - 1) / + _estimated_block_bytes; _free_blocks_capacity = std::max(_free_blocks_capacity, min_blocks); return_free_block(std::move(block)); diff --git a/be/src/vec/exec/scan/scanner_context.h b/be/src/vec/exec/scan/scanner_context.h index ba9c1fdee10a5bd..bce5fc6c92957d4 100644 --- a/be/src/vec/exec/scan/scanner_context.h +++ b/be/src/vec/exec/scan/scanner_context.h @@ -146,6 +146,10 @@ class ScannerContext : public std::enable_shared_from_this { // todo(wb) rethinking how to calculate ```_max_bytes_in_queue``` when executing shared scan inline bool should_be_scheduled() const { + VLOG_NOTICE << "curl_bytes_in_queue " << _cur_bytes_in_queue << " max_bytes_in_queue " + << _max_bytes_in_queue << " _serving_blocks_num " << _serving_blocks_num + << " _free_blocks_capacity " << _free_blocks_capacity + << " estimated_block_bytes " << _estimated_block_bytes; return (_cur_bytes_in_queue < _max_bytes_in_queue / 2) && (_serving_blocks_num < allowed_blocks_num()); } diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index e8d7f8a7139a6d3..334c25704b3dce6 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -314,8 +314,6 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, // judge if we need to yield. So we record all raw data read in this round // scan, if this exceeds row number or bytes threshold, we yield this thread. std::vector blocks; - int64_t raw_rows_read = scanner->get_rows_read(); - int64_t raw_rows_threshold = raw_rows_read + config::doris_scanner_row_num; int64_t raw_bytes_read = 0; int64_t raw_bytes_threshold = config::doris_scanner_row_bytes; int num_rows_in_block = 0; @@ -329,11 +327,10 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, // queue, it will affect query latency and query concurrency for example ssb 3.3. auto should_do_scan = [&, batch_size = state->batch_size(), time = state->wait_full_block_schedule_times()]() { - if (raw_bytes_read < raw_bytes_threshold && raw_rows_read < raw_rows_threshold) { + if (raw_bytes_read < raw_bytes_threshold) { return true; } else if (num_rows_in_block < batch_size) { - return raw_bytes_read < raw_bytes_threshold * time && - raw_rows_read < raw_rows_threshold * time; + return raw_bytes_read < raw_bytes_threshold * time; } return false; }; @@ -380,7 +377,6 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler, blocks.push_back(std::move(block)); } } - raw_rows_read = scanner->get_rows_read(); } // end for while // if we failed, check status.