Skip to content

Commit 49983ec

Browse files
committed
[C++] Expose prefetch range planning via Reader::preBufferRange and refactor preBuffer to reuse it
1 parent 3563ee5 commit 49983ec

3 files changed

Lines changed: 40 additions & 5 deletions

File tree

c++/include/orc/Reader.hh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <memory>
3333
#include <set>
3434
#include <string>
35+
#include <utility>
3536
#include <vector>
3637

3738
namespace orc {
@@ -696,6 +697,16 @@ namespace orc {
696697
virtual void preBuffer(const std::vector<uint32_t>& stripes,
697698
const std::list<uint64_t>& includeTypes) = 0;
698699

700+
/**
701+
* Calculate the prefetch ranges for the selected stripes and columns.
702+
* This method is thread-safe and does not cache any data.
703+
* @param stripes the indices of the stripes to prefetch; out-of-range indices are ignored
704+
* @param includeTypes the types to prefetch
705+
* @return the prefetch ranges as (offset, length) pairs
706+
*/
707+
virtual std::vector<std::pair<uint64_t, uint64_t>> preBufferRange(
708+
const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) = 0;
709+
699710
/**
700711
* Release cached entries whose right boundary is less than or equal to the given boundary.
701712
* @param boundary the boundary value to release cache entries

c++/src/Reader.cc

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1770,8 +1770,8 @@ namespace orc {
17701770
contents_->evictCache(boundary);
17711771
}
17721772

1773-
void ReaderImpl::preBuffer(const std::vector<uint32_t>& stripes,
1774-
const std::list<uint64_t>& includeTypes) {
1773+
std::vector<std::pair<uint64_t, uint64_t>> ReaderImpl::preBufferRange(
1774+
const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) {
17751775
std::vector<uint32_t> newStripes;
17761776
for (auto stripe : stripes) {
17771777
if (stripe < static_cast<uint32_t>(footer_->stripes_size())) newStripes.push_back(stripe);
@@ -1783,7 +1783,7 @@ namespace orc {
17831783
}
17841784

17851785
if (newStripes.empty() || newIncludeTypes.empty()) {
1786-
return;
1786+
return {};
17871787
}
17881788

17891789
orc::RowReaderOptions rowReaderOptions;
@@ -1792,12 +1792,33 @@ namespace orc {
17921792
std::vector<bool> selectedColumns;
17931793
columnSelector.updateSelected(selectedColumns, rowReaderOptions);
17941794

1795+
std::vector<std::pair<uint64_t, uint64_t>> ranges;
1796+
17951797
for (auto stripe : newStripes) {
17961798
const auto& stripeInfo = footer_->stripes(stripe);
17971799
proto::StripeFooter stripeFooter = getStripeFooter(stripeInfo, *contents_);
1798-
auto ranges = extractReadRangesForStripe(stripe, stripeInfo, stripeFooter, selectedColumns);
1799-
contents_->cacheRanges(std::move(ranges));
1800+
auto stripeRanges =
1801+
extractReadRangesForStripe(stripe, stripeInfo, stripeFooter, selectedColumns);
1802+
for (const auto& range : stripeRanges) {
1803+
ranges.emplace_back(range.offset, range.length);
1804+
}
1805+
}
1806+
return ranges;
1807+
}
1808+
1809+
// Plans the read ranges for the given stripes and types via preBufferRange()
// and passes them to contents_->cacheRanges(). Does nothing when the planning
// step yields no ranges.
void ReaderImpl::preBuffer(const std::vector<uint32_t>& stripes,
1810+
const std::list<uint64_t>& includeTypes) {
1811+
auto ranges = preBufferRange(stripes, includeTypes);
1812+
if (ranges.empty()) {
1813+
return;
1814+
}
1815+
1816+
// preBufferRange() reports each range as an (offset, length) pair; rebuild
// ReadRange objects from them, which is the element type handed to cacheRanges().
std::vector<ReadRange> readRanges;
1817+
readRanges.reserve(ranges.size());
1818+
for (const auto& range : ranges) {
1819+
readRanges.emplace_back(range.first, range.second);
18001820
}
1821+
// All planned ranges are submitted in one cacheRanges() call rather than one
// call per stripe.
contents_->cacheRanges(std::move(readRanges));
18011822
}
18021823

18031824
RowReader::~RowReader() {

c++/src/Reader.hh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,9 @@ namespace orc {
410410
std::map<uint32_t, BloomFilterIndex> getBloomFilters(
411411
uint32_t stripeIndex, const std::set<uint32_t>& included) const override;
412412

413+
std::vector<std::pair<uint64_t, uint64_t>> preBufferRange(
414+
const std::vector<uint32_t>& stripes, const std::list<uint64_t>& includeTypes) override;
415+
413416
void preBuffer(const std::vector<uint32_t>& stripes,
414417
const std::list<uint64_t>& includeTypes) override;
415418
void releaseBuffer(uint64_t boundary) override;

0 commit comments

Comments
 (0)