Skip to content

Commit 3444fe3

Browse files
committed
[C++] Add external-buffer support in DataBuffer lifecycle management
1 parent 6546faf commit 3444fe3

3 files changed

Lines changed: 119 additions & 16 deletions

File tree

c++/include/orc/MemoryPool.hh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,15 @@ namespace orc {
4242
uint64_t currentSize_;
4343
// maximal capacity (actual allocated memory)
4444
uint64_t currentCapacity_;
45+
// whether this buffer owns memory lifecycle
46+
bool ownBuffer_;
4547

4648
// not implemented
4749
DataBuffer(DataBuffer& buffer);
4850
DataBuffer& operator=(DataBuffer& buffer);
4951

5052
public:
51-
DataBuffer(MemoryPool& pool, uint64_t size = 0);
53+
DataBuffer(MemoryPool& pool, uint64_t size = 0, bool ownBuf = true);
5254

5355
DataBuffer(DataBuffer<T>&& buffer) noexcept;
5456

@@ -81,6 +83,9 @@ namespace orc {
8183
void reserve(uint64_t size);
8284
void resize(uint64_t size);
8385
void zeroOut();
86+
87+
// Attach to an external raw buffer. bufSize is in bytes.
88+
void setData(T* buf, size_t bufSize);
8489
};
8590

8691
// Specializations for char

c++/src/MemoryPool.cc

Lines changed: 77 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <string.h>
2525
#include <cstdlib>
2626
#include <iostream>
27+
#include <type_traits>
2728

2829
namespace orc {
2930

@@ -52,35 +53,51 @@ namespace orc {
5253
}
5354

5455
template <class T>
55-
DataBuffer<T>::DataBuffer(MemoryPool& pool, uint64_t newSize)
56-
: memoryPool_(pool), buf_(nullptr), currentSize_(0), currentCapacity_(0) {
57-
reserve(newSize);
58-
currentSize_ = newSize;
56+
DataBuffer<T>::DataBuffer(MemoryPool& pool, uint64_t newSize, bool ownBuf)
57+
: memoryPool_(pool),
58+
buf_(nullptr),
59+
currentSize_(0),
60+
currentCapacity_(0),
61+
ownBuffer_(ownBuf) {
62+
if (ownBuffer_) {
63+
reserve(newSize);
64+
currentSize_ = newSize;
65+
}
5966
}
6067

6168
template <class T>
6269
DataBuffer<T>::DataBuffer(DataBuffer<T>&& buffer) noexcept
6370
: memoryPool_(buffer.memoryPool_),
6471
buf_(buffer.buf_),
6572
currentSize_(buffer.currentSize_),
66-
currentCapacity_(buffer.currentCapacity_) {
73+
currentCapacity_(buffer.currentCapacity_),
74+
ownBuffer_(buffer.ownBuffer_) {
6775
buffer.buf_ = nullptr;
6876
buffer.currentSize_ = 0;
6977
buffer.currentCapacity_ = 0;
78+
buffer.ownBuffer_ = true;
7079
}
7180

7281
template <class T>
7382
DataBuffer<T>::~DataBuffer() {
83+
if (!ownBuffer_) {
84+
return;
85+
}
7486
for (uint64_t i = currentSize_; i > 0; --i) {
7587
(buf_ + i - 1)->~T();
7688
}
7789
if (buf_) {
90+
static_assert(std::is_trivially_copyable<T>::value,
91+
"Only trivially copyable type is supported for DataBuffer reserve");
7892
memoryPool_.free(reinterpret_cast<char*>(buf_));
7993
}
8094
}
8195

8296
template <class T>
8397
void DataBuffer<T>::resize(uint64_t newSize) {
98+
if (!ownBuffer_) {
99+
return;
100+
}
84101
reserve(newSize);
85102
if (currentSize_ > newSize) {
86103
for (uint64_t i = currentSize_; i > newSize; --i) {
@@ -96,6 +113,9 @@ namespace orc {
96113

97114
template <class T>
98115
void DataBuffer<T>::reserve(uint64_t newCapacity) {
116+
if (!ownBuffer_) {
117+
return;
118+
}
99119
if (newCapacity > currentCapacity_ || !buf_) {
100120
if (buf_) {
101121
T* buf_old = buf_;
@@ -114,6 +134,18 @@ namespace orc {
114134
memset(buf_, 0, sizeof(T) * currentCapacity_);
115135
}
116136

137+
template <class T>
138+
void DataBuffer<T>::setData(T* buffer, size_t bufSize) {
139+
if (ownBuffer_ && buf_) {
140+
static_assert(std::is_trivially_copyable<T>::value,
141+
"Only trivially copyable type is supported for DataBuffer reserve");
142+
memoryPool_.free(reinterpret_cast<char*>(buf_));
143+
}
144+
ownBuffer_ = false;
145+
buf_ = buffer;
146+
currentSize_ = currentCapacity_ = static_cast<uint64_t>(bufSize / sizeof(T));
147+
}
148+
117149
// Specializations for Int128
118150
template <>
119151
void DataBuffer<Int128>::zeroOut() {
@@ -126,13 +158,16 @@ namespace orc {
126158

127159
template <>
128160
DataBuffer<char>::~DataBuffer() {
129-
if (buf_) {
161+
if (ownBuffer_ && buf_) {
130162
memoryPool_.free(reinterpret_cast<char*>(buf_));
131163
}
132164
}
133165

134166
template <>
135167
void DataBuffer<char>::resize(uint64_t newSize) {
168+
if (!ownBuffer_) {
169+
return;
170+
}
136171
reserve(newSize);
137172
if (newSize > currentSize_) {
138173
memset(buf_ + currentSize_, 0, newSize - currentSize_);
@@ -144,13 +179,16 @@ namespace orc {
144179

145180
template <>
146181
DataBuffer<char*>::~DataBuffer() {
147-
if (buf_) {
182+
if (ownBuffer_ && buf_) {
148183
memoryPool_.free(reinterpret_cast<char*>(buf_));
149184
}
150185
}
151186

152187
template <>
153188
void DataBuffer<char*>::resize(uint64_t newSize) {
189+
if (!ownBuffer_) {
190+
return;
191+
}
154192
reserve(newSize);
155193
if (newSize > currentSize_) {
156194
memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(char*));
@@ -162,13 +200,16 @@ namespace orc {
162200

163201
template <>
164202
DataBuffer<double>::~DataBuffer() {
165-
if (buf_) {
203+
if (ownBuffer_ && buf_) {
166204
memoryPool_.free(reinterpret_cast<char*>(buf_));
167205
}
168206
}
169207

170208
template <>
171209
void DataBuffer<double>::resize(uint64_t newSize) {
210+
if (!ownBuffer_) {
211+
return;
212+
}
172213
reserve(newSize);
173214
if (newSize > currentSize_) {
174215
memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(double));
@@ -180,13 +221,16 @@ namespace orc {
180221

181222
template <>
182223
DataBuffer<float>::~DataBuffer() {
183-
if (buf_) {
224+
if (ownBuffer_ && buf_) {
184225
memoryPool_.free(reinterpret_cast<char*>(buf_));
185226
}
186227
}
187228

188229
template <>
189230
void DataBuffer<float>::resize(uint64_t newSize) {
231+
if (!ownBuffer_) {
232+
return;
233+
}
190234
reserve(newSize);
191235
if (newSize > currentSize_) {
192236
memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(float));
@@ -198,13 +242,16 @@ namespace orc {
198242

199243
template <>
200244
DataBuffer<int64_t>::~DataBuffer() {
201-
if (buf_) {
245+
if (ownBuffer_ && buf_) {
202246
memoryPool_.free(reinterpret_cast<char*>(buf_));
203247
}
204248
}
205249

206250
template <>
207251
void DataBuffer<int64_t>::resize(uint64_t newSize) {
252+
if (!ownBuffer_) {
253+
return;
254+
}
208255
reserve(newSize);
209256
if (newSize > currentSize_) {
210257
memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int64_t));
@@ -216,13 +263,16 @@ namespace orc {
216263

217264
template <>
218265
DataBuffer<int32_t>::~DataBuffer() {
219-
if (buf_) {
266+
if (ownBuffer_ && buf_) {
220267
memoryPool_.free(reinterpret_cast<char*>(buf_));
221268
}
222269
}
223270

224271
template <>
225272
void DataBuffer<int32_t>::resize(uint64_t newSize) {
273+
if (!ownBuffer_) {
274+
return;
275+
}
226276
reserve(newSize);
227277
if (newSize > currentSize_) {
228278
memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int32_t));
@@ -234,13 +284,16 @@ namespace orc {
234284

235285
template <>
236286
DataBuffer<int16_t>::~DataBuffer() {
237-
if (buf_) {
287+
if (ownBuffer_ && buf_) {
238288
memoryPool_.free(reinterpret_cast<char*>(buf_));
239289
}
240290
}
241291

242292
template <>
243293
void DataBuffer<int16_t>::resize(uint64_t newSize) {
294+
if (!ownBuffer_) {
295+
return;
296+
}
244297
reserve(newSize);
245298
if (newSize > currentSize_) {
246299
memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int16_t));
@@ -252,13 +305,16 @@ namespace orc {
252305

253306
template <>
254307
DataBuffer<int8_t>::~DataBuffer() {
255-
if (buf_) {
308+
if (ownBuffer_ && buf_) {
256309
memoryPool_.free(reinterpret_cast<char*>(buf_));
257310
}
258311
}
259312

260313
template <>
261314
void DataBuffer<int8_t>::resize(uint64_t newSize) {
315+
if (!ownBuffer_) {
316+
return;
317+
}
262318
reserve(newSize);
263319
if (newSize > currentSize_) {
264320
memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int8_t));
@@ -270,13 +326,16 @@ namespace orc {
270326

271327
template <>
272328
DataBuffer<uint64_t>::~DataBuffer() {
273-
if (buf_) {
329+
if (ownBuffer_ && buf_) {
274330
memoryPool_.free(reinterpret_cast<char*>(buf_));
275331
}
276332
}
277333

278334
template <>
279335
void DataBuffer<uint64_t>::resize(uint64_t newSize) {
336+
if (!ownBuffer_) {
337+
return;
338+
}
280339
reserve(newSize);
281340
if (newSize > currentSize_) {
282341
memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(uint64_t));
@@ -288,13 +347,16 @@ namespace orc {
288347

289348
template <>
290349
DataBuffer<unsigned char>::~DataBuffer() {
291-
if (buf_) {
350+
if (ownBuffer_ && buf_) {
292351
memoryPool_.free(reinterpret_cast<char*>(buf_));
293352
}
294353
}
295354

296355
template <>
297356
void DataBuffer<unsigned char>::resize(uint64_t newSize) {
357+
if (!ownBuffer_) {
358+
return;
359+
}
298360
reserve(newSize);
299361
if (newSize > currentSize_) {
300362
memset(buf_ + currentSize_, 0, newSize - currentSize_);

c++/test/TestCache.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,22 @@
2626

2727
namespace orc {
2828

29+
class CountingMemoryPool : public MemoryPool {
30+
public:
31+
uint64_t allocCount = 0;
32+
uint64_t freeCount = 0;
33+
34+
char* malloc(uint64_t size) override {
35+
++allocCount;
36+
return static_cast<char*>(std::malloc(size));
37+
}
38+
39+
void free(char* p) override {
40+
++freeCount;
41+
std::free(p);
42+
}
43+
};
44+
2945
TEST(TestReadRangeCombiner, testBasics) {
3046
ReadRangeCombiner combinator{0, 100};
3147
/// Ranges with partial overlap and identical offsets
@@ -139,4 +155,24 @@ namespace orc {
139155
slice = cache.read({20, 2});
140156
assert_slice_equal(slice, "uv");
141157
}
158+
159+
TEST(TestDataBuffer, testExternalBufferNonOwning) {
160+
CountingMemoryPool pool;
161+
char external[16] = {0};
162+
uint64_t freeCountAfterSetData = 0;
163+
164+
{
165+
DataBuffer<char> buffer(pool, 0);
166+
buffer.setData(external, sizeof(external));
167+
freeCountAfterSetData = pool.freeCount;
168+
169+
// Non-owning buffer should keep external size and ignore resize.
170+
EXPECT_EQ(sizeof(external), buffer.size());
171+
buffer.resize(8);
172+
EXPECT_EQ(sizeof(external), buffer.size());
173+
}
174+
175+
// setData may release previously owned internal memory, but destruction should not free external.
176+
EXPECT_EQ(freeCountAfterSetData, pool.freeCount);
177+
}
142178
} // namespace orc

0 commit comments

Comments
 (0)