|
35 | 35 | import java.util.ArrayList; |
36 | 36 | import java.util.Arrays; |
37 | 37 | import java.util.List; |
| 38 | +import java.util.Random; |
38 | 39 |
|
| 40 | +import static org.apache.orc.impl.RecordReaderUtils.MAX_BYTE_WIDTH; |
| 41 | +import static org.apache.orc.impl.RecordReaderUtils.MAX_VALUES_LENGTH; |
39 | 42 | import static org.junit.jupiter.api.Assertions.assertEquals; |
40 | 43 | import static org.junit.jupiter.api.Assertions.assertNotEquals; |
41 | 44 | import static org.junit.jupiter.api.Assertions.assertNotSame; |
42 | 45 | import static org.junit.jupiter.api.Assertions.assertSame; |
| 46 | +import static org.junit.jupiter.api.Assertions.assertThrows; |
| 47 | +import static org.junit.jupiter.api.Assertions.assertTrue; |
43 | 48 | import static org.junit.jupiter.api.Assertions.fail; |
44 | 49 |
|
45 | 50 | public class TestInStream { |
@@ -1000,4 +1005,67 @@ public void testStreamResetWithoutIncreasedLength() throws IOException { |
1000 | 1005 | byte[] inBuffer = new byte[5]; |
1001 | 1006 | assertEquals(5, inStream.read(inBuffer)); |
1002 | 1007 | } |
| 1008 | + |
| 1009 | + /** |
| 1010 | + * Demonstrates that the old estimateRgEndOffset slop calculation is insufficient. |
| 1011 | + * When a compressed stream is truncated at the old estimated end offset, |
| 1012 | + * reading a full RLE v2 DIRECT run fails because the estimated slop doesn't |
| 1013 | + * account for enough compressed blocks. |
| 1014 | + */ |
| 1015 | + @Test |
| 1016 | + public void testTruncatedRleV2DirectRunAtEstimatedEndFails() throws Exception { |
| 1017 | + final int bufferSize = 1024; |
| 1018 | + final int chunkSize = OutStream.HEADER_SIZE + bufferSize; |
| 1019 | + final int nextGroupOffset = bufferSize; |
| 1020 | + final int oldStretchFactor = |
| 1021 | + 2 + (MAX_VALUES_LENGTH * MAX_BYTE_WIDTH - 1) / bufferSize; |
| 1022 | + final int oldEstimatedEnd = nextGroupOffset + oldStretchFactor * chunkSize; |
| 1023 | + |
| 1024 | + TestInStream.OutputCollector receiver = new TestInStream.OutputCollector(); |
| 1025 | + CompressionCodec codec = new ZlibCodec(); |
| 1026 | + StreamOptions streamOptions = new StreamOptions(bufferSize) |
| 1027 | + .withCodec(codec, codec.getDefaultOptions()); |
| 1028 | + byte[] data = new byte[bufferSize * 6]; |
| 1029 | + new Random(42).nextBytes(data); |
| 1030 | + try (OutStream out = new OutStream("test", streamOptions, receiver)) { |
| 1031 | + out.write(data); |
| 1032 | + out.flush(); |
| 1033 | + } |
| 1034 | + |
| 1035 | + byte[] encoded = receiver.buffer.get(); |
| 1036 | + assertEquals(nextGroupOffset + 5 * chunkSize, oldEstimatedEnd); |
| 1037 | + assertTrue(encoded.length > oldEstimatedEnd); |
| 1038 | + |
| 1039 | + InStream stream = InStream.create("test", |
| 1040 | + new BufferChunk(ByteBuffer.wrap(encoded, 0, oldEstimatedEnd), 0), |
| 1041 | + 0, oldEstimatedEnd, |
| 1042 | + InStream.options().withCodec(codec).withBufferSize(bufferSize)); |
| 1043 | + byte[] rleDirectRun = new byte[MAX_VALUES_LENGTH * MAX_BYTE_WIDTH |
| 1044 | + + RecordReaderUtils.RLE_V2_HEADER_SIZE]; |
| 1045 | + |
| 1046 | + stream.seek(new SimplePositionProvider(nextGroupOffset, 0)); |
| 1047 | + IllegalArgumentException error = assertThrows( |
| 1048 | + IllegalArgumentException.class, () -> { |
| 1049 | + int offset = 0; |
| 1050 | + while (offset < rleDirectRun.length) { |
| 1051 | + offset += stream.read( |
| 1052 | + rleDirectRun, offset, rleDirectRun.length - offset); |
| 1053 | + } |
| 1054 | + }); |
| 1055 | + assertTrue(error.getMessage().contains("Buffer size too small")); |
| 1056 | + } |
| 1057 | + |
| 1058 | + private static class SimplePositionProvider implements PositionProvider { |
| 1059 | + private final long[] positions; |
| 1060 | + private int index = 0; |
| 1061 | + |
| 1062 | + SimplePositionProvider(long... positions) { |
| 1063 | + this.positions = positions; |
| 1064 | + } |
| 1065 | + |
| 1066 | + @Override |
| 1067 | + public long getNext() { |
| 1068 | + return positions[index++]; |
| 1069 | + } |
| 1070 | + } |
1003 | 1071 | } |
0 commit comments