hnasrullakhan commented on issue #44410:
URL: https://github.com/apache/arrow/issues/44410#issuecomment-2417886240
```
public VectorSchemaRoot lz4CompressStringColumns1(VectorSchemaRoot
vectorSchemaRoot) throws IOException {
System.out.println("Newlz4CompressStringColumns start
originalReadableBytes: "+ totaReadableBytes(vectorSchemaRoot));
try (ByteArrayOutputStream byteArrayOutputStream = new
ByteArrayOutputStream()) {
int rowCount = vectorSchemaRoot.getRowCount();
// Get LZ4 compressor instance
LZ4Factory factory = LZ4Factory.fastestInstance();
LZ4Compressor compressor = factory.fastCompressor();
// Compress string columns using LZ4
int column = 0;
int allColumns = vectorSchemaRoot.getFieldVectors().size();
System.out.println("all columns="+ allColumns);
// List<FieldVector> fieldVectors =
vectorSchemaRoot.getFieldVectors();
for (int col=0; col< allColumns ; col++) {
FieldVector v = vectorSchemaRoot.getVector(col);
Field field = v.getField();
if (field.getType().getTypeID() ==
org.apache.gluten.shaded.org.apache.arrow.vector.types.pojo.ArrowType.Utf8.TYPE_TYPE)
{
System.out.println("Vector");
System.out.println(v);
final int valueCount = v.getValueCount();
final ArrowBuf dataBuffer = v.getDataBuffer();
final ArrowBuf validityBuffer =
v.getValidityBuffer();
final ArrowBuf offsetBuffer = v.getOffsetBuffer();
int maxCompressedLength =
compressor.maxCompressedLength((int) dataBuffer.readableBytes());
byte[] compressedBytes = new
byte[maxCompressedLength];
ByteBuffer originalBuffer = dataBuffer.nioBuffer(0,
(int) dataBuffer.readableBytes());
byte[] originalBytes = new
byte[originalBuffer.remaining()];
System.out.println( " originalbytes: "+
originalBytes.length);
originalBuffer.get(originalBytes);
int compressedLength =
compressor.compress(originalBytes, 0, originalBytes.length, compressedBytes, 0,
maxCompressedLength);
System.out.println( " compressedLength: "+
compressedLength);
try (final ArrowBuf newDataBuffer =
allocator.buffer(compressedLength)) {
if (compressedLength > newDataBuffer.capacity())
{
throw new IllegalStateException("Compressed
data exceeds buffer capacity.");
}
System.out.println( " compressedBytes: "+
compressedBytes.length);
newDataBuffer.writeBytes(compressedBytes, 0,
compressedLength);
System.out.println("writer index
before="+dataBuffer.writerIndex());
newDataBuffer.writerIndex(compressedLength);
System.out.println("writer index
after="+newDataBuffer.writerIndex());
newDataBuffer.readerIndex(0);
dataBuffer.readerIndex(0);
v.setValueCount(valueCount);
ArrowFieldNode fieldNode = new
ArrowFieldNode(v.getValueCount(), v.getNullCount());
v.loadFieldBuffers(fieldNode,
List.of(validityBuffer, offsetBuffer, newDataBuffer));
System.out.println("newDataBuffer="+newDataBuffer.readableBytes());
final ArrowBuf mewDBuffer = v.getDataBuffer();
System.out.println("mewDBuffer="+mewDBuffer.readableBytes());
}
}
column++;
}
System.out.println("lz4CompressStringColumns
compressedReadableBytes: "+ totaReadableBytes(vectorSchemaRoot));
return vectorSchemaRoot;
}
catch (Exception e) {
e.printStackTrace();
throw e; // Rethrow any exceptions to handle them properly
}
}
/**
 * Writes {@code vectorSchemaRoot} as a single-batch Arrow IPC stream into memory.
 *
 * <p>An {@code ArrowStreamWriter} is created over an in-memory stream with a {@code null}
 * dictionary provider, then started, asked to write one batch, and ended. Any exception raised
 * while writing is printed and rethrown unchanged.
 *
 * @param vectorSchemaRoot the root to serialize
 * @return the bytes accumulated in the in-memory stream
 * @throws IOException if the writer fails
 */
public byte[] serializeVectorSchemaRoot(VectorSchemaRoot vectorSchemaRoot)
        throws IOException {
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();

    // The writer is closed by try-with-resources before the bytes are read back.
    try (ArrowStreamWriter streamWriter =
            new ArrowStreamWriter(vectorSchemaRoot, null, sink)) {
        streamWriter.start();
        streamWriter.writeBatch();
        streamWriter.end();
    } catch (Exception failure) {
        failure.printStackTrace();
        throw failure; // propagate to the caller after logging
    }

    final byte[] serialized = sink.toByteArray();
    System.out.println("lz4CompressStringColumns end");
    return serialized;
}
// Call site (Scala): compress the string columns of vectorSchema, then serialize the result.
// NOTE(review): this calls lz4CompressStringColumns2, while the method shown above is named
// lz4CompressStringColumns1 - confirm which variant is actually being exercised.
val vsrNew = arrowAbiUtil.lz4CompressStringColumns2(vectorSchema)
// Serialize the root returned by the compression step to a byte array.
val serialBytes2 = arrowAbiUtil.serializeVectorSchemaRoot(vsrNew)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]