hnasrullakhan commented on issue #44410:
URL: https://github.com/apache/arrow/issues/44410#issuecomment-2417886240

   ```
    public VectorSchemaRoot  lz4CompressStringColumns1(VectorSchemaRoot 
vectorSchemaRoot) throws IOException {
           System.out.println("Newlz4CompressStringColumns start 
originalReadableBytes: "+ totaReadableBytes(vectorSchemaRoot));
           try (ByteArrayOutputStream byteArrayOutputStream = new 
ByteArrayOutputStream()) {
               int rowCount = vectorSchemaRoot.getRowCount();
               // Get LZ4 compressor instance
               LZ4Factory factory = LZ4Factory.fastestInstance();
               LZ4Compressor compressor = factory.fastCompressor();
   
               // Compress string columns using LZ4
               int column = 0;
               int allColumns = vectorSchemaRoot.getFieldVectors().size();
               System.out.println("all columns="+ allColumns);
   //            List<FieldVector> fieldVectors = 
vectorSchemaRoot.getFieldVectors();
   
               for (int col=0; col< allColumns ; col++) {
                       FieldVector v = vectorSchemaRoot.getVector(col);
                       Field field = v.getField();
   
                       if (field.getType().getTypeID() == 
org.apache.gluten.shaded.org.apache.arrow.vector.types.pojo.ArrowType.Utf8.TYPE_TYPE)
 {
                           System.out.println("Vector");
                           System.out.println(v);
                           final int valueCount = v.getValueCount();
                           final ArrowBuf dataBuffer = v.getDataBuffer();
                           final ArrowBuf validityBuffer = 
v.getValidityBuffer();
                           final ArrowBuf offsetBuffer = v.getOffsetBuffer();
                           int maxCompressedLength = 
compressor.maxCompressedLength((int) dataBuffer.readableBytes());
                           byte[] compressedBytes = new 
byte[maxCompressedLength];
                           ByteBuffer originalBuffer = dataBuffer.nioBuffer(0, 
(int) dataBuffer.readableBytes());
                           byte[] originalBytes = new 
byte[originalBuffer.remaining()];
                           System.out.println( " originalbytes: "+ 
originalBytes.length);
                           originalBuffer.get(originalBytes);
                           int compressedLength = 
compressor.compress(originalBytes, 0, originalBytes.length, compressedBytes, 0, 
maxCompressedLength);
                           System.out.println( " compressedLength: "+ 
compressedLength);
   
                           try (final ArrowBuf newDataBuffer = 
allocator.buffer(compressedLength)) {
   
                               if (compressedLength > newDataBuffer.capacity()) 
{
                                   throw new IllegalStateException("Compressed 
data exceeds buffer capacity.");
                               }
                               System.out.println( " compressedBytes: "+ 
compressedBytes.length);
   
                               newDataBuffer.writeBytes(compressedBytes, 0, 
compressedLength);
                               System.out.println("writer index 
before="+dataBuffer.writerIndex());
                               newDataBuffer.writerIndex(compressedLength);
                               System.out.println("writer index 
after="+newDataBuffer.writerIndex());
                               newDataBuffer.readerIndex(0);
                               dataBuffer.readerIndex(0);
                               v.setValueCount(valueCount);
                               ArrowFieldNode fieldNode = new 
ArrowFieldNode(v.getValueCount(), v.getNullCount());
                               v.loadFieldBuffers(fieldNode, 
List.of(validityBuffer, offsetBuffer, newDataBuffer));
                               
System.out.println("newDataBuffer="+newDataBuffer.readableBytes());
                               final ArrowBuf mewDBuffer = v.getDataBuffer();
                               
System.out.println("mewDBuffer="+mewDBuffer.readableBytes());
                           }
                       }
   
                   column++;
   
               }
               System.out.println("lz4CompressStringColumns 
compressedReadableBytes: "+ totaReadableBytes(vectorSchemaRoot));
   
               return vectorSchemaRoot;
           }
           catch (Exception e) {
               e.printStackTrace();
               throw e;  // Rethrow any exceptions to handle them properly
           }
       }
    public byte[]  serializeVectorSchemaRoot(VectorSchemaRoot vectorSchemaRoot) 
throws IOException {
              ByteArrayOutputStream byteArrayOutputStream = new 
ByteArrayOutputStream();
               // Serialize the full VectorSchemaRoot to a byte array
           try (ArrowStreamWriter writer = new 
ArrowStreamWriter(vectorSchemaRoot, null, byteArrayOutputStream)) {
               writer.start();
               writer.writeBatch();
               writer.end();
           }
           catch (Exception e) {
               e.printStackTrace();
               throw e;  // Rethrow any exceptions to handle them properly
           }
           System.out.println("lz4CompressStringColumns end");
           return byteArrayOutputStream.toByteArray();
       }
   
   
    // NOTE(review): this calls lz4CompressStringColumns2, but the method pasted above is
    // named lz4CompressStringColumns1 — confirm which variant is actually intended.
    val vsrNew = arrowAbiUtil.lz4CompressStringColumns2(vectorSchema)
   
    val serialBytes2 = arrowAbiUtil.serializeVectorSchemaRoot(vsrNew)
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to