zxf216 commented on issue #1183:
URL: https://github.com/apache/orc/issues/1183#issuecomment-1905401058

   @dongjoon-hyun @wgtmac @coderex2522 
   We are developing a C++ feature to read encrypted columns in ORC, but we have 
encountered an issue: the program throws an error when the reader needs to skip 
ahead because a filter condition is set. Below is sample code from our attempt 
to read the encrypted columns.
   **Define DecryptionInputStream:**
   ```
   class DecryptionInputStream : public SeekableInputStream {
      public:
       DecryptionInputStream(std::unique_ptr<SeekableInputStream> 
input,std::vector<unsigned char> key,
                             std::vector<unsigned char> iv,const EVP_CIPHER* 
cipher,MemoryPool& pool);
       virtual ~DecryptionInputStream();
   
       virtual bool Next(const void** data, int* size) override;
       virtual void BackUp(int count) override;
       virtual bool Skip(int count) override;
       virtual google::protobuf::int64 ByteCount() const override;
       virtual void seek(PositionProvider& position) override;
       virtual std::string getName() const override;
   
      private:
       std::unique_ptr<SeekableInputStream> input_;
       std::vector<unsigned char> key_;
       std::vector<unsigned char> iv_;
       EVP_CIPHER_CTX* ctx_;
       const EVP_CIPHER* cipher;
       MemoryPool& pool;
       std::unique_ptr<DataBuffer<unsigned char>> inputBuffer_;
       std::unique_ptr<DataBuffer<unsigned char>> outputBuffer_;
     };
   }  // namespace orc
   ```
   
   
   **Implement the DecryptionInputStream class:**
   ```
   
/**
 * Wraps `input` so that the bytes it produces are decrypted with `cipher`.
 *
 * @param input  encrypted stream to read from; ownership is taken.
 * @param key    key bytes handed to EVP_DecryptInit_ex.
 * @param iv     initialisation vector handed to EVP_DecryptInit_ex.
 * @param cipher OpenSSL cipher. The stream takes it over: it is released with
 *               EVP_CIPHER_free by the destructor, and also on every path
 *               where construction fails.
 * @param pool   memory pool backing the internal buffers.
 * @throws std::runtime_error if the EVP context cannot be created or
 *         initialised.
 */
DecryptionInputStream::DecryptionInputStream(std::unique_ptr<SeekableInputStream> input,
                                             std::vector<unsigned char> key,
                                             std::vector<unsigned char> iv,
                                             const EVP_CIPHER* cipher, MemoryPool& pool)
    : input_(std::move(input)),
      key_(std::move(key)),
      iv_(std::move(iv)),
      ctx_(nullptr),
      cipher(cipher),
      pool(pool) {
  // The destructor does not run when a constructor throws, so hold the two
  // OpenSSL objects in guards until construction has fully succeeded. This
  // covers a failed context allocation, a failed init and a throwing buffer
  // allocation alike.
  std::unique_ptr<evp_cipher_st, decltype(&EVP_CIPHER_free)> cipherGuard(
      const_cast<evp_cipher_st*>(cipher), &EVP_CIPHER_free);
  std::unique_ptr<EVP_CIPHER_CTX, decltype(&EVP_CIPHER_CTX_free)> ctxGuard(
      EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
  if (ctxGuard == nullptr) {
    throw std::runtime_error("Failed to create EVP cipher context");
  }
  if (EVP_DecryptInit_ex(ctxGuard.get(), cipher, nullptr, key_.data(), iv_.data()) != 1) {
    throw std::runtime_error("Failed to initialize EVP cipher context");
  }
  outputBuffer_.reset(new DataBuffer<unsigned char>(pool));
  inputBuffer_.reset(new DataBuffer<unsigned char>(pool));

  // Nothing below can throw: hand both objects over to the members, which the
  // destructor releases.
  ctx_ = ctxGuard.release();
  static_cast<void>(cipherGuard.release());
}
   
     /**
      * Releases the OpenSSL decryption context and then the cipher that was
      * passed to the constructor.
      */
     DecryptionInputStream::~DecryptionInputStream() {
       EVP_CIPHER_CTX_free(ctx_);
       // The member is const-qualified, EVP_CIPHER_free takes a mutable pointer.
       evp_cipher_st* const ownedCipher = const_cast<evp_cipher_st*>(cipher);
       EVP_CIPHER_free(ownedCipher);
     }
   
     bool DecryptionInputStream::Next(const void** data, int* size) {
       int bytesRead = 0;
       //const void* ptr;
       const void* inptr = static_cast<void*>(inputBuffer_->data());
       input_->Next(&inptr, &bytesRead);
       if (bytesRead == 0) {
         return false;
       }
       //
       const unsigned char* result = static_cast<const unsigned char*>(inptr);
       int outlen = 0;
       //int blockSize = EVP_CIPHER_block_size(this->cipher);
       outputBuffer_->resize(bytesRead*2);
       int ret = EVP_DecryptUpdate(ctx_, outputBuffer_->data(), &outlen, 
result, bytesRead);
       if (ret != 1) {
         throw std::runtime_error("Failed to decrypt data");
       }
       outputBuffer_->resize(outlen);
       *data = outputBuffer_->data();
       *size = outputBuffer_->size();
       return true;
     }
     void DecryptionInputStream::BackUp(int count) {
       this->input_->BackUp(count);
     }
   
     bool DecryptionInputStream::Skip(int count) {
       return this->input_->Skip(count);
   
     }
   
     google::protobuf::int64 DecryptionInputStream::ByteCount() const {
       return input_->ByteCount();
     }
   
     void DecryptionInputStream::seek(PositionProvider& position) {
       input_->seek(position);
     }
   
     /** Returns the wrapped stream's name enclosed in "DecryptionInputStream(...)". */
     std::string DecryptionInputStream::getName() const {
       std::string label("DecryptionInputStream(");
       label += input_->getName();
       label += ")";
       return label;
     }
     
   ```
     
     **Used in StripeStream.cc:**
    ```
    std::unique_ptr<SeekableInputStream> StripeStreamsImpl::getStream(uint64_t 
columnId,
                                                                       
proto::Stream_Kind kind,
                                                                       bool 
shouldStream) const{
       MemoryPool* pool = reader.getFileContents().pool;
       const std::string skey = std::to_string(columnId) + ":" + 
std::to_string(kind);
       StreamInformation* streamInformation = streamMap[skey].get();
       if(streamInformation == nullptr){
         return nullptr;
       }
       
       uint64_t myBlock = shouldStream ? input.getNaturalReadSize() : 
streamInformation->getLength();
       auto inputStream = std::make_unique<SeekableFileInputStream>(
           &input, streamInformation->getOffset(), 
streamInformation->getLength(), *pool, myBlock);
       ReaderEncryptionVariant* variant = 
reader.getReaderEncryption()->getVariant(columnId);
       if (variant != nullptr) {
         ReaderEncryptionKey* encryptionKey = variant->getKeyDescription();
         const int ivLength = encryptionKey->getAlgorithm()->getIvLength();
         std::vector<unsigned char> iv(ivLength);
         orc::CryptoUtil::modifyIvForStream(columnId, kind, originalStripeId, 
iv.data(), ivLength);
         const EVP_CIPHER* cipher = 
encryptionKey->getAlgorithm()->createCipher();
         // FooterKey
         std::vector<unsigned char> key = 
variant->getStripeKey(stripeIndex)->getEncoded();
         std::unique_ptr<SeekableInputStream> decompressStream = 
createDecompressorAndDecryption(
             reader.getCompression(), std::move(inputStream), 
reader.getCompressionSize(), *pool,
             reader.getFileContents().readerMetrics, key,
             iv, const_cast<EVP_CIPHER*>(cipher));
         return decompressStream;
       } else {
         return createDecompressor(reader.getCompression(), 
std::move(inputStream),
                                   reader.getCompressionSize(), *pool,
                                   reader.getFileContents().readerMetrics);
       }
     }
     
   /**
    * Builds the read pipeline for an encrypted stream: `input` is wrapped in a
    * DecryptionInputStream, and that stream is handed to createDecompressor().
    *
    * @param kind      compression kind forwarded to createDecompressor().
    * @param input     encrypted stream; ownership is taken.
    * @param blockSize compression block size forwarded to createDecompressor().
    * @param pool      memory pool used by both layers.
    * @param metrics   reader metrics forwarded to createDecompressor().
    * @param key       key bytes for the decryption layer.
    * @param iv        initialisation vector for the decryption layer.
    * @param cipher    OpenSSL cipher passed on to the decryption layer.
    * @return the stream returned by createDecompressor().
    */
   std::unique_ptr<SeekableInputStream> createDecompressorAndDecryption(
       CompressionKind kind, std::unique_ptr<SeekableInputStream> input, uint64_t blockSize,
       MemoryPool& pool, ReaderMetrics* metrics, std::vector<unsigned char> key,
       std::vector<unsigned char> iv, const EVP_CIPHER* cipher) {
     // key and iv were received by value; move them on instead of copying
     // them a second time.
     auto decrypted = std::make_unique<DecryptionInputStream>(
         std::move(input), std::move(key), std::move(iv), cipher, pool);
     return createDecompressor(kind, std::move(decrypted), blockSize, pool, metrics);
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to