This is an automated email from the ASF dual-hosted git repository.

broustant pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr-sandbox.git


The following commit(s) were added to refs/heads/main by this push:
     new 0290676  Encryption IndexInput and IndexOutput can be unwrapped. More 
doc about IV construction. (#117)
0290676 is described below

commit 02906765861e37a558b541cd6f4759fd51e2dd46
Author: Bruno Roustant <[email protected]>
AuthorDate: Mon May 19 10:53:37 2025 +0200

    Encryption IndexInput and IndexOutput can be unwrapped. More doc about IV 
construction. (#117)
---
 .../solr/encryption/EncryptionDirectory.java       |  2 +-
 .../solr/encryption/crypto/AesCtrEncrypter.java    |  1 +
 .../apache/solr/encryption/crypto/AesCtrUtil.java  | 45 ++++++++++++++++++++++
 .../encryption/crypto/DecryptingIndexInput.java    |  8 ++--
 .../encryption/crypto/EncryptingIndexOutput.java   |  6 ++-
 5 files changed, 56 insertions(+), 6 deletions(-)

diff --git 
a/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java 
b/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
index c98bc17..74a7e31 100644
--- 
a/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
+++ 
b/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
@@ -275,7 +275,7 @@ public class EncryptionDirectory extends FilterDirectory {
    */
   protected String getKeyRefForReading(IndexInput indexInput) throws 
IOException {
     // Always reading the magic number, even for non-encrypted indexes, is not 
a performance
-    // issue because it will be read immediately again when the Directory is 
returned, to
+    // issue because it will be read immediately again when the IndexInput is 
returned, to
     // check the index header (CodecUtil.checkIndexHeader()).
     long filePointer = indexInput.getFilePointer();
     int magic = readBEInt(indexInput);
diff --git 
a/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrEncrypter.java
 
b/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrEncrypter.java
index fd9ba1c..b23cbf3 100644
--- 
a/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrEncrypter.java
+++ 
b/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrEncrypter.java
@@ -23,6 +23,7 @@ package org.apache.solr.encryption.crypto;
  * <p>An {@link AesCtrEncrypter} must be first {@link #init(long) initialized} 
before it can be used to
  * {@link #process encrypt/decrypt}.
  * <p>Not thread safe.
+ * <p>See {@link AesCtrUtil} for internal doc about the choice of CTR mode.
  */
 public interface AesCtrEncrypter extends Cloneable {
 
diff --git 
a/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrUtil.java 
b/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrUtil.java
index 03f6d47..1dbd2f5 100644
--- a/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrUtil.java
+++ b/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrUtil.java
@@ -23,6 +23,17 @@ import java.security.SecureRandom;
  */
 public class AesCtrUtil {
 
+  // Rationale about the choice of the CTR-Mode:
+  // - simple, efficient, random-access.
+  // - adapted to Lucene immutable index files.
+  // - file integrity and error detection checks are verified by Lucene 
checksums.
+  // - nonce-misuse resistance is implemented when building the random IV in 
this class (see generateRandomAesCtrIv).
+  // - used in combination with a strong AES cipher.
+  //
+  // See 
https://csrc.nist.rip/groups/ST/toolkit/BCM/documents/proposedmodes/ctr/ctr-spec.pdf
+  // Comparison between AES-XTS and AES-CTR
+  // See 
https://crypto.stackexchange.com/questions/64556/aes-xts-vs-aes-ctr-for-write-once-storage?rq=1
+
   /**
    * AES block has a fixed length of 16 bytes (128 bits).
    */
@@ -69,6 +80,40 @@ public class AesCtrUtil {
     // The IV length must be the AES block size.
     // For the CTR mode, the IV is composed of a random NONCE (first bytes) 
and a counter (last bytes).
     // com.sun.crypto.provider.CounterMode.increment() increments the counter 
starting from the last byte.
+
+    // Rationale for the IV construction:
+    // We have to ensure the IV is not repeated for the same encryption key. 
This is the (IV,key) pair
+    // reuse problem. Otherwise, a crib-dragging technique could eliminate the 
confidentiality of the
+    // paired contents (this would not reveal the encryption key).
+    // One approach could be to use part of the IV bytes to encode the segment 
id. But the segment id has
+    // a potentially unlimited size. And we would still need to differentiate 
the IV of the files in the
+    // segment.
+    // The approach taken here is a nonce-misuse resistance, to rely on a 
near-0 probability of having two
+    // files sharing the same IV. We use all the remaining 11 bytes of the IV to 
generate an 88-bit secure random
+    // nonce.
+    // The probability of having at least two files sharing the same IV is 
calculated here for 3 scenarios.
+    // The probability can be estimated by following the Birthday Problem 
resolution
+    // (https://en.wikipedia.org/wiki/Birthday_problem).
+    // With n the number of files/IVs, and d the number of possible nonce 
values with 11 bytes (2^88), then
+    // with n << d, the probability of having at least two identical IVs can 
be estimated by
+    // P(n,d) ~= 1 - exp(- n(n-1)/2d) ~= n²/2d
+    //
+    // Scenario 1 typical - fewer than 2000 segment files on disk for the 
index.
+    // Lucene frequently removes old unused segment files. Even if it takes 
some time to remove them on a
+    // busy machine, we can consider there are less than 100 segments for an 
index on disk, which gives
+    // less than 2000 files. This would be less with compound files.
+    // P(2000,2^88) ~= 2000²/2^89 ~= 6.5E-21
+    //
+    // Scenario 2 very bad case - the key is changed every 4 months, all the 
segments produced are recorded
+    // by an attacker, one commit/segment per 5 seconds, and 20 files per 
segment.
+    // Num files recorded = 4*30*24*60*60/5*20 ~= 4.14E7 files
+    // P(4.14E7, 2^88) ~= 2.7E-12
+    //
+    // Scenario 3 awfully bad case - the key is changed every year, all the 
segments produced are recorded
+    // by an attacker, one commit/segment per second, and 20 files per segment.
+    // Num files recorded = 365*24*60*60*20 ~= 6.3E8 files
+    // P(6.3E8, 2^88) ~= 6.4E-10
+
     byte[] nonce = new byte[IV_LENGTH - COUNTER_LENGTH];
     secureRandom.nextBytes(nonce);
     byte[] iv = new byte[IV_LENGTH];
diff --git 
a/encryption/src/main/java/org/apache/solr/encryption/crypto/DecryptingIndexInput.java
 
b/encryption/src/main/java/org/apache/solr/encryption/crypto/DecryptingIndexInput.java
index a1a7fe8..a459330 100644
--- 
a/encryption/src/main/java/org/apache/solr/encryption/crypto/DecryptingIndexInput.java
+++ 
b/encryption/src/main/java/org/apache/solr/encryption/crypto/DecryptingIndexInput.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.encryption.crypto;
 
+import org.apache.lucene.store.FilterIndexInput;
 import org.apache.lucene.store.IndexInput;
 
 import java.io.EOFException;
@@ -29,11 +30,12 @@ import static 
org.apache.solr.encryption.crypto.AesCtrUtil.*;
  * the read-only index files. It can decrypt data previously encrypted with an 
{@link EncryptingIndexOutput}.
  * <p>It first reads the CTR Initialization Vector (IV). This random IV is not 
encrypted. Then it can decrypt the rest
  * of the file, which probably contains a header and footer, with random 
access.
+ * <p>It is a {@link FilterIndexInput}, so it is possible to {@link 
FilterIndexInput#unwrap} it.
  *
  * @see EncryptingIndexOutput
  * @see AesCtrEncrypter
  */
-public class DecryptingIndexInput extends IndexInput {
+public class DecryptingIndexInput extends FilterIndexInput {
 
   /**
    * Must be a multiple of {@link AesCtrUtil#AES_BLOCK_SIZE}.
@@ -84,7 +86,7 @@ public class DecryptingIndexInput extends IndexInput {
                               byte[] key,
                               AesCtrEncrypterFactory factory,
                               int bufferCapacity) throws IOException {
-    this("Decrypting " + indexInput.toString(),
+    this("Decrypting " + indexInput,
          indexInput.getFilePointer() + IV_LENGTH,
          indexInput.getFilePointer() + IV_LENGTH,
          indexInput.length() - indexInput.getFilePointer() - IV_LENGTH,
@@ -102,7 +104,7 @@ public class DecryptingIndexInput extends IndexInput {
                                IndexInput indexInput,
                                AesCtrEncrypter encrypter,
                                int bufferCapacity) {
-    super(resourceDescription);
+    super(resourceDescription, indexInput);
     assert delegateOffset >= 0 && sliceOffset >= 0 && sliceLength >= 0;
     this.delegateOffset = delegateOffset;
     this.sliceOffset = sliceOffset;
diff --git 
a/encryption/src/main/java/org/apache/solr/encryption/crypto/EncryptingIndexOutput.java
 
b/encryption/src/main/java/org/apache/solr/encryption/crypto/EncryptingIndexOutput.java
index 04a56f2..fd0e8f0 100644
--- 
a/encryption/src/main/java/org/apache/solr/encryption/crypto/EncryptingIndexOutput.java
+++ 
b/encryption/src/main/java/org/apache/solr/encryption/crypto/EncryptingIndexOutput.java
@@ -17,6 +17,7 @@
 package org.apache.solr.encryption.crypto;
 
 import org.apache.lucene.store.BufferedChecksum;
+import org.apache.lucene.store.FilterIndexOutput;
 import org.apache.lucene.store.IndexOutput;
 
 import java.io.IOException;
@@ -32,11 +33,12 @@ import static 
org.apache.solr.encryption.crypto.AesCtrUtil.*;
  * <p>It generates a cryptographically strong random CTR Initialization Vector 
(IV). This random IV is not encrypted and
  * is skipped by any {@link DecryptingIndexInput} reading the written data. 
Then it can encrypt the rest of the file
  * which probably contains a header and footer.
+ * <p>It is a {@link FilterIndexOutput}, so it is possible to {@link 
FilterIndexOutput#unwrap} it.
  *
  * @see DecryptingIndexInput
  * @see AesCtrEncrypter
  */
-public class EncryptingIndexOutput extends IndexOutput {
+public class EncryptingIndexOutput extends FilterIndexOutput {
 
   /**
    * Must be a multiple of {@link AesCtrUtil#AES_BLOCK_SIZE}.
@@ -81,7 +83,7 @@ public class EncryptingIndexOutput extends IndexOutput {
                                AesCtrEncrypterFactory factory,
                                int bufferCapacity)
     throws IOException {
-    super("Encrypting " + indexOutput.toString(), indexOutput.getName());
+    super("Encrypting " + indexOutput, indexOutput.getName(), indexOutput);
     this.indexOutput = indexOutput;
     byte[] iv = generateRandomIv();
     encrypter = factory.create(key, iv);

Reply via email to