This is an automated email from the ASF dual-hosted git repository.
broustant pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr-sandbox.git
The following commit(s) were added to refs/heads/main by this push:
new 0290676 Encryption IndexInput and IndexOutput can be unwrapped. More
doc about IV construction. (#117)
0290676 is described below
commit 02906765861e37a558b541cd6f4759fd51e2dd46
Author: Bruno Roustant <[email protected]>
AuthorDate: Mon May 19 10:53:37 2025 +0200
Encryption IndexInput and IndexOutput can be unwrapped. More doc about IV
construction. (#117)
---
.../solr/encryption/EncryptionDirectory.java | 2 +-
.../solr/encryption/crypto/AesCtrEncrypter.java | 1 +
.../apache/solr/encryption/crypto/AesCtrUtil.java | 45 ++++++++++++++++++++++
.../encryption/crypto/DecryptingIndexInput.java | 8 ++--
.../encryption/crypto/EncryptingIndexOutput.java | 6 ++-
5 files changed, 56 insertions(+), 6 deletions(-)
diff --git
a/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
b/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
index c98bc17..74a7e31 100644
---
a/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
+++
b/encryption/src/main/java/org/apache/solr/encryption/EncryptionDirectory.java
@@ -275,7 +275,7 @@ public class EncryptionDirectory extends FilterDirectory {
*/
protected String getKeyRefForReading(IndexInput indexInput) throws
IOException {
// Always reading the magic number, even for non-encrypted indexes, is not
a performance
- // issue because it will be read immediately again when the Directory is
returned, to
+ // issue because it will be read immediately again when the IndexInput is
returned, to
// check the index header (CodecUtil.checkIndexHeader()).
long filePointer = indexInput.getFilePointer();
int magic = readBEInt(indexInput);
diff --git
a/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrEncrypter.java
b/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrEncrypter.java
index fd9ba1c..b23cbf3 100644
---
a/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrEncrypter.java
+++
b/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrEncrypter.java
@@ -23,6 +23,7 @@ package org.apache.solr.encryption.crypto;
* <p>An {@link AesCtrEncrypter} must be first {@link #init(long) initialized}
before it can be used to
* {@link #process encrypt/decrypt}.
* <p>Not thread safe.
+ * <p>See {@link AesCtrUtil} for internal doc about the choice of CTR mode.
*/
public interface AesCtrEncrypter extends Cloneable {
diff --git
a/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrUtil.java
b/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrUtil.java
index 03f6d47..1dbd2f5 100644
--- a/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrUtil.java
+++ b/encryption/src/main/java/org/apache/solr/encryption/crypto/AesCtrUtil.java
@@ -23,6 +23,17 @@ import java.security.SecureRandom;
*/
public class AesCtrUtil {
+ // Rationale about the choice of the CTR-Mode:
+ // - simple, efficient, random-access.
+ // - adapted to Lucene immutable index files.
+ // - file integrity and error detection checks are verified by Lucene
checksums.
+ // - nonce-misuse resistance is implemented when building the random IV in
this class (see generateRandomAesCtrIv).
+ // - used in combination with a strong AES cipher.
+ //
+ // See
https://csrc.nist.rip/groups/ST/toolkit/BCM/documents/proposedmodes/ctr/ctr-spec.pdf
+ // Comparison between AES-XTS and AES-CTR
+ // See
https://crypto.stackexchange.com/questions/64556/aes-xts-vs-aes-ctr-for-write-once-storage?rq=1
+
/**
* AES block has a fixed length of 16 bytes (128 bits).
*/
@@ -69,6 +80,40 @@ public class AesCtrUtil {
// The IV length must be the AES block size.
// For the CTR mode, the IV is composed of a random NONCE (first bytes)
and a counter (last bytes).
// com.sun.crypto.provider.CounterMode.increment() increments the counter
starting from the last byte.
+
+ // Rationale for the IV construction:
+ // We have to ensure the IV is not repeated for the same encryption key.
This is the (IV,key) pair
+ // reuse problem. Otherwise, a crib-dragging technique could eliminate the
confidentiality of the
+ // paired contents (this would not reveal the encryption key).
+ // One approach could be to use part of the IV bytes to encode the segment
id. But the segment id has
+ // a potentially unlimited size. And we would still need to differentiate
the IV of the files in the
+ // segment.
+ // The approach taken here is a nonce-misuse resistance, to rely on a
near-0 probability of having two
+ // files sharing the same IV. We use all the remaining 11 IV bytes to
generate an 88-bit secure random
+ // nonce.
+ // The probability of having at least two files sharing the same IV is
calculated here for 3 scenarios.
+ // The probability can be estimated by following the Birthday Problem
resolution
+ // (https://en.wikipedia.org/wiki/Birthday_problem).
+ // With n the number of files/IVs, and d the number of possible nonce
values with 11 bytes (2^88), then
+ // with n << d, the probability of having at least two identical IVs can
be estimated by
+ // P(n,d) ~= 1 - exp(-n(n-1)/(2d)) ~= n²/(2d)
+ //
+ // Scenario 1 typical - fewer than 2000 segment files on disk for the
index.
+ // Lucene frequently removes old unused segment files. Even if it takes
some time to remove them on a
+ // busy machine, we can consider there are less than 100 segments for an
index on disk, which gives
+ // less than 2000 files. This would be less with compound files.
+ // P(2000,2^88) ~= 2000²/2^89 ~= 6.5E-21
+ //
+ // Scenario 2 very bad case - the key is changed every 4 months, all the
segments produced are recorded
+ // by an attacker, one commit/segment per 5 seconds, and 20 files per
segment.
+ // Num files recorded = 4*30*24*60*60/5*20 ~= 4.15E7 files
+ // P(4.15E7, 2^88) ~= 2.8E-12
+ //
+ // Scenario 3 awfully bad case - the key is changed every year, all the
segments produced are recorded
+ // by an attacker, one commit/segment per second, and 20 files per segment.
+ // Num files recorded = 365*24*60*60*20 ~= 6.3E8 files
+ // P(6.3E8, 2^88) ~= 6.4E-10
+
byte[] nonce = new byte[IV_LENGTH - COUNTER_LENGTH];
secureRandom.nextBytes(nonce);
byte[] iv = new byte[IV_LENGTH];
diff --git
a/encryption/src/main/java/org/apache/solr/encryption/crypto/DecryptingIndexInput.java
b/encryption/src/main/java/org/apache/solr/encryption/crypto/DecryptingIndexInput.java
index a1a7fe8..a459330 100644
---
a/encryption/src/main/java/org/apache/solr/encryption/crypto/DecryptingIndexInput.java
+++
b/encryption/src/main/java/org/apache/solr/encryption/crypto/DecryptingIndexInput.java
@@ -16,6 +16,7 @@
*/
package org.apache.solr.encryption.crypto;
+import org.apache.lucene.store.FilterIndexInput;
import org.apache.lucene.store.IndexInput;
import java.io.EOFException;
@@ -29,11 +30,12 @@ import static
org.apache.solr.encryption.crypto.AesCtrUtil.*;
* the read-only index files. It can decrypt data previously encrypted with an
{@link EncryptingIndexOutput}.
* <p>It first reads the CTR Initialization Vector (IV). This random IV is not
encrypted. Then it can decrypt the rest
* of the file, which probably contains a header and footer, with random
access.
+ * <p>It is a {@link FilterIndexInput}, so it is possible to {@link
FilterIndexInput#unwrap} it.
*
* @see EncryptingIndexOutput
* @see AesCtrEncrypter
*/
-public class DecryptingIndexInput extends IndexInput {
+public class DecryptingIndexInput extends FilterIndexInput {
/**
* Must be a multiple of {@link AesCtrUtil#AES_BLOCK_SIZE}.
@@ -84,7 +86,7 @@ public class DecryptingIndexInput extends IndexInput {
byte[] key,
AesCtrEncrypterFactory factory,
int bufferCapacity) throws IOException {
- this("Decrypting " + indexInput.toString(),
+ this("Decrypting " + indexInput,
indexInput.getFilePointer() + IV_LENGTH,
indexInput.getFilePointer() + IV_LENGTH,
indexInput.length() - indexInput.getFilePointer() - IV_LENGTH,
@@ -102,7 +104,7 @@ public class DecryptingIndexInput extends IndexInput {
IndexInput indexInput,
AesCtrEncrypter encrypter,
int bufferCapacity) {
- super(resourceDescription);
+ super(resourceDescription, indexInput);
assert delegateOffset >= 0 && sliceOffset >= 0 && sliceLength >= 0;
this.delegateOffset = delegateOffset;
this.sliceOffset = sliceOffset;
diff --git
a/encryption/src/main/java/org/apache/solr/encryption/crypto/EncryptingIndexOutput.java
b/encryption/src/main/java/org/apache/solr/encryption/crypto/EncryptingIndexOutput.java
index 04a56f2..fd0e8f0 100644
---
a/encryption/src/main/java/org/apache/solr/encryption/crypto/EncryptingIndexOutput.java
+++
b/encryption/src/main/java/org/apache/solr/encryption/crypto/EncryptingIndexOutput.java
@@ -17,6 +17,7 @@
package org.apache.solr.encryption.crypto;
import org.apache.lucene.store.BufferedChecksum;
+import org.apache.lucene.store.FilterIndexOutput;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
@@ -32,11 +33,12 @@ import static
org.apache.solr.encryption.crypto.AesCtrUtil.*;
* <p>It generates a cryptographically strong random CTR Initialization Vector
(IV). This random IV is not encrypted and
* is skipped by any {@link DecryptingIndexInput} reading the written data.
Then it can encrypt the rest of the file
* which probably contains a header and footer.
+ * <p>It is a {@link FilterIndexOutput}, so it is possible to {@link
FilterIndexOutput#unwrap} it.
*
* @see DecryptingIndexInput
* @see AesCtrEncrypter
*/
-public class EncryptingIndexOutput extends IndexOutput {
+public class EncryptingIndexOutput extends FilterIndexOutput {
/**
* Must be a multiple of {@link AesCtrUtil#AES_BLOCK_SIZE}.
@@ -81,7 +83,7 @@ public class EncryptingIndexOutput extends IndexOutput {
AesCtrEncrypterFactory factory,
int bufferCapacity)
throws IOException {
- super("Encrypting " + indexOutput.toString(), indexOutput.getName());
+ super("Encrypting " + indexOutput, indexOutput.getName(), indexOutput);
this.indexOutput = indexOutput;
byte[] iv = generateRandomIv();
encrypter = factory.create(key, iv);