I'd like to propose a new RNG for APR, based on a design from D.J.
Bernstein ([1]).

Called "Fast-key-erasure random-number generators" by the author, it
requires 256bits (32 bytes) of initial entropy only, is fast, and
ensures Forward Secrecy.

The current RNGs available in APR are:
1/ apr_generate_random_bytes(); the wrapper around the system call,
2/ apr_random_(in)secure_bytes(); the ones from "random/unix/apr_random.c".

Both are obviously considered secure, but 1/ is usually not very fast
(and could block on some systems), and 2/ requires kilos of initial
entropy (taken from 1/ thus possibly blocking too).
Also the system RNG wrapped by 1/ is unlikely to return a great number
of bytes at once (256 max on some systems like BSDs), and better suits
as a source of entropy (like in 2/).
Neither ensures forward secrecy (AFAICT).

Though better described in [1], I'll try a short description of the
new proposed RNG here...
Based on a stream cipher with 256bit key (either CHACHA20 if available
in the same crypto libs used by APR[-util], or AES256-CTR which should
be available in all), the initial key is generated randomly from the
system's RNG (after this it's not used anymore).
Each time random bytes are requested, a zero-ed buffer is encrypted
with the current key, the first 32 bytes will be the next key and the
remaining bytes are returned. Practically, a buffer of 32 + n bytes is
maintained by the RNG and it's only when the n bytes are exhausted
that the key+buffer is renewed.
Whenever bytes are returned to the caller, they are cleared from the
internal buffer to ensure forward secrecy.
No IV/nonce is needed since the key changes each time.

So the implementation is quite simple, attached is a POC on Linux w/
OpenSSL (not APR-ized yet, for the curious "gcc -g -o fsrng fsrng.c
-lcrypto -lpthread -D MAIN" to compile and eg. "./fsrng $((1024 *
1024)) |xxd -g1 |less" to run).
Less than 200 lines of code (of interest), it's slightly inspired from
what I think is an implementation of this in libpqcrypto ([2]), used
by SUPERCOP ([3]), but that one uses libNaCl so the comparison is not
obvious (the design is there, and it's public domain).
Actually it's more lines than this because there is also some trailing
per-thread (no lock) and global (locked) stuff, with a main() for
everyone to verify that the output are really random :)

All this could be APR-ized quite easily, per-thread (RNG pointer in
Thread Local Storage) is nice for performance so I thought I'd include
it too...

Sounds interesting? Worth it?

Regards,
Yann.

[1] https://blog.cr.yp.to/20170723-random.html
[2] https://libpqcrypto.org/install.html (see download section for the
tarball, and then "./fastrandombytes/fastrandombytes.c")
[3] https://bench.cr.yp.to/supercop.html

PS: FSRNG in the code stands for Forward Secracy RNG, and is only a
"what came to my mind first" name.
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <assert.h>

#include <strings.h>    /* for explicit_bzero() */
#include <sys/random.h> /* for getrandom() */

#include <pthread.h>

#include <openssl/crypto.h>
#include <openssl/objects.h> /* for NID_* */
#include <openssl/evp.h>
#include <openssl/err.h>

#if !defined(NID_chacha20) && !defined(NID_aes_256_ctr)
#error FSRNG requires Chacha20 or AES256-CTR implementation!
#endif

#define FSRNG_KEY_SIZE 32 /* 256 bits for both CHACHA20 and AES256-CTR */
#define FSRNG_DEFAULT_BLOCKS (8000 / FSRNG_KEY_SIZE)

#define FSRNG_LOCKED     (1 << 0)
#define FSRNG_PER_THREAD (1 << 1)
#define FSRNG_FLAGS      (0x3)

typedef struct {
    EVP_CIPHER_CTX *ctx;
    const EVP_CIPHER *cipher;
    unsigned char *key, *rand;
    pthread_mutex_t lock;
    size_t size, pos;
    int flags;
} fsrng_t;

static fsrng_t fsrng_global;

int fsrng_cleanup(fsrng_t *rng)
{
    if (rng == &fsrng_global || rng->flags & FSRNG_PER_THREAD) {
        return EINVAL;
    }

    if (rng->ctx) {
        EVP_CIPHER_CTX_free(rng->ctx);
    }
    if (rng->key) {
        explicit_bzero(rng->key, FSRNG_KEY_SIZE + rng->size);
        free(rng->key);
    }
    if (rng->flags & FSRNG_LOCKED) {
        pthread_mutex_destroy(&rng->lock);
    }

    memset(rng, 0, sizeof(*rng));
    return 0;
}

int fsrng_init(fsrng_t *rng, size_t blocks, int flags)
{
    memset(rng, 0, sizeof(*rng));

    if (blocks > INT_MAX / FSRNG_KEY_SIZE || flags & ~FSRNG_FLAGS) {
        return EINVAL;
    }

    if (flags & FSRNG_LOCKED) {
        int err = pthread_mutex_init(&rng->lock, NULL);
        if (err) {
            return err;
        }
    }
    rng->flags = flags;

    rng->ctx = EVP_CIPHER_CTX_new();
    if (!rng->ctx) {
        fsrng_cleanup(rng);
        return ENOMEM;
    }

#if defined(NID_chacha20)
    rng->cipher = EVP_chacha20();
#else
    rng->cipher = EVP_aes_256_ctr();
#endif

    if (EVP_EncryptInit_ex(rng->ctx, rng->cipher, NULL, NULL, NULL) <= 0) {
        fsrng_cleanup(rng);
        return ENOMEM;
    }

    if (blocks == 0) {
        blocks = FSRNG_DEFAULT_BLOCKS;
    }
    rng->size = rng->pos = blocks * FSRNG_KEY_SIZE;
    rng->key = malloc(FSRNG_KEY_SIZE + rng->size);
    if (!rng->key) {
        fsrng_cleanup(rng);
        return ENOMEM;
    }
    if (getrandom(rng->key, FSRNG_KEY_SIZE, 0) == -1) {
        int err = errno;
        fsrng_cleanup(rng);
        return err;
    }
    rng->rand = rng->key + FSRNG_KEY_SIZE;
    memset(rng->rand, 0, rng->size);

    return 0;
}

int fsrng_flags(const fsrng_t *rng)
{
    return rng->flags;
}

static void fsrng_mix(fsrng_t *rng, unsigned char *to)
{
    int len;

    /* Make this pass with the current key and renew it inplace (zeroed) */
    EVP_EncryptInit_ex(rng->ctx, NULL, NULL, rng->key, NULL);
    EVP_CIPHER_CTX_set_padding(rng->ctx, 0);
    memset(rng->key, 0, FSRNG_KEY_SIZE);

    /* No finalization needed with no padding and size % FSRNG_KEY_SIZE == 0 */
    EVP_EncryptUpdate(rng->ctx, rng->key, &len, rng->key, FSRNG_KEY_SIZE);
    EVP_EncryptUpdate(rng->ctx, to, &len, rng->rand, rng->size);
}

void fsrng_seed(fsrng_t *rng, const unsigned char *buf, size_t len)
{
    size_t n, i = 0;

    if (rng->flags & FSRNG_LOCKED) {
        pthread_mutex_lock(&rng->lock);
    }
    for (n = 0; n < len; ++n) {
        rng->key[i++] ^= buf[n];
        if (i == FSRNG_KEY_SIZE) {
            i = 0;
        }
    }
    if (rng->flags & FSRNG_LOCKED) {
        pthread_mutex_unlock(&rng->lock);
    }
}

int fsrng_bytes(fsrng_t *rng, unsigned char *buf, size_t len)
{
    size_t n, size = rng->size;

    if (rng->flags & FSRNG_LOCKED) {
        pthread_mutex_lock(&rng->lock);
    }
    while (len) {
        if (rng->pos == size) {
            if (len >= size) {
                do {
                    fsrng_mix(rng, buf);
                    buf += size;
                    len -= size;
                } while (len >= size);
                if (!len) {
                    break;
                }
            }
            fsrng_mix(rng, rng->rand);
            rng->pos = 0;
        }

        /* Random bytes are consumed then zero-ed to ensure
         * both forward secrecy and cleared next mixed data.
         */
        n = size - rng->pos;
        if (n > len) {
            n = len;
        }
        memcpy(buf, rng->rand + rng->pos, n);
        memset(rng->rand + rng->pos, 0, n);
        rng->pos += n;

        buf += n;
        len -= n;
    }
    if (rng->flags & FSRNG_LOCKED) {
        pthread_mutex_unlock(&rng->lock);
    }

    return 0;
}


static size_t fsrng_thread_blocks,
              fsrng_thread_blocks_init;
static pthread_key_t fsrng_thread_key;

static void fsrng_thread_dtor(void *rng)
{
    fsrng_cleanup(rng);
    free(rng);
}

static void fsrng_thread_once(void)
{
    if (pthread_key_create(&fsrng_thread_key, fsrng_thread_dtor) == 0) {
        fsrng_thread_blocks = fsrng_thread_blocks_init;
    }
}

int fsrng_thread_init(size_t blocks)
{
    static pthread_once_t once = PTHREAD_ONCE_INIT;
    
    if (fsrng_thread_blocks) {
        return EALREADY;
    }
    if (blocks > INT_MAX / FSRNG_KEY_SIZE) {
        return EINVAL;
    }

    fsrng_thread_blocks_init = blocks ? blocks : FSRNG_DEFAULT_BLOCKS;
    pthread_once(&once, fsrng_thread_once);
    if (!fsrng_thread_blocks) {
        return ENOMEM;
    }

    return 0;
}

fsrng_t *fsrng_thread_self(void)
{
    fsrng_t *rng;

    if (fsrng_thread_init(0) == ENOMEM) {
        return NULL;
    }

    rng = pthread_getspecific(fsrng_thread_key);
    if (!rng) {
        rng = malloc(sizeof(*rng));
        if (!rng) {
            return NULL;
        }

        if (fsrng_init(rng, fsrng_thread_blocks, FSRNG_PER_THREAD) != 0) {
            free(rng);
            return NULL;
        }

        pthread_setspecific(fsrng_thread_key, rng);
    }
    return rng;
}

int fsrng_thread_bytes(unsigned char *buf, size_t len)
{
    fsrng_t *rng;

    rng = fsrng_thread_self();
    if (!rng) {
        return ENOMEM;
    }

    return fsrng_bytes(rng, buf, len);
}


static size_t fsrng_global_blocks,
              fsrng_global_blocks_init;

static void fsrng_global_once(void)
{
    if (fsrng_init(&fsrng_global, fsrng_global_blocks_init, FSRNG_LOCKED) == 0) {
        fsrng_global_blocks = fsrng_global_blocks_init;
    }
}

int fsrng_global_init(size_t blocks)
{
    static pthread_once_t once = PTHREAD_ONCE_INIT;

    if (fsrng_global_blocks) {
        return EALREADY;
    }
    if (blocks > INT_MAX / FSRNG_KEY_SIZE) {
        return EINVAL;
    }

    fsrng_global_blocks_init = blocks ? blocks : FSRNG_DEFAULT_BLOCKS;
    pthread_once(&once, fsrng_global_once);
    if (!fsrng_global_blocks) {
        return ENOMEM;
    }

    return 0;
}

int fsrng_global_bytes(unsigned char *buf, size_t len)
{
    if (fsrng_global_init(0) == ENOMEM) {
        return ENOMEM;
    }

    return fsrng_bytes(&fsrng_global, buf, len);
}

fsrng_t *fsrng_global_instance(void)
{
    return &fsrng_global;
}

#ifdef MAIN
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, const char *argv[])
{
    unsigned char *rand;
    ssize_t size;

#if OPENSSL_VERSION_NUMBER < 0x10100000L
    CRYPTO_malloc_init();
#else
    OPENSSL_malloc_init();
#endif
    ERR_load_crypto_strings();
    OpenSSL_add_all_algorithms();

    size = (argc > 1) ? atoi(argv[1]) : 0;
    if (size <= 0) {
        size = 16;
    }
    rand = malloc(size);
    if (argc > 2) {
        fsrng_thread_bytes(rand, size);
    }
    else {
        fsrng_global_bytes(rand, size);
    }
    return write(STDOUT_FILENO, rand, size) != size;
}
#endif

Reply via email to