Hi, Julius!

On Jul 01, Julius Goryavsky wrote:
> revision-id: cf0fa125877 (mariadb-10.5.2-177-gcf0fa125877)
> parent(s): e7ca9ec6f6b
> author: Julius Goryavsky <julius.goryav...@mariadb.com>
> committer: Julius Goryavsky <julius.goryav...@mariadb.com>
> timestamp: 2020-04-20 15:28:49 +0200
> message:
> 
> MENT-471: Plugin implementation for the Hashicorp Vault KMS
> 
> - Authentication is done using the Hashicorp Vault's token
>   authentication method;
> - If additional client authentication is required, then the
>   path to the CA authentication bundle file may be passed
>   as a plugin parameter;
> - The creation of the keys and their management is carried
>   out using the Hashicorp Vault KMS and their tools;
> - Key values stored as hexadecimal strings;
> - Key values caching is supported.
> - The plugin uses libcurl (https) as an interface to
>   the HashiCorp Vault server;
> - JSON parsing is performed through the JSON service
>   (through the include/mysql/service_json.h);
> - HashiCorp Vault 1.2.4 was used for development and testing.
> 
> ---
>  mysql-test/suite/vault/disabled.def                |  11 +
>  mysql-test/suite/vault/my.cnf                      |   1 +
>  mysql-test/suite/vault/r/hashicorp_encode.result   |  45 ++
>  mysql-test/suite/vault/suite.pm                    |  27 +
>  mysql-test/suite/vault/t/hashicorp_deinit.inc      |   6 +
>  mysql-test/suite/vault/t/hashicorp_encode.opt      |   5 +
>  mysql-test/suite/vault/t/hashicorp_encode.test     |   3 +
>  mysql-test/suite/vault/t/hashicorp_goodtest.inc    |  20 +
>  mysql-test/suite/vault/t/hashicorp_init.inc        |  32 +
>  mysql-test/suite/vault/t/hashicorp_plugin.inc      |   1 +

please, put the plugin specific test suite into the plugin dir:

 plugin/hashicorp_key_management/mysql-test/vault

>  plugin/hashicorp_key_management/CMakeLists.txt     |  11 +
>  .../hashicorp_key_management_plugin.cc             | 899 
> +++++++++++++++++++++
>  plugin/hashicorp_key_management/readme.txt         | 138 ++++
>  14 files changed, 1203 insertions(+)
> 
> diff --git a/mysql-test/suite/vault/disabled.def 
> b/mysql-test/suite/vault/disabled.def
> --- /dev/null
> +++ b/mysql-test/suite/vault/disabled.def
> @@ -0,0 +1,11 @@
> +##############################################################################
> +#
> +#  List the test cases that are to be disabled temporarily.
> +#
> +#  Separate the test case name and the comment with ':'.
> +#
> +#    <testcasename> : BUG#<xxxx> <date disabled> <disabler> <comment>
> +#
> +#  Do not use any TAB characters for whitespace.
> +#
> +##############################################################################

you don't have to create and commit this template

> diff --git 
> a/mysql-test/suite/vault/include/have_hashicorp_key_management_plugin.inc 
> b/mysql-test/suite/vault/include/have_hashicorp_key_management_plugin.inc
> --- /dev/null
> +++ b/mysql-test/suite/vault/include/have_hashicorp_key_management_plugin.inc
> @@ -0,0 +1,4 @@
> +if (`SELECT COUNT(*)=0 FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME = 
> 'hashicorp_key_management' AND PLUGIN_STATUS='ACTIVE'`)
> +{
> +  --skip Test requires active hashicorp_key_management plugin
> +}
> diff --git a/mysql-test/suite/vault/my.cnf b/mysql-test/suite/vault/my.cnf
> --- /dev/null
> +++ b/mysql-test/suite/vault/my.cnf
> @@ -0,0 +1 @@
> +!include include/default_my.cnf

and you definitely should not create suite my.cnf that only includes the
default my.cnf. You only need a custom my.cnf if it adds some extra
options that are not default.

> diff --git a/mysql-test/suite/vault/suite.pm b/mysql-test/suite/vault/suite.pm
> --- /dev/null
> +++ b/mysql-test/suite/vault/suite.pm
> @@ -0,0 +1,27 @@
> +package My::Suite::Vault;
> +
> +@ISA = qw(My::Suite);
> +
> +use strict;
> +
> +if (::IS_WINDOWS) {
> +  return "The test suite for Hashicorp Vault plugin currently does not 
> support Windows";
> +}

instead of a custom check, you'd better add

   source include/not_windows.inc

into your have_hashicorp_key_management_plugin.inc

> +
> +return "Hashicorp Vault management utility not found"
> +  unless ::which("vault");
> +
> +return "You need to set the value of the VAULT_ADDR variable"
> +  unless $ENV{VAULT_ADDR};
> +
> +return "You need to set the value of the VAULT_TOKEN variable"
> +  unless $ENV{VAULT_TOKEN};
> +
> +sub skip_combinations {
> +  my %skip;
> +  $skip{'include/have_hashicorp_key_management_plugin.inc'} = 'Needs dynamic 
> hashicorp_key_management plugin'
> +     unless $ENV{HASHICORP_KEY_MANAGEMENT_SO};

this is rather redundant, isn't it?
have_hashicorp_key_management_plugin.inc itself checks for the plugin
and skips. If you remove your skip_combinations() function, the behavior
will be exactly the same.

Normally one needs to skip tests from suite.pm for conditions that
cannot be checked from inside the test.

> +  %skip;
> +}
> +
> +bless {};
> diff --git a/mysql-test/suite/vault/t/hashicorp_deinit.inc 
> b/mysql-test/suite/vault/t/hashicorp_deinit.inc
> --- /dev/null
> +++ b/mysql-test/suite/vault/t/hashicorp_deinit.inc
> @@ -0,0 +1,6 @@
> +perl;
> +   `vault secrets disable test`;

1. perl has "system" command, one normally uses backticks when one needs
the output.

2. mysqltest will write ths small perl script into a file and will run
perl on it. How is this overhead better than just

   exec vault secrets disable test;

with no perl at all?

3. why did you create a separate hashicorp_deinit.inc if you only use
it once?

> +   if (($? >> 8) != 0) {
> +     die "Vault: unable to destroy path for test keys";
> +   }
> +EOF
> diff --git a/mysql-test/suite/vault/t/hashicorp_init.inc 
> b/mysql-test/suite/vault/t/hashicorp_init.inc
> --- /dev/null
> +++ b/mysql-test/suite/vault/t/hashicorp_init.inc
> @@ -0,0 +1,32 @@
> +perl;
> +my $list = `vault secrets list`;
> +if (($? >> 8) != 0) {
> +  die "Unable to retrieve secrets list - Hashicorp Vault not configured 
> properly or not enough permissions";
> +}
> +if ($list !~ /(^|\n|\r)test\/[\s\t]+kv[\s\t]+/) {
> +  `vault secrets enable -path /test -version=2 kv`;
> +  if (($? >> 8) != 0) {
> +    die "Vault: unable to create path for test keys";
> +  }
> +  `vault kv put /test/1 data="123456789ABCDEF0123456789ABCDEF0"`;
> +  if (($? >> 8) != 0) {
> +    die "Vault: unable to create first secret key";
> +  }
> +  `vault kv put /test/2 data="23456789ABCDEF0123456789ABCDEF01"`;
> +  if (($? >> 8) != 0) {
> +    die "Vault: unable to create second secret key";
> +  }
> +  `vault kv put /test/33 data="00000000000000000000000000000000"`;
> +  if (($? >> 8) != 0) {
> +    die "Vault: unable to create third secret key";
> +  }
> +  `vault kv put /test/33 data="3456789ABCDEF0123456789ABCDEF012"`;
> +  if (($? >> 8) != 0) {
> +    die "Vault: unable to update third secret key";
> +  }
> +  `vault kv put /test/4 data="456789ABCDEF0123456789ABCDEF0123"`;
> +  if (($? >> 8) != 0) {
> +    die "Vault: unable to create fourth secret key";
> +  }
> +}
> +EOF

same as above. you shouldn't abuse backticks, don't need perl and you
don't need a separate file. Use something like directly in your
hashicorp_goodtest.test

    exec vault secrets disable test; # start from a clean state
    exec vault secrets enable -path /test -version=2 kv;
    exec vault kv put /test/1 data="123456789ABCDEF0123456789ABCDEF0";
    exec vault kv put /test/2 data="23456789ABCDEF0123456789ABCDEF01";
    exec vault kv put /test/33 data="00000000000000000000000000000000";
    exec vault kv put /test/33 data="3456789ABCDEF0123456789ABCDEF012";
    exec vault kv put /test/4 data="456789ABCDEF0123456789ABCDEF0123";

also, I'd recommend to use a more specific secret name, like mariadbtest

> diff --git a/mysql-test/suite/vault/t/hashicorp_goodtest.inc 
> b/mysql-test/suite/vault/t/hashicorp_goodtest.inc
> --- /dev/null
> +++ b/mysql-test/suite/vault/t/hashicorp_goodtest.inc
> @@ -0,0 +1,20 @@
> +--source include/have_innodb.inc
> +--source hashicorp_plugin.inc
> +
> +SHOW GLOBAL variables WHERE Variable_name LIKE "hashicorp%" AND 
> Variable_name NOT LIKE "%vault_url";

normally it's done like

  replace_result $VAULT_ADDR VAULT_ADDR;
  SHOW GLOBAL variables LIKE "hashicorp%";

that is, you show all variables and replace away the dynamic part.

> +
> +create table t1(c1 bigint not null, b char(200)) engine=innodb encrypted=yes 
> encryption_key_id=1;
> +show create table t1;
> +insert t1 values (12345, repeat('1234567890', 20));
> +
> +alter table t1 encryption_key_id=2;
> +show create table t1;
> +--error ER_ILLEGAL_HA_CREATE_OPTION
> +alter table t1 encryption_key_id=3;
> +show create table t1;
> +alter table t1 encryption_key_id=33;
> +show create table t1;
> +alter table t1 encryption_key_id=4;
> +show create table t1;

where's a test for key rotation?

where's an encrypt/restart/decrypt test?

> +
> +drop table t1;
> diff --git a/mysql-test/suite/vault/t/hashicorp_encode.opt 
> b/mysql-test/suite/vault/t/hashicorp_encode.opt
> --- /dev/null
> +++ b/mysql-test/suite/vault/t/hashicorp_encode.opt
> @@ -0,0 +1,5 @@
> +--plugin-load-add=$HASHICORP_KEY_MANAGEMENT_SO
> +--loose-hashicorp-key-management
> +--loose-hashicorp-key-management-vault-url="$VAULT_ADDR/v1/test/"
> +--loose-hashicorp-key-management-token="$VAULT_TOKEN"
> +--loose-hashicorp-key-management-timeout=60
> diff --git a/mysql-test/suite/vault/t/hashicorp_encode.test 
> b/mysql-test/suite/vault/t/hashicorp_encode.test
> --- /dev/null
> +++ b/mysql-test/suite/vault/t/hashicorp_encode.test
> @@ -0,0 +1,3 @@
> +source hashicorp_init.inc;
> +source hashicorp_goodtest.inc;
> +source hashicorp_deinit.inc;
> diff --git a/mysql-test/suite/vault/t/hashicorp_plugin.inc 
> b/mysql-test/suite/vault/t/hashicorp_plugin.inc
> --- /dev/null
> +++ b/mysql-test/suite/vault/t/hashicorp_plugin.inc
> @@ -0,0 +1 @@
> +--source include/have_hashicorp_key_management_plugin.inc

another file that it's only used once? what's the point?
include have_hashicorp_key_management_plugin.inc directly into your
hashicorp_encode.test

> diff --git 
> a/plugin/hashicorp_key_management/hashicorp_key_management_plugin.cc 
> b/plugin/hashicorp_key_management/hashicorp_key_management_plugin.cc
> --- /dev/null
> +++ b/plugin/hashicorp_key_management/hashicorp_key_management_plugin.cc
> @@ -0,0 +1,899 @@
> +/* Copyright (C) 2019 MariaDB Corporation

2019, 2020

> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; version 2 of the License.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program; if not, write to the Free Software
> + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
> +
> +#include <mysql/plugin_encryption.h>
> +#include <mysqld_error.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <errno.h>
> +#include <string>
> +#include <sstream>
> +#include <curl/curl.h>
> +#ifdef _WIN32
> +#include <malloc.h>
> +#define alloca _alloca
> +#else
> +#include <alloca.h>
> +#endif
> +#include <algorithm>
> +#include <map>
> +#include <mutex>
> +
> +static std::mutex mtx;
> +
> +/* Key information structure: */
> +typedef struct KEY_INFO
> +{
> +  unsigned int key_id;
> +  unsigned int key_version;
> +  unsigned int length;
> +  unsigned char data [MY_AES_MAX_KEY_LENGTH];
> +public:
> +  KEY_INFO() : key_id(0), key_version(0), length(0) {};
> +} KEY_INFO;
> +
> +/* Cache for the latest version, per key id: */
> +static std::map<unsigned int, unsigned int> latest_version_cache;
> +
> +/* Cache for key information: */
> +static std::map<unsigned long long, KEY_INFO> key_info_cache;

why not unordered_map here and above? where do you use the ordering
property?

> +
> +#define KEY_ID_AND_VERSION(key_id, version) \
> +  ((unsigned long long)key_id << 32 | version)
> +
> +static void cache_add (unsigned int key_id, unsigned int key_version,
> +                       const KEY_INFO* info)
> +{
> +  mtx.lock();
> +  latest_version_cache[key_id]=
> +    std::max(latest_version_cache[key_id], key_version);
> +  key_info_cache[KEY_ID_AND_VERSION(key_id, key_version)]= *info;
> +  mtx.unlock();
> +}
> +
> +static unsigned int
> +  cache_get (unsigned int key_id, unsigned int key_version,
> +             unsigned char* data, unsigned int* buflen)
> +{
> +  unsigned int ver= key_version;
> +  mtx.lock();
> +  if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
> +  {
> +    ver= latest_version_cache[key_id];
> +    if (ver == 0)
> +    {
> +      mtx.unlock();
> +      return ENCRYPTION_KEY_VERSION_INVALID;
> +    }
> +  }
> +  KEY_INFO info= key_info_cache[KEY_ID_AND_VERSION(key_id, ver)];
> +  mtx.unlock();
> +  unsigned int length= info.length;
> +  if (length == 0)
> +  {
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  unsigned int max_length = *buflen;
> +  *buflen = length;
> +  if (max_length < length)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Encryption key buffer too small",
> +                    ME_ERROR_LOG_ONLY | ME_NOTE);

I don't think you need an error here, this is not an error from the
caller point of view, but just a way to query key length.

> +    return ENCRYPTION_KEY_BUFFER_TOO_SMALL;
> +  }
> +  memcpy(data, info.data, length);
> +  return 0;
> +}
> +
> +static unsigned int cache_get_version (unsigned int key_id)
> +{
> +  unsigned int ver;
> +  mtx.lock();
> +  ver= latest_version_cache[key_id];
> +  mtx.unlock();
> +  if (ver)
> +  {
> +    return ver;
> +  }
> +  else
> +  {
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +}
> +
> +static char* vault_url;
> +static char* token;
> +static char* vault_ca;
> +int timeout;
> +int max_retries;
> +char caching_enabled;
> +char use_cache_on_timeout;

why the other four aren't static?

> +
> +static MYSQL_SYSVAR_STR(vault_ca, vault_ca,
> +  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
> +  "Path to the Certificate Authority (CA) bundle (is a file "
> +  "that contains root and intermediate certificates)",
> +  NULL, NULL, "");
> +
> +static MYSQL_SYSVAR_STR(vault_url, vault_url,
> +  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
> +  "HTTP[s] URL that is used to connect to the Hashicorp Vault server",
> +  NULL, NULL, "https://127.0.0.1:8200/v1/kv";);

Is this a reasonable default for a vault url?
I'd rather keep it empty.

> +
> +static MYSQL_SYSVAR_STR(token, token,
> +  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
> +  "Authentication token that passed to the Hashicorp Vault "
> +  "in the request header",
> +  NULL, NULL, "");
> +
> +static MYSQL_SYSVAR_INT(timeout, timeout,
> +  PLUGIN_VAR_RQCMDARG,
> +  "Duration (in seconds) for the Hashicorp Vault server "
> +  "connection timeout",
> +  NULL, NULL, 15, 0, 86400, 1);
> +
> +static MYSQL_SYSVAR_INT(max_retries, max_retries,
> +  PLUGIN_VAR_RQCMDARG,
> +  "Number of server request retries in case of timeout",
> +  NULL, NULL, 3, 0, INT_MAX, 1);
> +
> +static MYSQL_SYSVAR_BOOL(caching_enabled, caching_enabled,
> +  PLUGIN_VAR_RQCMDARG,
> +  "Enable key caching (storing key values received from "
> +  "the Hashicorp Vault server in the local memory)",
> +  NULL, NULL, 1);
> +
> +static MYSQL_SYSVAR_BOOL(use_cache_on_timeout, use_cache_on_timeout,
> +  PLUGIN_VAR_RQCMDARG,
> +  "In case of timeout (when accessing the vault server) "
> +  "use the value taken from the cache",
> +  NULL, NULL, 0);
> +
> +static struct st_mysql_sys_var *settings[] = {
> +  MYSQL_SYSVAR(vault_url),
> +  MYSQL_SYSVAR(token),
> +  MYSQL_SYSVAR(vault_ca),
> +  MYSQL_SYSVAR(timeout),
> +  MYSQL_SYSVAR(max_retries),
> +  MYSQL_SYSVAR(caching_enabled),
> +  MYSQL_SYSVAR(use_cache_on_timeout),
> +  NULL
> +};
> +
> +static std::string get_error_from_curl (CURLcode curl_code, char 
> *curl_errbuf)
> +{
> +  size_t len = strlen(curl_errbuf);
> +  std::ostringstream stream;
> +  if (curl_code != CURLE_OK)
> +  {
> +    stream << "CURL returned this error code: " << curl_code;
> +    stream << " with error message : ";
> +    if (len)
> +      stream << curl_errbuf;
> +    else
> +      stream << curl_easy_strerror(curl_code);
> +  }
> +  return stream.str();
> +}
> +
> +/*
> +  Reasonable length limit to protect against accidentally reading
> +  the wrong key or from trying to overload the server with unnecessary
> +  work to receive too long responses to requests:
> +*/
> +#define max_response_size 131072

conventionally, it should be uppercase

> +
> +static size_t write_response_memory (void *contents, size_t size, size_t 
> nmemb,
> +                                     void *userp)
> +{
> +  size_t realsize = size * nmemb;
> +  if (size != 0 && realsize / size != nmemb)
> +    return 0; // multiplication overflow detected

this cannot happen, size is documented to be always 1, and 
the max data size is CURL_MAX_WRITE_SIZE anyway.

> +  std::ostringstream *read_data = static_cast<std::ostringstream *>(userp);
> +  size_t current_length = read_data->tellp();
> +  if (current_length + realsize > max_response_size)
> +    return 0; // response size limit exceeded
> +  read_data->write(static_cast<char *>(contents), realsize);
> +  if (!read_data->good())
> +    return 0;
> +  return realsize;
> +}
> +
> +enum {
> +   OPERATION_OK,
> +   OPERATION_TIMEOUT,
> +   OPERATION_ERROR
> +};
> +
> +static CURLcode
> +  perform_with_retries (CURL *curl, std::ostringstream *read_data_stream)
> +{
> +  int retries= max_retries;
> +  CURLcode curl_res;
> +  do {
> +    curl_res= curl_easy_perform(curl);
> +    if (curl_res != CURLE_OPERATION_TIMEDOUT)
> +    {
> +      break;
> +    }
> +    read_data_stream->clear();
> +    read_data_stream->str("");

I presume, this wasn't tested at all?

> +  } while (retries--);
> +  return curl_res;
> +}
> +
> +#define max_token_size 32768

uppercase again

> +
> +static size_t token_len;
> +
> +static int curl_run (char *url, std::string *response, bool soft_timeout)
> +{
> +  const static char *x_vault_token = "X-Vault-Token:";
> +  const static size_t x_vault_token_len = strlen(x_vault_token);
> +/*
> +  Checking the maximum allowable length to protect
> +  against allocating too much memory on the stack:
> +*/
> +  if (token_len > max_token_size)

32K is a lot, tokens are much shorter and stack isn't big.
You can safely reduce max_token_size to, say 8K.

and why do you check it here in curl_run anyway?
you can check it in hashicorp_key_management_plugin_init. once.

> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Maximum allowed token length exceeded",
> +                    ME_ERROR_LOG_ONLY);
> +    return OPERATION_ERROR;
> +  }
> +  char curl_errbuf[CURL_ERROR_SIZE];
> +  std::ostringstream read_data_stream;
> +  long http_code = 0;
> +  struct curl_slist *list = NULL;
> +  CURLcode curl_res = CURLE_OK;
> +  CURL *curl = curl_easy_init();
> +  if (curl == NULL)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Cannot initialize curl session",
> +                    ME_ERROR_LOG_ONLY);
> +    return OPERATION_ERROR;
> +  }
> +  size_t buf_len = x_vault_token_len + token_len + 1;
> +  char *token_header = (char *) alloca(buf_len);
> +  snprintf(token_header, buf_len, "%s%s", x_vault_token, token);

also there's no need to allocate the buffer on the stack and concatenate
the header here every time, you can do it once in
hashicorp_key_management_plugin_init too.

> +  curl_errbuf[0] = '\0';
> +  if ((list= curl_slist_append(list, token_header)) == NULL ||

and you can prepare slist only once too (I think).

> +      (curl_res= curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf)) !=
> +          CURLE_OK ||
> +      (curl_res= curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
> +                                  write_response_memory)) != CURLE_OK ||
> +      (curl_res= curl_easy_setopt(curl, CURLOPT_WRITEDATA,
> +                                  &read_data_stream)) !=
> +          CURLE_OK ||
> +      (curl_res= curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list)) !=
> +          CURLE_OK ||
> +      /*
> +        The options CURLOPT_SSL_VERIFYPEER and CURLOPT_SSL_VERIFYHOST are
> +        set explicitly to withstand possible future changes in curl defaults:
> +      */
> +      (curl_res= curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1)) !=
> +          CURLE_OK ||
> +      (curl_res= curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 2L)) !=
> +          CURLE_OK ||
> +      (strlen(vault_ca) != 0 &&
> +       (curl_res= curl_easy_setopt(curl, CURLOPT_CAINFO, vault_ca)) !=
> +           CURLE_OK) ||
> +      (curl_res= curl_easy_setopt(curl, CURLOPT_USE_SSL, CURLUSESSL_ALL)) !=
> +          CURLE_OK ||
> +      (curl_res= curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L)) !=
> +          CURLE_OK ||
> +      (timeout &&
> +       ((curl_res= curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, timeout)) 
> !=
> +            CURLE_OK ||
> +        (curl_res= curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout)) !=
> +            CURLE_OK)) ||
> +      (curl_res = curl_easy_setopt(curl, CURLOPT_URL, url)) != CURLE_OK ||
> +      (curl_res = perform_with_retries(curl, &read_data_stream)) != CURLE_OK 
> ||
> +      (curl_res = curl_easy_getinfo (curl, CURLINFO_RESPONSE_CODE,
> +                                     &http_code)) != CURLE_OK)
> +  {
> +    curl_slist_free_all(list);
> +    curl_easy_cleanup(curl);
> +    if (soft_timeout && curl_res == CURLE_OPERATION_TIMEDOUT)
> +    {
> +      return OPERATION_TIMEOUT;
> +    }
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    get_error_from_curl(curl_res, curl_errbuf).c_str(),

really? strlen, ostringstream, append/append/append, then you return
std::string and then print its c_str()?

It's printf, you can do directly

 my_printf_error(ER_UNKNOWN_ERROR, "CURL returned this error code %u"
    "  with error message: %s", curl_res, curl_errbuf);

Which is much easier to read, doesn't do all these string operations and
reallocs, and it's also safe, because your variant will crash if any of
the curl error messages will include % sign.

> +                    ME_ERROR_LOG_ONLY);
> +    return OPERATION_ERROR;
> +  }
> +  curl_slist_free_all(list);
> +  curl_easy_cleanup(curl);
> +  *response = read_data_stream.str();
> +  bool is_error = http_code < 200 || http_code >= 300;
> +  if (is_error)
> +  {
> +    const char *res = response->c_str();
> +    /*
> +      Error 404 requires special handling - in case the server
> +      returned an empty array of error strings (the value of the
> +      "error" object in JSON is equal to an empty array), we should
> +      ignore this error at this level, since this means the missing
> +      key (this problem is handled at a higher level), but if the
> +      error object contains anything other than empty array, then
> +      we need to print the error message to the log:
> +    */
> +    if (http_code == 404)
> +    {
> +      const char *err;
> +      int err_len;
> +      if (json_get_object_key(res, res + strlen(res),
> +                              "errors", &err, &err_len) == JSV_ARRAY)
> +      {
> +        const char *ev;
> +        int ev_len;
> +        if (json_get_array_item(err, err + err_len, 0, &ev, &ev_len) ==
> +            JSV_NOTHING)
> +        {
> +          *response = std::string("");
> +          is_error = false;
> +        }
> +      }
> +    }
> +    if (is_error)
> +    {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "Hashicorp server error: %d, response: %s",
> +                      ME_ERROR_LOG_ONLY | ME_WARNING, http_code, res);
> +    }
> +  }
> +  return is_error ? OPERATION_ERROR : OPERATION_OK;
> +}
> +
> +#define max_url_size 32768

uppercase

> +
> +static size_t vault_url_len;
> +
> +static unsigned int get_latest_version (unsigned int key_id)
> +{
> +  std::string response_str;
> +  const char *response;
> +  const char *js, *ver;
> +  int js_len, ver_len;
> +  size_t response_len;
> +/*
> +  Checking the maximum allowable length to protect
> +  against allocating too much memory on the stack:
> +*/
> +  if (vault_url_len > max_url_size)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Maximum allowed vault URL length exceeded",
> +                    0);
> +    return true;
> +  }
> +/*
> +  Maximum buffer length = url length plus 20 characters of
> +  a 64-bit unsigned integer, plus a slash character, plus
> +  a length of the "data/" string and plus a zero byte:
> +*/

please, indent all comments properly (here and above). now they'll
cause diffs to be rather unreadable.

> +  size_t buf_len = vault_url_len + (20 + 5 + 2);
> +  char *url = (char *) alloca(buf_len);
> +  if (vault_url_len && vault_url[vault_url_len - 1] != '/')
> +  {
> +    snprintf(url, buf_len, "%s/data/%u", vault_url, key_id);
> +  }
> +  else
> +  {
> +    snprintf(url, buf_len, "%sdata/%u", vault_url, key_id);
> +  }

same comment as above about run-time checks.
You can make sure that vault_url_len is within limits and vault_url
doesn't end with a slash in hashicorp_key_management_plugin_init

> +  bool use_cache= caching_enabled && use_cache_on_timeout;
> +  int rc;
> +  if ((rc= curl_run(url, &response_str, use_cache)) != OPERATION_OK)

would be good to have some cache expiration time and skip curl
completely if the cached value is sufficiently new.

e.g. instead of caching_enabled you can have caching_lifetime in
seconds. Where 0 means "no caching".

> +  {
> +    if (rc == OPERATION_TIMEOUT)
> +    {
> +      unsigned int ver = cache_get_version(key_id);
> +      if (ver != ENCRYPTION_KEY_VERSION_INVALID)
> +      {
> +        return ver;
> +      }
> +    }
> +    my_printf_error(ER_UNKNOWN_ERROR, "Unable to get key data", 0);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  response = response_str.c_str();
> +  response_len = response_str.size();
> +  /*
> +    If the key is not found, this is not considered a fatal error,
> +    but we need to add an informational message to the log:
> +  */
> +  if (response_len == 0)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Key not found",

always specify the plugin name in any my_printf_error.
Having only

  2020-06-29  1:41:23 28 [ERRROR] Key not found

is not very helpful.

> +                    ME_ERROR_LOG_ONLY | ME_NOTE);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  if (json_get_object_key(response, response + response_len, "data",
> +                          &js, &js_len) != JSV_OBJECT)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Unable to get data object (http response is: %s)",
> +                    0, response);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  if (json_get_object_key(js, js + js_len, "metadata",
> +                          &ver, &ver_len) != JSV_OBJECT)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Unable to get metadata object (http response is: %s)",
> +                    0, response);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  if (json_get_object_key(ver, ver + ver_len, "version",
> +                          &ver, &ver_len) != JSV_NUMBER)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Unable to get version number (http response is: %s)",
> +                    0, response);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  /*
> +    Here we may use atoi, but it has undefined overflow behavior
> +    and does not work with unsigned integers, so use strtoul:
> +  */

No need to explain why you didn't use atoi, this is not helpful.
Or, if you'd like to explain, explain why you didn't use atol, atof,
strtod, and sscanf :)

> +  errno = 0;
> +  unsigned long version = strtoul(ver, NULL, 10);
> +  /*
> +    If the result of the conversion does not fit into an
> +    unsigned integer or if an error (for example, overflow)
> +    occurred during the conversion, then we need to return
> +    an error:
> +  */

thanks, but do not say in a comment what the code does.
explain *why* it does it. But in this case both are very clear, so just
remove the comment above. Thanks.

> +  if (version > UINT_MAX || (version == ULONG_MAX && errno))
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Integer conversion error (for version number)",

no "http response is: %s" here?

> +                    0);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  unsigned int int_ver= (unsigned int) version;
> +  if (!caching_enabled)
> +  {
> +    return int_ver;
> +  }
> +  if (json_get_object_key(js, js + js_len, "data",
> +                          &js, &js_len) != JSV_OBJECT)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Unable to get second-level data object "
> +                    "(http response is: %s)",
> +                    0, response);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  const char* key;
> +  int key_len;
> +  if (json_get_object_key(js, js + js_len, "data",
> +                          &key, &key_len) != JSV_STRING)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Unable to get data string (http response is: %s)",
> +                    0, response);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  KEY_INFO info;
> +  if (((unsigned int) key_len >> 1) > sizeof(info.data))
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Encryption key data is too long",
> +                    ME_ERROR_LOG_ONLY | ME_NOTE);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  unsigned int length = 0;
> +  while (key_len >= 2)
> +  {
> +    int c1 = key[0];
> +    int c2 = key[1];
> +    if (! isxdigit(c1) || ! isxdigit(c2))
> +    {
> +      break;
> +    }
> +    c1 -= '0';
> +    c2 -= '0';
> +    if (c1 > 9)
> +    {
> +      c1 -= 'A' - '0';
> +      if (c1 > 15)
> +      {
> +        c1 -= 'a' - 'A';
> +      }
> +    }
> +    if (c2 > 9)
> +    {
> +      c2 -= 'A' - '0';
> +      if (c1 > 15)
> +      {
> +        c2 -= 'a' - 'A';
> +      }
> +    }

please write a macro or an inline function for that. don't repeat the
same code twice. E.g.

  static inline int c2xdigit(char *c)
  {
    return c >= 'a' ? c-'a'+10 : c >= 'A' ? c-'A'+10 : c - '0';
  }

> +    info.data[length++] = (c1 << 4) + c2;
> +    key += 2;
> +    key_len -= 2;
> +  }
> +  if (key_len)
> +  {
> +    if (key_len != 1)
> +    {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "Syntax error - the key data should contain only "
> +                      "hexadecimal digits",
> +                      0);
> +    }
> +    else
> +    {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "Syntax error - extra character in the key data",
> +                      0);
> +    }
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  info.key_id = key_id;
> +  info.key_version = int_ver;
> +  info.length = length;
> +  cache_add(key_id, int_ver, &info);
> +  return int_ver;
> +}
> +
> +static unsigned int get_key_from_vault (unsigned int key_id,
> +                                        unsigned int key_version,
> +                                        unsigned char *dstbuf,
> +                                        unsigned int *buflen)
> +{
> +  if (caching_enabled &&
> +      cache_get(key_id, key_version, dstbuf, buflen) !=
> +      ENCRYPTION_KEY_VERSION_INVALID)
> +  {
> +    return 0;
> +  }
> +  std::string response_str;
> +  const char *response;
> +  const char *js, *key;
> +  int js_len, key_len;
> +  size_t response_len;
> +/*
> +  Checking the maximum allowable length to protect
> +  against allocating too much memory on the stack:
> +*/
> +  if (vault_url_len > max_url_size)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Maximum allowed vault URL length exceeded",
> +                    0);
> +    return true;
> +  }
> +/*
> +  Maximum buffer length = url length plus 40 characters of the
> +  two 64-bit unsigned integers, plus a slash character, plus a
> +  question mark, plus length of the "data/" and the "version="
> +  strings and plus a zero byte:
> +*/
> +  size_t buf_len = vault_url_len + (40 + 5 + 8 + 3);
> +  char *url = (char *) alloca(buf_len);
> +  if (vault_url_len && vault_url[vault_url_len - 1] != '/')
> +  {
> +    if (key_version != ENCRYPTION_KEY_VERSION_INVALID)
> +      snprintf(url, buf_len, "%s/data/%u?version=%u", vault_url, key_id,
> +               key_version);
> +    else
> +      snprintf(url, buf_len, "%s/data/%u", vault_url, key_id);
> +  }
> +  else
> +  {
> +    if (key_version != ENCRYPTION_KEY_VERSION_INVALID)
> +      snprintf(url, buf_len, "%sdata/%u?version=%u", vault_url, key_id,
> +               key_version);
> +    else
> +      snprintf(url, buf_len, "%sdata/%u", vault_url, key_id);
> +  }
> +  if (curl_run(url, &response_str, false) != OPERATION_OK)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR, "Unable to get key data", 0);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  response = response_str.c_str();
> +  response_len = response_str.size();
> +  /*
> +    If the key is not found, this is not considered a fatal error,
> +    but we need to add an informational message to the log:
> +  */
> +  if (response_len == 0)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Key not found",
> +                    ME_ERROR_LOG_ONLY | ME_NOTE);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  if (json_get_object_key(response, response + response_len, "data",
> +                          &js, &js_len) != JSV_OBJECT)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Unable to get data object (http response is: %s)",
> +                    0, response);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +#ifndef NDEBUG
> +  unsigned long version;
> +#else
> +  unsigned long version= key_version;
> +  if (caching_enabled &&
> +      key_version == ENCRYPTION_KEY_VERSION_INVALID)
> +#endif
> +  {
> +    const char *ver;
> +    int ver_len;
> +    if (json_get_object_key(js, js + js_len, "metadata",
> +                            &ver, &ver_len) != JSV_OBJECT)
> +    {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "Unable to get metadata object (http response is: %s)",
> +                      0, response);
> +      return ENCRYPTION_KEY_VERSION_INVALID;
> +    }
> +    if (json_get_object_key(ver, ver + ver_len, "version",
> +                            &ver, &ver_len) != JSV_NUMBER)
> +    {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "Unable to get version number (http response is: %s)",
> +                      0, response);
> +      return ENCRYPTION_KEY_VERSION_INVALID;
> +    }
> +    /*
> +      Here we may use atoi, but it has undefined overflow behavior
> +      and does not work with unsigned integers, so use strtoul:
> +    */
> +    errno = 0;
> +    version = strtoul(ver, NULL, 10);
> +    /*
> +      If the result of the conversion does not fit into an
> +      unsigned integer or if an error (for example, overflow)
> +      occurred during the conversion, then we need to return
> +      an error:
> +    */
> +    if (version > UINT_MAX || (version == ULONG_MAX && errno))
> +    {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "Integer conversion error (for version number)",
> +                      0);
> +      return ENCRYPTION_KEY_VERSION_INVALID;
> +    }
> +  }
> +#ifndef NDEBUG
> +  /*
> +    An internal check that is needed only for debugging the plugin
> +    operation - in order to ensure that we get from the Hashicorp Vault
> +    server exactly the version of the key that is needed:
> +  */
> +  if (key_version != ENCRYPTION_KEY_VERSION_INVALID &&
> +      key_version != version)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR, "Key version mismatch", 0);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +#endif
> +  if (json_get_object_key(js, js + js_len, "data",
> +                          &js, &js_len) != JSV_OBJECT)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Unable to get second-level data object "
> +                    "(http response is: %s)",
> +                    0, response);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  if (json_get_object_key(js, js + js_len, "data",
> +                          &key, &key_len) != JSV_STRING)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Unable to get data string (http response is: %s)",
> +                    0, response);
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  unsigned int max_length = *buflen;
> +  if (((unsigned int) key_len >> 1) > max_length)
> +  {
> +    my_printf_error(ER_UNKNOWN_ERROR,
> +                    "Encryption key buffer too small",
> +                    ME_ERROR_LOG_ONLY | ME_NOTE);
> +    *buflen = key_len;
> +    return ENCRYPTION_KEY_BUFFER_TOO_SMALL;
> +  }
> +  unsigned int length = 0;
> +  while (key_len >= 2)
> +  {
> +    int c1 = key[0];
> +    int c2 = key[1];
> +    if (! isxdigit(c1) || ! isxdigit(c2))
> +    {
> +      break;
> +    }
> +    c1 -= '0';
> +    c2 -= '0';
> +    if (c1 > 9)
> +    {
> +      c1 -= 'A' - '0';
> +      if (c1 > 15)
> +      {
> +        c1 -= 'a' - 'A';
> +      }
> +    }
> +    if (c2 > 9)
> +    {
> +      c2 -= 'A' - '0';
> +      if (c1 > 15)
> +      {
> +        c2 -= 'a' - 'A';
> +      }
> +    }
> +    dstbuf[length++] = (c1 << 4) + c2;
> +    key += 2;
> +    key_len -= 2;
> +  }
> +  *buflen = length;
> +  if (key_len)
> +  {
> +    if (key_len != 1)
> +    {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "Syntax error - the key data should contain only "
> +                      "hexadecimal digits",
> +                      0);
> +    }
> +    else
> +    {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "Syntax error - extra character in the key data",
> +                      0);
> +    }
> +    return ENCRYPTION_KEY_VERSION_INVALID;
> +  }
> +  if (caching_enabled)
> +  {
> +    unsigned int int_ver= (unsigned int) version;
> +    KEY_INFO info;
> +    info.key_id = key_id;
> +    info.key_version = int_ver;
> +    info.length = length;
> +    memcpy(info.data, dstbuf, length);
> +    cache_add(key_id, int_ver, &info);
> +  }
> +  return 0;

1. you don't update the latest_version_cache here.
2. this function has lots of code essentially the same as in the
   get_latest_version. Please reuse, don't copy-paste.

> +}
> +
> +struct st_mariadb_encryption hashicorp_key_management_plugin= {
> +  MariaDB_ENCRYPTION_INTERFACE_VERSION,
> +  get_latest_version,
> +  get_key_from_vault,
> +  0, 0, 0, 0, 0
> +};
> +
> +#ifdef _MSC_VER
> +
> +static int setenv(const char *name, const char *value, int overwrite)
> +{
> +  if (!overwrite)
> +  {
> +    size_t len= 0;
> +    int rc= getenv_s(&len, NULL, 0, name);
> +    if (rc)
> +    {
> +      return rc;
> +    }
> +    if (len)
> +    {
> +      errno = EINVAL;
> +      return EINVAL;
> +    }
> +  }
> +  return _putenv_s(name, value);
> +}
> +
> +#endif
> +
> +static char *local_token= NULL;
> +
> +static int hashicorp_key_management_plugin_init(void *p)
> +{
> +  char *token_env= getenv("VAULT_TOKEN");
> +  token_len = strlen(token);
> +  if (token_len == 0) {
> +    if (token_env)
> +    {
> +      token_len = strlen(token_env);
> +      if (token_len != 0)
> +      {
> +        token = (char *) malloc(token_len + 1);
> +        memcpy(token, token_env, token_len + 1);
> +        local_token = token;
> +      }
> +    }
> +    if (token_len == 0) {
> +      my_printf_error(ER_UNKNOWN_ERROR,
> +                      "The --hashicorp-key-management-token option value "
> +                      "or the value of the corresponding parameter in the "
> +                      "configuration file must be specified, otherwise the "
> +                      "VAULT_TOKEN environment variable must be set",
> +                      0);
> +      return 1;
> +    }
> +  }
> +  else
> +  {
> +    /*
> +      If the VAULT_TOKEN environment variable is not set or
> +      is not equal to the value of the token parameter, then
> +      we must set (overwrite) it for correct operation of
> +      the mariabackup:
> +    */
> +    bool not_equal= token_env != NULL && strcmp(token_env, token) != 0;
> +    if (token_env == NULL || not_equal)
> +    {
> +      setenv("VAULT_TOKEN", token, 1);
> +      if (not_equal)
> +      {
> +        my_printf_error(ER_UNKNOWN_ERROR,
> +                        "The --hashicorp-key-management-token option value "
> +                        "or the value of the corresponding parameter is not "
> +                        "equal to the value of the VAULT_TOKEN environment "
> +                        "variable",
> +                        ME_ERROR_LOG_ONLY | ME_WARNING);
> +      }

what mariabackup child processes need it?

> +    }
> +  }
> +  curl_global_init(CURL_GLOBAL_ALL);
> +  vault_url_len = strlen(vault_url);
> +  return 0;
> +}
> +
> +static int hashicorp_key_management_plugin_deinit(void *p)
> +{
> +  latest_version_cache.clear();
> +  key_info_cache.clear();
> +  curl_global_cleanup();
> +  if (local_token)
> +  {
> +    free(local_token);
> +  }
> +  return 0;
> +}
> +
> +/*
> +  Plugin library descriptor
> +*/
> +maria_declare_plugin(hashicorp_key_management)
> +{
> +  MariaDB_ENCRYPTION_PLUGIN,
> +  &hashicorp_key_management_plugin,
> +  "hashicorp_key_management",
> +  "MariaDB Corporation",
> +  "HashiCorp Vault key management plugin",
> +  PLUGIN_LICENSE_GPL,
> +  hashicorp_key_management_plugin_init,
> +  hashicorp_key_management_plugin_deinit,
> +  0x0102 /* 1.02 */,
> +  NULL, /* status variables */
> +  settings,
> +  "1.02",
> +  MariaDB_PLUGIN_MATURITY_ALPHA
> +}
> +maria_declare_plugin_end;
> 
Regards,
Sergei
VP of MariaDB Server Engineering
and secur...@mariadb.org

_______________________________________________
Mailing list: https://launchpad.net/~maria-developers
Post to     : maria-developers@lists.launchpad.net
Unsubscribe : https://launchpad.net/~maria-developers
More help   : https://help.launchpad.net/ListHelp

Reply via email to