On 07/20/2012 07:41 PM, Sage Weil wrote:
> From: caleb miles <[email protected]>
>
> The server side recently added support for tuning some magic
> crush variables. Decode these variables if they are present, or use the
> default values if they are not present.
>
> Corresponds to ceph.git commit 89af369c25f274fe62ef730e5e8aad0c54f1e5a5.
>
> Signed-off-by: caleb miles <[email protected]>
> Reviewed-by: Sage Weil <[email protected]>
You know I prefer symbolic constant definitions instead of
bald numeric constants in the middle of code. I.e.:
#define CRUSH_LOCAL_TRIES_DEFAULT 2
#define CRUSH_LOCAL_FALLBACK_TRIES_DEFAULT 5
#define CRUSH_TOTAL_TRIES_DEFAULT 19
But it's OK with me if this gets committed as-is.
Reviewed-by: Alex Elder <[email protected]>
> ---
> include/linux/ceph/ceph_features.h | 4 ++-
> include/linux/crush/crush.h | 8 +++++++
> net/ceph/crush/mapper.c | 13 ++++++-----
> net/ceph/osdmap.c | 39
> ++++++++++++++++++++++++++++++++++++
> 4 files changed, 57 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/ceph/ceph_features.h
> b/include/linux/ceph/ceph_features.h
> index 342f93d..df25dcf 100644
> --- a/include/linux/ceph/ceph_features.h
> +++ b/include/linux/ceph/ceph_features.h
> @@ -12,12 +12,14 @@
> #define CEPH_FEATURE_MONNAMES (1<<5)
> #define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
> #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
> +#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
>
> /*
> * Features supported.
> */
> #define CEPH_FEATURES_SUPPORTED_DEFAULT \
> - (CEPH_FEATURE_NOSRCADDR)
> + (CEPH_FEATURE_NOSRCADDR | \
> + CEPH_FEATURE_CRUSH_TUNABLES)
>
> #define CEPH_FEATURES_REQUIRED_DEFAULT \
> (CEPH_FEATURE_NOSRCADDR)
> diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
> index 7c47508..25baa28 100644
> --- a/include/linux/crush/crush.h
> +++ b/include/linux/crush/crush.h
> @@ -154,6 +154,14 @@ struct crush_map {
> __s32 max_buckets;
> __u32 max_rules;
> __s32 max_devices;
> +
> + /* choose local retries before re-descent */
> + __u32 choose_local_tries;
> + /* choose local attempts using a fallback permutation before
> + * re-descent */
> + __u32 choose_local_fallback_tries;
> + /* choose attempts before giving up */
> + __u32 choose_total_tries;
> };
>
>
> diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
> index d7edc24..35fce75 100644
> --- a/net/ceph/crush/mapper.c
> +++ b/net/ceph/crush/mapper.c
> @@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map,
> int item = 0;
> int itemtype;
> int collide, reject;
> - const unsigned int orig_tries = 5; /* attempts before we fall back to
> search */
>
> dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n",
> recurse_to_leaf ? "_LEAF" : "",
> bucket->id, x, outpos, numrep);
> @@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map,
> reject = 1;
> goto reject;
> }
> - if (flocal >= (in->size>>1) &&
> - flocal > orig_tries)
> + if (map->choose_local_fallback_tries > 0 &&
> + flocal >= (in->size>>1) &&
> + flocal > map->choose_local_fallback_tries)
> item = bucket_perm_choose(in, x, r);
> else
> item = crush_bucket_choose(in, x, r);
> @@ -422,13 +422,14 @@ reject:
> ftotal++;
> flocal++;
>
> - if (collide && flocal < 3)
> + if (collide && flocal <=
> map->choose_local_tries)
> /* retry locally a few times */
> retry_bucket = 1;
> - else if (flocal <= in->size +
> orig_tries)
> + else if
> (map->choose_local_fallback_tries > 0 &&
> + flocal <= in->size +
> map->choose_local_fallback_tries)
> /* exhaustive bucket search */
> retry_bucket = 1;
> - else if (ftotal < 20)
> + else if (ftotal <=
> map->choose_total_tries)
> /* then retry descent */
> retry_descent = 1;
> else
> diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
> index 9600674..3124b71 100644
> --- a/net/ceph/osdmap.c
> +++ b/net/ceph/osdmap.c
> @@ -135,6 +135,21 @@ bad:
> return -EINVAL;
> }
>
> +static int skip_name_map(void **p, void *end)
> +{
> + int len;
> + ceph_decode_32_safe(p, end, len ,bad);
> + while (len--) {
> + int strlen;
> + *p += sizeof(u32);
> + ceph_decode_32_safe(p, end, strlen, bad);
> + *p += strlen;
> +}
> + return 0;
> +bad:
> + return -EINVAL;
> +}
> +
> static struct crush_map *crush_decode(void *pbyval, void *end)
> {
> struct crush_map *c;
> @@ -143,6 +158,7 @@ static struct crush_map *crush_decode(void *pbyval, void
> *end)
> void **p = &pbyval;
> void *start = pbyval;
> u32 magic;
> + u32 num_name_maps;
>
> dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
>
> @@ -150,6 +166,11 @@ static struct crush_map *crush_decode(void *pbyval, void
> *end)
> if (c == NULL)
> return ERR_PTR(-ENOMEM);
>
> + /* set tunables to default values */
> + c->choose_local_tries = 2;
> + c->choose_local_fallback_tries = 5;
> + c->choose_total_tries = 19;
> +
> ceph_decode_need(p, end, 4*sizeof(u32), bad);
> magic = ceph_decode_32(p);
> if (magic != CRUSH_MAGIC) {
> @@ -297,7 +318,25 @@ static struct crush_map *crush_decode(void *pbyval, void
> *end)
> }
>
> /* ignore trailing name maps. */
> + for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
> + err = skip_name_map(p, end);
> + if (err < 0)
> + goto done;
> + }
> +
> + /* tunables */
> + ceph_decode_need(p, end, 3*sizeof(u32), done);
> + c->choose_local_tries = ceph_decode_32(p);
> + c->choose_local_fallback_tries = ceph_decode_32(p);
> + c->choose_total_tries = ceph_decode_32(p);
> + dout("crush decode tunable choose_local_tries = %d",
> + c->choose_local_tries);
> + dout("crush decode tunable choose_local_fallback_tries = %d",
> + c->choose_local_fallback_tries);
> + dout("crush decode tunable choose_total_tries = %d",
> + c->choose_total_tries);
>
> +done:
> dout("crush_decode success\n");
> return c;
>
>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html