Hi Guy,

I haven't used FMRs before, so if I make some incorrect assumptions 
about how they work please point them out.

Some high level questions:

Why did you use the FMR pool API (ib_fmr_pool.h) instead of the verbs 
(ib_alloc_fmr, ib_map_phys_fmr, ...)?

Why did you make FMR support configurable via a module parameter? My 
concern is that this forces users to know if their kdapl software 
needs FMR support.

More questions/comments:

On Thu, 11 Aug 2005, Guy German wrote:

> James,
> 
> This is a Tavor FMR implementation for kDPAL, that works over 
> gen2's fmr_pool.c implementation.
> 
> A few notes:
> 1. I've added mod params to kdapl_ib to control the fmr size and pool 
> length. There are still reasonable defaults, but I think we should allow
> consumers to control this.
> 2. the fmr pool allocation is done in the pz_create (if active_fmr =1). 
> The problem is that at the time of creating the pool we don't 
> know what the consumer's privileges request is going to be (passed 
> afterwords in dapl_lmr_kcreate). I think this can be solved by creating 
> 3 pools and taking from the right pool at lmr_kcreate time. 
> Do you have any "cheaper" solution to this ?

My naive suggestion is to use ib_alloc_fmr(), etc.

> 3. Can we get rid of DAT_MEM_TYPE_LMR code ? I don't understand whats it 
> for.

It is supposed to implement that memory type. If it is broken, we 
should fix it.

> Signed-off-by: Guy German <[EMAIL PROTECTED]>
> 
> Index: dapl_openib_util.c
> ===================================================================
> --- dapl_openib_util.c        (revision 3056)
> +++ dapl_openib_util.c        (working copy)
> @@ -196,6 +196,53 @@ int dapl_ib_mr_register_physical(struct 
>       return 0;
>  }
>  
> +int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
> +                         void *phys_addr, u64 length,
> +                         enum dat_mem_priv_flags privileges)

Why not change this function signature to match what you want to 
receive (ie. change length to page_count)?

> +{
> +     /* FIXME: this phase-1 implementation of fmr doesn't take "privileges"
> +        into account. This is a security breech. */
> +     u64 io_addr;
> +     u64 *page_list;
> +     int page_count;
> +     struct ib_pool_fmr *mem;
> +        int status;
> +
> +     page_list = (u64 *)phys_addr;
> +     page_count = (int)length;
> +     io_addr = page_list[0];
> +
> +     mem = ib_fmr_pool_map_phys (((struct dapl_pz *)lmr->param.pz)->fmr_pool,
> +                                     page_list,
> +                                     page_count,
> +                                     &io_addr);
> +     if (IS_ERR(mem)) {
> +             status = (int)PTR_ERR(mem);
> +             if (status != -EAGAIN)
> +                     dapl_dbg_log(DAPL_DBG_TYPE_ERR,
> +                                 "fmr_pool_map_phys ret=%d <%d pages>\n",
> +                                 status, page_count);
> +
> +             lmr->param.registered_address = 0;
> +             lmr->fmr = 0;
> +             return status;
> +     }
> +
> +     lmr->param.lmr_context = mem->fmr->lkey;
> +     lmr->param.rmr_context = mem->fmr->rkey;
> +     lmr->param.registered_size = length * PAGE_SIZE;
> +     lmr->param.registered_address = io_addr;
> +     lmr->fmr = mem;
> +
> +     dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
> +             "%s: (addr=%p size=0x%x) lkey=0x%x rkey=0x%x\n", __func__,
> +             lmr->param.registered_address, 
> +             lmr->param.registered_size, 
> +             lmr->param.lmr_context,
> +             lmr->param.rmr_context);
> +     return 0;
> +}
> +
>  int dapl_ib_mr_register_shared(struct dapl_ia *ia, struct dapl_lmr *lmr,
>                              enum dat_mem_priv_flags privileges)
>  {
> @@ -222,7 +269,10 @@ int dapl_ib_mr_deregister(struct dapl_lm
>  {
>       int status;
>  
> -     status = ib_dereg_mr(lmr->mr);
> +     if (DAT_MEM_TYPE_PLATFORM == lmr->param.mem_type && lmr->fmr)
> +             status = ib_fmr_pool_unmap(lmr->fmr);
> +     else
> +             status = ib_dereg_mr(lmr->mr);
>       if (status < 0) {
>               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
>                            " ib_dereg_mr error code return = %d\n",
> Index: dapl_openib_util.h
> ===================================================================
> --- dapl_openib_util.h        (revision 3056)
> +++ dapl_openib_util.h        (working copy)
> @@ -87,6 +87,10 @@ int dapl_ib_mr_register_physical(struct 
>                                void *phys_addr, u64 length,
>                                enum dat_mem_priv_flags privileges);
>  
> +int dapl_ib_mr_register_fmr(struct dapl_ia *ia, struct dapl_lmr *lmr,
> +                         void *phys_addr, u64 length,
> +                         enum dat_mem_priv_flags privileges);
> +
>  int dapl_ib_mr_deregister(struct dapl_lmr *lmr);
>  
>  int dapl_ib_mr_register_shared(struct dapl_ia *ia, struct dapl_lmr *lmr,
> Index: dapl_lmr.c
> ===================================================================
> --- dapl_lmr.c        (revision 3056)
> +++ dapl_lmr.c        (working copy)
> @@ -137,7 +137,7 @@ static inline int dapl_lmr_create_physic
>                                          u64 *registered_address)
>  {
>       struct dapl_lmr *new_lmr;
> -     int status;
> +     int status = 0;
>  
>       new_lmr = dapl_lmr_alloc(ia, mem_type, phys_addr,
>                                page_count, (struct dat_pz *) pz, privileges);
> @@ -151,13 +151,22 @@ static inline int dapl_lmr_create_physic
>               status = dapl_ib_mr_register_ia(ia, new_lmr, phys_addr,
>                                               page_count, privileges);
>       }
> -     else {
> +     else if (DAT_MEM_TYPE_PHYSICAL == mem_type) {
>               status = dapl_ib_mr_register_physical(ia, new_lmr, 
>                                                     phys_addr.for_array,
>                                                     page_count, privileges);
>       }
> +     else if (DAT_MEM_TYPE_PLATFORM == mem_type) {
> +             status = dapl_ib_mr_register_fmr(ia, new_lmr,
> +                                              phys_addr.for_array,
> +                                              page_count, privileges);
> +     }
> +     else {
> +             status = -EINVAL;
> +             goto error1;
> +     }
>  
> -     if (0 != status)
> +     if (status)
>               goto error2;
>  
>       atomic_inc(&pz->pz_ref_count);
> @@ -243,7 +252,7 @@ int dapl_lmr_kcreate(struct dat_ia *ia, 
>       int status;
>  
>       dapl_dbg_log(DAPL_DBG_TYPE_API,
> -                  "dapl_lmr_kcreate(ia:%p, mem_type:%x, ...)\n",
> +                  "dapl_lmr_kcreate(ia:%p, mem_type:%x)\n",

I like the ... in the printout :)

>                    ia, mem_type);
>  
>       dapl_ia = (struct dapl_ia *)ia;
> @@ -258,6 +267,11 @@ int dapl_lmr_kcreate(struct dat_ia *ia, 
>                                                rmr_context, registered_length,
>                                                registered_address);
>               break;
> +     case DAT_MEM_TYPE_PLATFORM: /* used as a proprietary Tavor-FMR */

My understanding is that FMRs are not specific to Tavor. I thought 
Arbel and Sinai also had FMR support. Is that correct? If so, we 
should change this comment.

> +             if (!active_fmr) {
> +                     status = -EINVAL;
> +                     break;
> +             }
>       case DAT_MEM_TYPE_PHYSICAL:
>       case DAT_MEM_TYPE_IA:
>               status = dapl_lmr_create_physical(dapl_ia, region_description,
> @@ -307,6 +321,7 @@ int dapl_lmr_free(struct dat_lmr *lmr)
>  
>       switch (dapl_lmr->param.mem_type) {
>       case DAT_MEM_TYPE_PHYSICAL:
> +     case DAT_MEM_TYPE_PLATFORM:
>       case DAT_MEM_TYPE_IA:
>       case DAT_MEM_TYPE_LMR:
>       {
> Index: dapl_pz.c
> ===================================================================
> --- dapl_pz.c (revision 3056)
> +++ dapl_pz.c (working copy)
> @@ -89,7 +89,17 @@ int dapl_pz_create(struct dat_ia *ia, st
>                            status);
>               goto error2;
>       }
> -     
> +
> +        if (active_fmr) {
> +             struct ib_fmr_pool_param params;
> +             set_fmr_params (&params);
> +             dapl_pz->fmr_pool = ib_create_fmr_pool(dapl_pz->pd, &params);
> +             if (IS_ERR(dapl_pz->fmr_pool))
> +                     dapl_dbg_log(DAPL_DBG_TYPE_WARN, 
> +                                  "could not create FMR pool <%ld>",
> +                                  PTR_ERR(dapl_pz->fmr_pool));
> +     }
> +
>       *pz = (struct dat_pz *)dapl_pz;
>       return 0;
>  
> @@ -104,7 +114,7 @@ error1:
>  int dapl_pz_free(struct dat_pz *pz)
>  {
>       struct dapl_pz *dapl_pz;
> -     int status;
> +     int status=0;
>  
>       dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_pz_free(%p)\n", pz);
>  
> @@ -114,8 +124,10 @@ int dapl_pz_free(struct dat_pz *pz)
>               status = -EINVAL;
>               goto error;
>       }
> -     
> -     status = ib_dealloc_pd(dapl_pz->pd);
> +     if (active_fmr)
> +             (void)ib_destroy_fmr_pool(dapl_pz->fmr_pool);
> +     else
> +             status = ib_dealloc_pd(dapl_pz->pd);


If active_fmr is true, we still allocate a PD in dapl_pz_create. 
Should the above be 

+       if (active_fmr)
+               (void)ib_destroy_fmr_pool(dapl_pz->fmr_pool);
+       status = ib_dealloc_pd(dapl_pz->pd);


>       if (status) {
>               dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ib_dealloc_pd failed: %X\n",
>                            status);
> Index: dapl_ia.c
> ===================================================================
> --- dapl_ia.c (revision 3056)
> +++ dapl_ia.c (working copy)
> @@ -745,7 +745,8 @@ int dapl_ia_query(struct dat_ia *ia_ptr,
>               provider_attr->provider_version_major = DAPL_PROVIDER_MAJOR;
>               provider_attr->provider_version_minor = DAPL_PROVIDER_MINOR;
>               provider_attr->lmr_mem_types_supported =
> -                 DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA;
> +                 DAT_MEM_TYPE_PHYSICAL | DAT_MEM_TYPE_IA | 
> +                         DAT_MEM_TYPE_PLATFORM;

Please align the DAT_MEM_TYPE_PLATFORM with the above line.

>               provider_attr->iov_ownership_on_return = DAT_IOV_CONSUMER;
>               provider_attr->dat_qos_supported = DAT_QOS_BEST_EFFORT;
>               provider_attr->completion_flags_supported =
> Index: dapl_provider.c
> ===================================================================
> --- dapl_provider.c   (revision 3056)
> +++ dapl_provider.c   (working copy)
> @@ -48,8 +48,19 @@ MODULE_AUTHOR("James Lentini");
>  
>  #ifdef CONFIG_KDAPL_INFINIBAND_DEBUG
>  static DAPL_DBG_MASK g_dapl_dbg_mask = 0;
> +unsigned int active_fmr = 1;
> +static unsigned int pool_size = 2048;
> +static unsigned int max_pages_per_fmr = 64;

These FMR module parameters should not be inside the 
CONFIG_KDAPL_INFINIBAND_DEBUG guard. They should be enabled regardless 
of the debug configuration.

> +
>  module_param_named(dbg_mask, g_dapl_dbg_mask, int, 0644);
> -MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types.");
> +module_param_named(active_fmr, active_fmr, int, 0644);
> +module_param_named(pool_size, pool_size, int, 0644);
> +module_param_named(max_pages_per_fmr, max_pages_per_fmr, int, 0644);

Can you use the g_dapl_ prefix for active_fmr, pool_size, and 
max_pages_per_fmr?

> +MODULE_PARM_DESC(dbg_mask, "Bitmask to enable debug message types. 
> <default=0>");
> +MODULE_PARM_DESC(active_fmr, "if active_fmr==1, creates fmr pool in 
> pz_create <default=0>");
> +MODULE_PARM_DESC(pool_size, "num of fmr handles in pool <default=2048>");
> +MODULE_PARM_DESC(max_pages_per_fmr, "max size (in pages) of an fmr handle 
> <default=64>");
> +
>  #endif /* CONFIG_KDAPL_INFINIBAND_DEBUG */
>  
>  static LIST_HEAD(g_dapl_provider_list);
> @@ -152,6 +163,18 @@ void dapl_dbg_log(enum dapl_dbg_type typ
>  
>  #endif /* KDAPL_INFINIBAND_DEBUG */
>  
> +void set_fmr_params (struct ib_fmr_pool_param *fmr_param_s)
> +{
> +     fmr_param_s->max_pages_per_fmr = max_pages_per_fmr;
> +     fmr_param_s->pool_size = pool_size;
> +     fmr_param_s->dirty_watermark = 32;
> +     fmr_param_s->cache = 1;
> +     fmr_param_s->flush_function = NULL;
> +     fmr_param_s->access = (IB_ACCESS_LOCAL_WRITE  |
> +                           IB_ACCESS_REMOTE_WRITE |
> +                           IB_ACCESS_REMOTE_READ);
> +}
> +             

Lets find a better name for this function and possibly a different 
location. How about dapl_fmr_pool_param_init? 

How about shortening the parameter name from fmr_param_s to either 
fmr_params or fmr_param. Either of those would be more standard.

>  static struct dapl_provider *dapl_provider_alloc(const char *name, 
>                                                struct ib_device *device, 
>                                                u8 port)
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to