On 2015/02/11 21:13, Qiu, Michael wrote:
> On 2/11/2015 4:14 PM, Tetsuya Mukawa wrote:
>> On 2015/02/11 15:29, Qiu, Michael wrote:
>>> On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote:
>>>> On 2015/02/11 13:53, Tetsuya Mukawa wrote:
>>>>> On 2015/02/11 12:27, Qiu, Michael wrote:
>>>>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>>>>>> -----Original Message-----
>>>>>>>> From: Qiu, Michael
>>>>>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>>>>>> To: Tetsuya Mukawa; dev at dpdk.org
>>>>>>>> Cc: Iremonger, Bernard
>>>>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address 
>>>>>>>> comparison APIs
>>>>>>>>
>>>>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>>>>>> addresses by eal_compare_pci_addr().
>>>>>>>>>
>>>>>>>>> v5:
>>>>>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>>>>>> v4:
>>>>>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>>>>>> - Add parameter checking.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
>>>>>>>>> ---
>>>>>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 
>>>>>>>>> ++++++++---------------
>>>>>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 
>>>>>>>>> +++++++++++++++++++++++++++++++
>>>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 
>>>>>>>>> ++++++++---------------
>>>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>>> index 74ecce7..c844d58 100644
>>>>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>>>>>       return (0);
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>>> *addr2) -{
>>>>>>>>> -     uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + 
>>>>>>>>> (addr->devid << 8) + addr-
>>>>>>>>> function;
>>>>>>>>> -     uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) 
>>>>>>>>> + (addr2->devid << 8) +
>>>>>>>> addr2->function;
>>>>>>>>> -
>>>>>>>>> -     if (dev_addr > dev_addr2)
>>>>>>>>> -             return 1;
>>>>>>>>> -     else
>>>>>>>>> -             return 0;
>>>>>>>>> -}
>>>>>>>>> -
>>>>>>>>> -
>>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>>>>>       }
>>>>>>>>>       else {
>>>>>>>>>               struct rte_pci_device *dev2 = NULL;
>>>>>>>>> +             int ret;
>>>>>>>>>
>>>>>>>>>               TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>>>> -                     if (pci_addr_comparison(&dev->addr, 
>>>>>>>>> &dev2->addr))
>>>>>>>>> +                     ret = eal_compare_pci_addr(&dev->addr, 
>>>>>>>>> &dev2->addr);
>>>>>>>>> +                     if (ret > 0)
>>>>>>>>>                               continue;
>>>>>>>>> -                     else {
>>>>>>>>> +                     else if (ret < 0) {
>>>>>>>>>                               TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>>>                               return 0;
>>>>>>>>> +                     } else { /* already registered */
>>>>>>>>> +                             /* update pt_driver */
>>>>>>>>> +                             dev2->pt_driver = dev->pt_driver;
>>>>>>>>> +                             free(dev);
>>>>>>>>> +                             return 0;
>>>>>>>>>                       }
>>>>>>>>>               }
>>>>>>>>>               TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff 
>>>>>>>>> --git
>>>>>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>>>>>> index f3c7f71..a89f5c3 100644
>>>>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs 
>>>>>>>>> *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>>>>>>               if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>>>>>                       devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>>>>>                       continue;
>>>>>>>>> -             if (!memcmp(&dev->addr, &devargs->pci.addr, 
>>>>>>>>> sizeof(dev->addr)))
>>>>>>>>> +             if (!eal_compare_pci_addr(&dev->addr, 
>>>>>>>>> &devargs->pci.addr))
>>>>>>>>>                       return devargs;
>>>>>>>>>       }
>>>>>>>>>       return NULL;
>>>>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>>>>>> index 7f2d699..4814cd7 100644
>>>>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>>>>>
>>>>>>>>> +/* Compare two PCI device addresses. */
>>>>>>>>> +/**
>>>>>>>>> + * Utility function to compare two PCI device addresses.
>>>>>>>>> + *
>>>>>>>>> + * @param addr
>>>>>>>>> + *   The PCI Bus-Device-Function address to compare
>>>>>>>>> + * @param addr2
>>>>>>>>> + *   The PCI Bus-Device-Function address to compare
>>>>>>>>> + * @return
>>>>>>>>> + *   0 on equal PCI address.
>>>>>>>>> + *   Positive on addr is greater than addr2.
>>>>>>>>> + *   Negative on addr is less than addr2, or error.
>>>>>>>>> + */
>>>>>>>>> +static inline int
>>>>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>>> +*addr2) {
>>>>>>>>> +     uint64_t dev_addr, dev_addr2;
>>>>>>>>> +
>>>>>>>>> +     if ((addr == NULL) || (addr2 == NULL))
>>>>>>>>> +             return -1;
>>>>>>>>> +
>>>>>>>>> +     dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>>>>>> +                             (addr->devid << 8) | addr->function;
>>>>>>>>> +     dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>>>>>> +                             (addr2->devid << 8) | addr2->function;
>>>>>>>>> +
>>>>>>>>> +     if (dev_addr > dev_addr2)
>>>>>>>>> +             return 1;
>>>>>>>>> +     else if (dev_addr < dev_addr2)
>>>>>>>>> +             return -1;
>>>>>>>>> +     else
>>>>>>>>> +             return 0;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>  /**
>>>>>>>>>   * Probe the PCI bus for registered drivers.
>>>>>>>>>   *
>>>>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>>> index c0ca5a5..d847102 100644
>>>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>>> @@ -229,20 +229,6 @@ error:
>>>>>>>>>       return -1;
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>>> *addr2) -{
>>>>>>>>> -     uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + 
>>>>>>>>> (addr->devid << 8) + addr-
>>>>>>>>> function;
>>>>>>>>> -     uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) 
>>>>>>>>> + (addr2->devid << 8) +
>>>>>>>> addr2->function;
>>>>>>>>> -
>>>>>>>>> -     if (dev_addr > dev_addr2)
>>>>>>>>> -             return 1;
>>>>>>>>> -     else
>>>>>>>>> -             return 0;
>>>>>>>>> -}
>>>>>>>>> -
>>>>>>>>> -
>>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>>>>>> domain, uint8_t bus,
>>>>>>>>>       }
>>>>>>>>>       else {
>>>>>>>>>               struct rte_pci_device *dev2 = NULL;
>>>>>>>>> +             int ret;
>>>>>>>>>
>>>>>>>>>               TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>>>> -                     if (pci_addr_comparison(&dev->addr, 
>>>>>>>>> &dev2->addr))
>>>>>>>>> +                     ret = eal_compare_pci_addr(&dev->addr, 
>>>>>>>>> &dev2->addr);
>>>>>>>>> +                     if (ret > 0)
>>>>>>>>>                               continue;
>>>>>>>>> -                     else {
>>>>>>>>> +                     else if (ret < 0) {
>>>>>>>>>                               TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>>>                               return 0;
>>>>>>>>> +                     } else { /* already registered */
>>>>>>>>> +                             /* update pt_driver */
>>>>>>>>> +                             dev2->pt_driver = dev->pt_driver;
>>>>>>> Hi Tetsuya,
>>>>>>>
>>>>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs 
>>>>>>> is being lost in some scenarios.
>>>>>>> The following line should be added here:
>>>>>>>       dev2->max_vfs = dev->max_vfs;
>>>>>>>
>>>>>>> numa_node should probably be updated too (although it is not causing a 
>>>>>>> problem at present).
>>>>>>>       dev2->numa_node = dev->numa_node;
>>>>>> I'm very curious: why do those fields go missing? I haven't seen any
>>>>>> place that clears these fields.
>>>>>>
>>>>>> What is the root cause?
>>>>> Hi Michael,
>>>>>
>>>>> Here is my guess.
>>>>> The above function creates pci device list.
>>>> I am sorry. I forgot to add below information.
>>>>
>>>> The "max_vfs" and "numa_node" values come from sysfs when the above
>>>> function is processed.
>>> Yes, but it has already been registered, so why is it missing?
>> Yes, it has been registered already, but probably should be updated.
>> I guess sysfs value will be changed when igb_uio starts managing the device.
>>
>> ex)
>> 1. Boot linux
>> 2. start a dpdk application with no port.
>> 3. pci device list is registered.
>>  - Here, "max_vfs" comes from sysfs, or there is no such entry.
>> 4. igb_uio binds the device.
>> 5. I guess the max_vfs value in sysfs changes, or the max_vfs entry is created.
>> 6. The dpdk application calls hotplug function.
> Yes, agree.
>
> But can the numa node be changed?

Hi Michael,

I may be misunderstanding the meaning of numa_node.
I thought it indicated which NUMA node is nearest to the PCI device,
so it would not be configurable.
BTW, I will be out of office tomorrow. So, I will submit v8 patches next
Monday.

Thanks,
Tetsuya

>
> Bernard, does your issue occur after max_vfs changed in igb_uio?
>
> If not, I think we must figure out the reason.
>
> Thanks,
> Michael
>>  - Here, I guess we need to update "max_vfs" value.
>>
>> The above is just my assumption.
>> It may be good to wait for Bernard's reply.
>>
>> Thanks,
>> Tetsuya
>>
>>> Thanks,
>>> Michael
>>>>> And the current DPDK implementation assumes that all devices that need to
>>>>> be managed are already under igb_uio or vfio when the above code runs.
>>>>> To add the hotplug function, we also need to consider that some devices
>>>>> will start to be managed by igb_uio or vfio after the pci device list has
>>>>> been initialized. Anyway, I guess the "max_vfs" value will change when
>>>>> igb_uio or vfio starts managing the device.
>>>>>
>>>>> Hi Bernard,
>>>>>
>>>>> Could you please check the "max_vfs" and "numa_node" values, then check
>>>>> the values again after the device is managed by igb_uio or vfio?
>>>>> In my environment, it seems max_vfs is created by igb_uio.
>>>>> But my NIC doesn't have VF, so behavior might be different in your
>>>>> environment.
>>>>> I guess "numa_node" should not be changed theoretically.
>>>>>
>>>>> If my guess is correct, how about replacing following values?
>>>>> - driver
>>>>> - max_vfs
>>>>> - resource
>>>>> - (numa_node)
>>>>> Except for the above values, I guess the other values shouldn't change
>>>>> even after the device is managed by igb_uio or vfio.
>>>>>
>>>>> Thanks,
>>>>> Tetsuya
>>>>>
>>>>>> Thanks,
>>>>>> Michael
>>>>>>
>>>>>>> Regards,
>>>>>>>
>>>>>>> Bernard.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>>> +                             free(dev);
>>>>>>>>> +                             return 0;
>>>>>>>>>                       }
>>>>>>>>>               }
>>>>>>>>>               TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff 
>>>>>>>>> --git
>>>>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>>> index e53f06b..1da3507 100644
>>>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>>>>>       TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>>>>>
>>>>>>>>>               /* skip this element if it doesn't match our PCI 
>>>>>>>>> address */
>>>>>>>>> -             if (memcmp(&uio_res->pci_addr, &dev->addr, 
>>>>>>>>> sizeof(dev->addr)))
>>>>>>>>> +             if (eal_compare_pci_addr(&uio_res->pci_addr, 
>>>>>>>>> &dev->addr))
>>>>>>>>>                       continue;
>>>>>>>>>
>>>>>>>>>               for (i = 0; i != uio_res->nb_maps; i++) {
>>>>>>>> Acked-by: Michael Qiu <michael.qiu at intel.com>
>>


Reply via email to