On Thu, Nov 10, 2016 at 4:13 PM, Kees Cook <[email protected]> wrote:
> On Thu, Oct 20, 2016 at 12:34 AM, Joel Fernandes <[email protected]> wrote:
>> If FTRACE_PER_CPU flag is passed to ramoops pdata, split the space into
>> multiple zones depending on the number of CPUs.
>>
>> This speeds up the performance of function tracing by about 280% in my tests 
>> as
>> we avoid the locking. The trade off being lesser space available per CPU.  
>> Let
>> the ramoops user decide which option they want based on pdata flag.
>>
>> Signed-off-by: Joel Fernandes <[email protected]>
>> ---
>>  fs/pstore/ram.c            | 72 
>> +++++++++++++++++++++++++++++++++++-----------
>>  include/linux/pstore_ram.h |  3 ++
>>  2 files changed, 58 insertions(+), 17 deletions(-)
>>
>> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
>> index 5caa512..5e96f78 100644
>> --- a/fs/pstore/ram.c
>> +++ b/fs/pstore/ram.c
>> @@ -87,7 +87,7 @@ MODULE_PARM_DESC(ramoops_ecc,
>>  struct ramoops_context {
>>         struct persistent_ram_zone **przs;
>>         struct persistent_ram_zone *cprz;
>> -       struct persistent_ram_zone *fprz;
>> +       struct persistent_ram_zone **fprzs;
>>         struct persistent_ram_zone *mprz;
>>         phys_addr_t phys_addr;
>>         unsigned long size;
>> @@ -97,6 +97,7 @@ struct ramoops_context {
>>         size_t ftrace_size;
>>         size_t pmsg_size;
>>         int dump_oops;
>> +       int flags;
>>         struct persistent_ram_ecc_info ecc_info;
>>         unsigned int max_dump_cnt;
>>         unsigned int dump_write_cnt;
>> @@ -219,9 +220,17 @@ static ssize_t ramoops_pstore_read(u64 *id, enum 
>> pstore_type_id *type,
>>         if (!prz_ok(prz))
>>                 prz = ramoops_get_next_prz(&cxt->cprz, 
>> &cxt->console_read_cnt,
>>                                            1, id, type, PSTORE_TYPE_CONSOLE, 
>> 0);
>> -       if (!prz_ok(prz))
>> -               prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt,
>> -                                          1, id, type, PSTORE_TYPE_FTRACE, 
>> 0);
>> +       if (!prz_ok(prz)) {
>> +               int max = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
>> +               while (cxt->ftrace_read_cnt < max && !prz) {
>> +                       prz = ramoops_get_next_prz(cxt->fprzs,
>> +                                       &cxt->ftrace_read_cnt, max, id, type,
>> +                                       PSTORE_TYPE_FTRACE, 0);
>> +                       if (!prz_ok(prz))
>> +                               continue;
>> +               }
>> +       }
>> +
>>         if (!prz_ok(prz))
>>                 prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt,
>>                                            1, id, type, PSTORE_TYPE_PMSG, 0);
>> @@ -283,9 +292,23 @@ static int notrace ramoops_pstore_write_buf(enum 
>> pstore_type_id type,
>>                 persistent_ram_write(cxt->cprz, buf, size, PSTORE_RAM_LOCK);
>>                 return 0;
>>         } else if (type == PSTORE_TYPE_FTRACE) {
>> -               if (!cxt->fprz)
>> +               int zonenum, lock;
>> +
>> +               if (!cxt->fprzs)
>>                         return -ENOMEM;
>> -               persistent_ram_write(cxt->fprz, buf, size, PSTORE_RAM_LOCK);
>> +               /*
>> +                * If per-cpu buffers, don't lock. Otherwise there's only
>> +                * 1 zone for ftrace (zone 0) and all CPUs share it, so lock.
>> +                */
>> +               if (cxt->flags & FTRACE_PER_CPU) {
>> +                       zonenum = smp_processor_id();
>> +                       lock = PSTORE_RAM_NOLOCK;
>> +               } else {
>> +                       zonenum = 0;
>> +                       lock = PSTORE_RAM_LOCK;
>> +               }
>> +
>> +               persistent_ram_write(cxt->fprzs[zonenum], buf, size, lock);
>>                 return 0;
>>         } else if (type == PSTORE_TYPE_PMSG) {
>>                 pr_warn_ratelimited("ramoops: warning: PMSG shouldn't call 
>> %s\n",
>> @@ -352,6 +375,7 @@ static int ramoops_pstore_erase(enum pstore_type_id 
>> type, u64 id, int count,
>>  {
>>         struct ramoops_context *cxt = psi->data;
>>         struct persistent_ram_zone *prz;
>> +       int max;
>>
>>         switch (type) {
>>         case PSTORE_TYPE_DMESG:
>> @@ -363,7 +387,10 @@ static int ramoops_pstore_erase(enum pstore_type_id 
>> type, u64 id, int count,
>>                 prz = cxt->cprz;
>>                 break;
>>         case PSTORE_TYPE_FTRACE:
>> -               prz = cxt->fprz;
>> +               max = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
>> +               if (id >= max)
>> +                       return -EINVAL;
>> +               prz = cxt->fprzs[id];
>>                 break;
>>         case PSTORE_TYPE_PMSG:
>>                 prz = cxt->mprz;
>> @@ -394,14 +421,23 @@ static void ramoops_free_przs(struct ramoops_context 
>> *cxt)
>>  {
>>         int i;
>>
>> -       if (!cxt->przs)
>> -               return;
>> +       /* Free dump PRZs */
>> +       if (cxt->przs) {
>> +               for (i = 0; i < cxt->max_dump_cnt; i++)
>> +                       persistent_ram_free(cxt->przs[i]);
>>
>> -       for (i = 0; i < cxt->max_dump_cnt; i++)
>> -               persistent_ram_free(cxt->przs[i]);
>> +               kfree(cxt->przs);
>> +               cxt->max_dump_cnt = 0;
>> +       }
>>
>> -       kfree(cxt->przs);
>> -       cxt->max_dump_cnt = 0;
>> +       /* Free ftrace PRZs */
>> +       if (cxt->fprzs) {
>> +               int nr = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
>> +
>> +               for (i = 0; i < nr; i++)
>> +                       persistent_ram_free(cxt->fprzs[i]);
>> +               kfree(cxt->fprzs);
>> +       }
>>  }
>>
>>  static int ramoops_init_przs(struct device *dev, struct ramoops_context 
>> *cxt,
>> @@ -618,6 +654,7 @@ static int ramoops_probe(struct platform_device *pdev)
>>         cxt->ftrace_size = pdata->ftrace_size;
>>         cxt->pmsg_size = pdata->pmsg_size;
>>         cxt->dump_oops = pdata->dump_oops;
>> +       cxt->flags = pdata->flags;
>>         cxt->ecc_info = pdata->ecc_info;
>>
>>         paddr = cxt->phys_addr;
>> @@ -633,8 +670,9 @@ static int ramoops_probe(struct platform_device *pdev)
>>         if (err)
>>                 goto fail_init_cprz;
>>
>> -       err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, 
>> cxt->ftrace_size,
>> -                              LINUX_VERSION_CODE);
>> +       err = ramoops_init_przs(dev, cxt, &cxt->fprzs, &paddr, 
>> cxt->ftrace_size,
>> +                               (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 
>> 1,
>> +                               LINUX_VERSION_CODE);
>>         if (err)
>>                 goto fail_init_fprz;
>>
>> @@ -698,7 +736,6 @@ fail_clear:
>>         cxt->pstore.bufsize = 0;
>>         persistent_ram_free(cxt->mprz);
>>  fail_init_mprz:
>> -       persistent_ram_free(cxt->fprz);
>>  fail_init_fprz:
>>         persistent_ram_free(cxt->cprz);
>>  fail_init_cprz:
>> @@ -717,7 +754,6 @@ static int ramoops_remove(struct platform_device *pdev)
>>         cxt->pstore.bufsize = 0;
>>
>>         persistent_ram_free(cxt->mprz);
>> -       persistent_ram_free(cxt->fprz);
>>         persistent_ram_free(cxt->cprz);
>>         ramoops_free_przs(cxt);
>>
>> @@ -759,6 +795,8 @@ static void ramoops_register_dummy(void)
>>         dummy_data->ftrace_size = ramoops_ftrace_size;
>>         dummy_data->pmsg_size = ramoops_pmsg_size;
>>         dummy_data->dump_oops = dump_oops;
>> +       dummy_data->flags = FTRACE_PER_CPU;
>> +
>>         /*
>>          * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC
>>          * (using 1 byte for ECC isn't much of use anyway).
>> diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
>> index 69f3487..122f78d 100644
>> --- a/include/linux/pstore_ram.h
>> +++ b/include/linux/pstore_ram.h
>> @@ -86,6 +86,8 @@ ssize_t persistent_ram_ecc_string(struct 
>> persistent_ram_zone *prz,
>>   * @mem_address        physical memory address to contain ramoops
>>   */
>>
>> +#define FTRACE_PER_CPU BIT(0)
>> +
>>  struct ramoops_platform_data {
>>         unsigned long   mem_size;
>>         phys_addr_t     mem_address;
>> @@ -95,6 +97,7 @@ struct ramoops_platform_data {
>>         unsigned long   ftrace_size;
>>         unsigned long   pmsg_size;
>>         int             dump_oops;
>> +       int             flags;
>>         struct persistent_ram_ecc_info ecc_info;
>>  };
>>
>> --
>> 2.7.4
>>
>
> Is there a case for not setting FTRACE_PER_CPU?

Yes, if there are too many CPUs and only a few are in use, then the
rest of the space allocated for the other CPUs not in use go waste. If
the ram zone is begin enough though, then it still makes sense even
for the many CPUs case.

Thanks,
Joel

Reply via email to