Re: [PATCH v2 08/10] EDAC/ghes: Carve out MC device handling into separate functions

2020-05-19 Thread Robert Richter
On 11.05.20 15:32:03, Borislav Petkov wrote:
> see below. It probably doesn't work but this is what it should do -
> straightforward and simple.
> 
> And now that I've looked at this in more detail, this whole DIMM
> counting is still not doing what it should do.
> 
> So lemme try again:

As you have major concerns with my code that deals with ghes private
dimm data, let's just keep smbios_handle in struct dimm_info.  ATM I
do not see any alternative solution suggested how this could be
implemented. So I am going to drop patch '[PATCH v2 04/10] EDAC/ghes:
Make SMBIOS handle private data to ghes' from this series.

Thanks,

-Robert


Re: [PATCH v2 08/10] EDAC/ghes: Carve out MC device handling into separate functions

2020-05-11 Thread Borislav Petkov
Hi,

see below. It probably doesn't work but this is what it should do -
straightforward and simple.

And now that I've looked at this in more detail, this whole DIMM
counting is still not doing what it should do.

So lemme try again:

1. Counting the DIMMs.

You should add a function which is called

ghes_count_dimms()

or so, which does:

/* Get the number of DIMMs */
dmi_walk(ghes_edac_count_dimms, _dimm);

That function should run once at entry of ghes_edac_register().

When that function is done, it should spit out a data structure which
has this mapping:

memory controller   DIMMs
0   0-x
1   x+1 - y
2   y+1 - ...

and so on.

This basically will contain the DIMM layout of the system along with the
mapping which memory controller has which DIMMs.

So that you don't have to do edac_get_dimm_by_index() and rely on having
called edac_mc_alloc() *prior* to calling edac_get_dimm_by_index() so
that mci->dimms[] is properly populated and former can give you a proper
dimm_info pointer.

You can also merge the functionality of ghes_edac_dmidecode() together
and so on so that you scan all the DIMM information needed at *once* so
that you can allocate an mci properly.

2. Memory controller allocation. Once the parsing is done, you do

edac_mc_alloc()

for the respective memory controller using the mapping above which you
have parsed at init.

3. When the allocation is done, you set the proper DIMM handles of
the dimms of this mci so that find_dimm_by_handle() can find the DIMM
correctly.

Now, here's the important part: if ghes_edac_report_mem_error() can get
the memory controller which is in error - struct cper_sec_mem_err has a
node member but that all depends on whether your BIOS even populates it
properly - then find_dimm_by_handle() can even take an mci and search
only through the DIMMs of that memory controller by the handle.

If the BIOS does not populate them properly, then you can simply search
that mapping list of *all* DIMMs which ghes_count_dimms() has created
before.

4. You lastly add edac_mc_add_mc() and all good.

The benefits of this whole approach is that you will have a single data
structure representating the DIMMs in the system and you can traverse
it back'n'forth.

The code will be simple and straight-forward and not with those DMI
callbacks here and there, picking out pieces of needed info instead of
doing a whole system scan *once* at *init* and then working with the
parsed info.

I sincerely hope that this makes sense.

Thx.

---
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 4eadc5b344c8..396945634a2a 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -535,15 +535,83 @@ static struct acpi_platform_list plat_list[] = {
{ } /* End */
 };
 
+static struct mem_ctl_info *ghes_mc_alloc(struct device *dev, int mc_idx,
+  int num_dimm)
+{
+   struct edac_mc_layer layers[1];
+   struct mem_ctl_info *mci;
+   struct ghes_mci *pvt;
+
+   layers[0].type = EDAC_MC_LAYER_ALL_MEM;
+   layers[0].size = num_dimm;
+   layers[0].is_virt_csrow = true;
+
+   mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*pvt));
+   if (!mci)
+   return NULL;
+
+   pvt = mci->pvt_info;
+   pvt->mci= mci;
+
+   mci->pdev = dev;
+   mci->mtype_cap = MEM_FLAG_EMPTY;
+   mci->edac_ctl_cap = EDAC_FLAG_NONE;
+   mci->edac_cap = EDAC_FLAG_NONE;
+   mci->mod_name = "ghes_edac.c";
+   mci->ctl_name = "ghes_edac";
+   mci->dev_name = "ghes";
+
+   return mci;
+}
+
+static int ghes_mc_add(struct mem_ctl_info *mci, struct list_head *dimm_list)
+{
+   unsigned long flags;
+   int rc;
+
+   rc = edac_mc_add_mc(mci);
+   if (rc < 0)
+   return rc;
+
+   spin_lock_irqsave(_lock, flags);
+   ghes_pvt = mci->pvt_info;
+   list_splice_tail(dimm_list, _dimm_list);
+   spin_unlock_irqrestore(_lock, flags);
+
+   return 0;
+}
+
+static void ghes_mc_free(void)
+{
+   struct mem_ctl_info *mci;
+   unsigned long flags;
+   LIST_HEAD(dimm_list);
+
+   /*
+* Wait for the irq handler being finished.
+*/
+   spin_lock_irqsave(_lock, flags);
+   mci = ghes_pvt ? ghes_pvt->mci : NULL;
+   ghes_pvt = NULL;
+   list_splice_init(_dimm_list, _list);
+   spin_unlock_irqrestore(_lock, flags);
+
+   ghes_dimm_release(_list);
+
+   if (!mci)
+   return;
+
+   mci = edac_mc_del_mc(mci->pdev);
+   if (mci)
+   edac_mc_free(mci);
+}
+
 int ghes_edac_register(struct ghes *ghes, struct device *dev)
 {
bool fake = false;
int rc = 0, num_dimm = 0;
struct mem_ctl_info *mci;
-   struct ghes_mci *pvt;
-   struct edac_mc_layer layers[1];
struct dimm_fill 

Re: [PATCH v2 08/10] EDAC/ghes: Carve out MC device handling into separate functions

2020-05-06 Thread Robert Richter
On 27.04.20 18:38:56, Borislav Petkov wrote:
> On Wed, Apr 22, 2020 at 01:58:12PM +0200, Robert Richter wrote:

> > +static int ghes_mc_add_or_free(struct mem_ctl_info *mci,
> > +   struct list_head *dimm_list)
> 
> No, I think we talked about this already. This function should be
> called:
> 
>   ghes_mc_add()
> 
> and should do one thing and one thing only in good old unix tradition:
> add the MC.
> 
> > +{
> > unsigned long flags;
> > -   int idx = -1;
> > +   int rc;
> > +
> > +   rc = edac_mc_add_mc(mci);
> > +   if (rc < 0) {
> 
> > +   ghes_dimm_release(dimm_list);
> > +   edac_mc_free(mci);
> > +   return rc;
> 
> Those last three lines should be called by the *caller* of
> ghes_mc_add(), when latter returns an error value.

These direct operations are nothing a caller should deal with.

The caller does now:

mci = ghes_mc_create(...);
... /* prepare dimms */
return ghes_mc_add_or_free(...);

To shut it down we just use:

ghes_mc_free();

Pretty simple.

Now, lets look at your suggestion to put it out of the function. A
caller always needs to free the mci and dimms, so we will get:

int rc;
mci = ghes_mc_create(...);
... /* prepare dimms */
rc = ghes_mc_add(...);
if (rc < 0) {
/* free mci */
/* free dimms */
...
}
return rc;

We loose the tail call and simplicity here. Note this duplicates code
as there are 2 users of ghes_mc_add().

Now, the caller does not know the implementation details, so we need
to provide another release function (let's call it *_release() here):

mci = ghes_mc_create(...);
... /* prepare dimms */
rc = ghes_mc_add(...);
if (rc < 0) {
ghes_mc_release(mci);
ghes_dimm_release(dimm_list);
}
return rc;

Ok, now there is another function needed to release everything.

This design also impacts ghes_mc_free(). So the shutdown
implementation would turn to:

struct mem_ctl_info *mci;
...
mci = ghes_mc_del();
ghes_mc_release(mci);
...

I don't see any benefit. See also below for the delta of an
implementation of the suggested changes.

> 
> > +   }
> > +
> > +   spin_lock_irqsave(_lock, flags);
> > +   ghes_pvt = mci->pvt_info;
> > +   list_splice_tail(dimm_list, _dimm_list);
> > +   spin_unlock_irqrestore(_lock, flags);
> > +
> > +   return 0;
> > +}
> > +
> > +static void ghes_mc_free(void)
> > +{
> > +   struct mem_ctl_info *mci;
> > +   unsigned long flags;
> > +   LIST_HEAD(dimm_list);
> > +
> > +   /*
> > +* Wait for the irq handler being finished.
> > +*/
> > +   spin_lock_irqsave(_lock, flags);
> > +   mci = ghes_pvt ? ghes_pvt->mci : NULL;
> > +   ghes_pvt = NULL;
> > +   list_splice_init(_dimm_list, _list);
> > +   spin_unlock_irqrestore(_lock, flags);
> > +
> > +   ghes_dimm_release(_list);
> > +
> > +   if (!mci)
> > +   return;
> > +
> > +   mci = edac_mc_del_mc(mci->pdev);
> > +   if (mci)
> > +   edac_mc_free(mci);
> > +}
> 
> This function needs to do only freeing of the mc. The list splicing and
> dimm releasing needs to be done by its caller, before calling it.

ghes_mc_free() is the counterpart to ghes_mc_add() and thus needs to
also handle the dimm_list here. This cannot be left to the caller.

Considering all the above, I don't see how your suggestions to the
interface could improve the code. Hmm...

Below an implementation that illustrates the changes.

Thanks,

-Robert

---
 drivers/edac/ghes_edac.c | 41 +++-
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 7f39346d895b..896d7b488fc2 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -576,18 +576,14 @@ static struct mem_ctl_info *ghes_mc_create(struct device 
*dev, int mc_idx,
return mci;
 }
 
-static int ghes_mc_add_or_free(struct mem_ctl_info *mci,
-  struct list_head *dimms)
+static int ghes_mc_add(struct mem_ctl_info *mci, struct list_head *dimms)
 {
unsigned long flags;
int rc;
 
rc = edac_mc_add_mc(mci);
-   if (rc < 0) {
-   dimm_release(dimms);
-   edac_mc_free(mci);
+   if (rc < 0)
return rc;
-   }
 
spin_lock_irqsave(_lock, flags);
ghes_pvt = mci->pvt_info;
@@ -597,7 +593,7 @@ static int ghes_mc_add_or_free(struct mem_ctl_info *mci,
return 0;
 }
 
-static void ghes_mc_free(void)
+static struct mem_ctl_info *ghes_mc_del(void)
 {
struct mem_ctl_info *mci;
unsigned long flags;
@@ -614,10 +610,14 @@ static void ghes_mc_free(void)
 
dimm_release();
 
-   if (!mci)
-   return;
+   if (mci)
+   mci = edac_mc_del_mc(mci->pdev);
 
-   mci = edac_mc_del_mc(mci->pdev);
+