[RESEND PATCH v4 2/6] libsas: shut down the PHY if events reached the threshold

2017-09-19 Thread Jason Yan
If the PHY bursts too many events, we will allocate a lot of events for the
worker. This may lead to memory exhaustion.

Dan Williams suggested to shut down the PHY if the events reached the
threshold, because in this case the PHY may have gone into some
erroneous state. Users can re-enable the PHY by sysfs if they want.

We cannot use the fixed memory pool because if we run out of events, the
shut down event and loss of signal event will be lost too. The events still
need to be allocated and processed in this case.

Suggested-by: Dan Williams 
Signed-off-by: Jason Yan 
CC: John Garry 
CC: Johannes Thumshirn 
CC: Ewan Milne 
CC: Christoph Hellwig 
CC: Tomas Henzl 
---
 drivers/scsi/libsas/sas_init.c | 21 -
 drivers/scsi/libsas/sas_phy.c  | 31 ++-
 include/scsi/libsas.h  |  6 ++
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 85c278a..b1e03d5 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -122,6 +122,8 @@ int sas_register_ha(struct sas_ha_struct *sas_ha)
INIT_LIST_HEAD(&sas_ha->defer_q);
INIT_LIST_HEAD(&sas_ha->eh_dev_q);
 
+   sas_ha->event_thres = SAS_PHY_SHUTDOWN_THRES;
+
error = sas_register_phys(sas_ha);
if (error) {
printk(KERN_NOTICE "couldn't register sas phys:%d\n", error);
@@ -556,14 +558,31 @@ EXPORT_SYMBOL_GPL(sas_domain_attach_transport);
 
 struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
 {
+   struct asd_sas_event *event;
gfp_t flags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
 
-   return kmem_cache_zalloc(sas_event_cache, flags);
+   event = kmem_cache_zalloc(sas_event_cache, flags);
+   if (!event)
+   return NULL;
+
+   atomic_inc(&phy->event_nr);
+   if (atomic_read(&phy->event_nr) > phy->ha->event_thres &&
+   !phy->in_shutdown) {
+   phy->in_shutdown = 1;
+   sas_printk("The phy%02d bursting events, shut it down.\n",
+  phy->id);
+   sas_notify_phy_event(phy, PHYE_SHUTDOWN);
+   }
+
+   return event;
 }
 
 void sas_free_event(struct asd_sas_event *event)
 {
+   struct asd_sas_phy *phy = event->phy;
+
kmem_cache_free(sas_event_cache, event);
+   atomic_dec(&phy->event_nr);
 }
 
 /* -- SAS Class register/unregister -- */
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index 59f8292..3df1eec 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -35,6 +35,7 @@ static void sas_phye_loss_of_signal(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
 
+   phy->in_shutdown = 0;
phy->error = 0;
sas_deform_port(phy, 1);
 }
@@ -44,6 +45,7 @@ static void sas_phye_oob_done(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
 
+   phy->in_shutdown = 0;
phy->error = 0;
 }
 
@@ -105,6 +107,32 @@ static void sas_phye_resume_timeout(struct work_struct 
*work)
 }
 
 
+static void sas_phye_shutdown(struct work_struct *work)
+{
+   struct asd_sas_event *ev = to_asd_sas_event(work);
+   struct asd_sas_phy *phy = ev->phy;
+   struct sas_ha_struct *sas_ha = phy->ha;
+   struct sas_internal *i =
+   to_sas_internal(sas_ha->core.shost->transportt);
+
+   if (phy->enabled && i->dft->lldd_control_phy) {
+   int ret;
+
+   phy->error = 0;
+   phy->enabled = 0;
+   ret = i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE, NULL);
+   if (ret)
+   sas_printk("lldd disable phy%02d returned %d\n",
+   phy->id, ret);
+
+   } else if (!i->dft->lldd_control_phy)
+   sas_printk("lldd does not support phy%02d control\n", phy->id);
+   else
+   sas_printk("phy%02d is not enabled, cannot shutdown\n",
+   phy->id);
+
+}
+
 /* -- Phy class registration -- */
 
 int sas_register_phys(struct sas_ha_struct *sas_ha)
@@ -116,6 +144,7 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
struct asd_sas_phy *phy = sas_ha->sas_phy[i];
 
phy->error = 0;
+   atomic_set(&phy->event_nr, 0);
INIT_LIST_HEAD(&phy->port_phy_el);
 
phy->port = NULL;
@@ -151,5 +180,5 @@ const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS] = {
[PHYE_OOB_ERROR] = sas_phye_oob_error,
[PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
[PHYE_RESUME_TIMEOUT] = sas_phye_resume_timeout,
-
+   [PHYE_SHUTDOWN] = sas_phye_shutdown,
 };
diff --git 

[RESEND PATCH v4 2/6] libsas: shut down the PHY if events reached the threshold

2017-09-19 Thread Jason Yan
If the PHY bursts too many events, we will allocate a lot of events for the
worker. This may lead to memory exhaustion.

Dan Williams suggested to shut down the PHY if the events reached the
threshold, because in this case the PHY may have gone into some
erroneous state. Users can re-enable the PHY by sysfs if they want.

We cannot use the fixed memory pool because if we run out of events, the
shut down event and loss of signal event will be lost too. The events still
need to be allocated and processed in this case.

Suggested-by: Dan Williams 
Signed-off-by: Jason Yan 
CC: John Garry 
CC: Johannes Thumshirn 
CC: Ewan Milne 
CC: Christoph Hellwig 
CC: Tomas Henzl 
---
 drivers/scsi/libsas/sas_init.c | 21 -
 drivers/scsi/libsas/sas_phy.c  | 31 ++-
 include/scsi/libsas.h  |  6 ++
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 85c278a..b1e03d5 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -122,6 +122,8 @@ int sas_register_ha(struct sas_ha_struct *sas_ha)
INIT_LIST_HEAD(&sas_ha->defer_q);
INIT_LIST_HEAD(&sas_ha->eh_dev_q);
 
+   sas_ha->event_thres = SAS_PHY_SHUTDOWN_THRES;
+
error = sas_register_phys(sas_ha);
if (error) {
printk(KERN_NOTICE "couldn't register sas phys:%d\n", error);
@@ -556,14 +558,31 @@ EXPORT_SYMBOL_GPL(sas_domain_attach_transport);
 
 struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy)
 {
+   struct asd_sas_event *event;
gfp_t flags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
 
-   return kmem_cache_zalloc(sas_event_cache, flags);
+   event = kmem_cache_zalloc(sas_event_cache, flags);
+   if (!event)
+   return NULL;
+
+   atomic_inc(&phy->event_nr);
+   if (atomic_read(&phy->event_nr) > phy->ha->event_thres &&
+   !phy->in_shutdown) {
+   phy->in_shutdown = 1;
+   sas_printk("The phy%02d bursting events, shut it down.\n",
+  phy->id);
+   sas_notify_phy_event(phy, PHYE_SHUTDOWN);
+   }
+
+   return event;
 }
 
 void sas_free_event(struct asd_sas_event *event)
 {
+   struct asd_sas_phy *phy = event->phy;
+
kmem_cache_free(sas_event_cache, event);
+   atomic_dec(&phy->event_nr);
 }
 
 /* -- SAS Class register/unregister -- */
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index 59f8292..3df1eec 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -35,6 +35,7 @@ static void sas_phye_loss_of_signal(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
 
+   phy->in_shutdown = 0;
phy->error = 0;
sas_deform_port(phy, 1);
 }
@@ -44,6 +45,7 @@ static void sas_phye_oob_done(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
 
+   phy->in_shutdown = 0;
phy->error = 0;
 }
 
@@ -105,6 +107,32 @@ static void sas_phye_resume_timeout(struct work_struct 
*work)
 }
 
 
+static void sas_phye_shutdown(struct work_struct *work)
+{
+   struct asd_sas_event *ev = to_asd_sas_event(work);
+   struct asd_sas_phy *phy = ev->phy;
+   struct sas_ha_struct *sas_ha = phy->ha;
+   struct sas_internal *i =
+   to_sas_internal(sas_ha->core.shost->transportt);
+
+   if (phy->enabled && i->dft->lldd_control_phy) {
+   int ret;
+
+   phy->error = 0;
+   phy->enabled = 0;
+   ret = i->dft->lldd_control_phy(phy, PHY_FUNC_DISABLE, NULL);
+   if (ret)
+   sas_printk("lldd disable phy%02d returned %d\n",
+   phy->id, ret);
+
+   } else if (!i->dft->lldd_control_phy)
+   sas_printk("lldd does not support phy%02d control\n", phy->id);
+   else
+   sas_printk("phy%02d is not enabled, cannot shutdown\n",
+   phy->id);
+
+}
+
 /* -- Phy class registration -- */
 
 int sas_register_phys(struct sas_ha_struct *sas_ha)
@@ -116,6 +144,7 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
struct asd_sas_phy *phy = sas_ha->sas_phy[i];
 
phy->error = 0;
+   atomic_set(&phy->event_nr, 0);
INIT_LIST_HEAD(&phy->port_phy_el);
 
phy->port = NULL;
@@ -151,5 +180,5 @@ const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS] = {
[PHYE_OOB_ERROR] = sas_phye_oob_error,
[PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
[PHYE_RESUME_TIMEOUT] = sas_phye_resume_timeout,
-
+   [PHYE_SHUTDOWN] = sas_phye_shutdown,
 };
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index c80321b..2fa0b13 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -75,6