Arnon Warshavsky <ar...@qwilt.com> writes:

> Local functions to this file,
> changing from void to int are non-abi-breaking.
> For handling the single function that cannot
> change from void to int due to abi,
> where this is the only place it is called in,
> I added a state variable that is being checked
> right after the call to this function.
>
> --
>
> v4 - fix split literal strings in log messages
>
> Signed-off-by: Arnon Warshavsky <ar...@qwilt.com>
> ---

Hi Arnon,

Always happy to see panic calls get removed.  I have some comments inline.

>  lib/librte_eal/bsdapp/eal/eal.c           |  86 ++++++++++++++-------
>  lib/librte_eal/bsdapp/eal/eal_thread.c    |  65 +++++++++++-----
>  lib/librte_eal/common/eal_common_launch.c |  21 ++++++
>  lib/librte_eal/common/include/rte_debug.h |  12 +++
>  lib/librte_eal/linuxapp/eal/eal.c         | 120 
> ++++++++++++++++++++----------
>  lib/librte_eal/linuxapp/eal/eal_thread.c  |  65 +++++++++++-----
>  6 files changed, 270 insertions(+), 99 deletions(-)
>
> diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
> index d996190..9c2f6f1 100644
> --- a/lib/librte_eal/bsdapp/eal/eal.c
> +++ b/lib/librte_eal/bsdapp/eal/eal.c
> @@ -151,7 +151,7 @@ enum rte_iova_mode
>   * We also don't lock the whole file, so that in future we can use read-locks
>   * on other parts, e.g. memzones, to detect if there are running secondary
>   * processes. */
> -static void
> +static int
>  rte_eal_config_create(void)
>  {
>       void *rte_mem_cfg_addr;
> @@ -160,60 +160,78 @@ enum rte_iova_mode
>       const char *pathname = eal_runtime_config_path();
>  
>       if (internal_config.no_shconf)
> -             return;
> +             return 0;
>  
>       if (mem_cfg_fd < 0){
>               mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
> -             if (mem_cfg_fd < 0)
> -                     rte_panic("Cannot open '%s' for rte_mem_config\n", 
> pathname);
> +             if (mem_cfg_fd < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for 
> rte_mem_config\n",
> +                                     __func__, pathname);
> +                     return -1;
> +             }
>       }
>  
>       retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
>       if (retval < 0){
>               close(mem_cfg_fd);
> -             rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot resize '%s' for 
> rte_mem_config\n",
> +                             __func__, pathname);
> +             return -1;

Previously, it wasn't possible for mem_cfg_fd to be reused after a
failure.  Now it is - please reset it to -1 in these close conditions.

>       }
>  
>       retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
>       if (retval < 0){
>               close(mem_cfg_fd);
> -             rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another 
> primary "
> -                             "process running?\n", pathname);
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot create lock on '%s'. Is 
> another primary process running?\n",
> +                             __func__, pathname);
> +             return -1;
>       }
>  
>       rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
>                               PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 
> 0);
>  
>       if (rte_mem_cfg_addr == MAP_FAILED){
> -             rte_panic("Cannot mmap memory for rte_config\n");
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> +                             __func__);
> +             return -1;
>       }
>       memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
>       rte_config.mem_config = rte_mem_cfg_addr;
> +
> +     return 0;
>  }
>  
>  /* attach to an existing shared memory config */
> -static void
> +static int
>  rte_eal_config_attach(void)
>  {
>       void *rte_mem_cfg_addr;
>       const char *pathname = eal_runtime_config_path();
>  
>       if (internal_config.no_shconf)
> -             return;
> +             return 0;
>  
>       if (mem_cfg_fd < 0){
>               mem_cfg_fd = open(pathname, O_RDWR);
> -             if (mem_cfg_fd < 0)
> -                     rte_panic("Cannot open '%s' for rte_mem_config\n", 
> pathname);
> +             if (mem_cfg_fd < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for 
> rte_mem_config\n",
> +                                     __func__, pathname);
> +                     return -1;
> +             }
>       }
>  
>       rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
>                               PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 
> 0);
>       close(mem_cfg_fd);

Again, previously this would have aborted on a failure.  So mem_cfg_fd
needs to be reset to a value that allows retry.

> -     if (rte_mem_cfg_addr == MAP_FAILED)
> -             rte_panic("Cannot mmap memory for rte_config\n");
> +     if (rte_mem_cfg_addr == MAP_FAILED) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> +                             __func__);
> +             return -1;
> +     }
>  
>       rte_config.mem_config = rte_mem_cfg_addr;
> +
> +     return 0;
>  }
>  
>  /* Detect if we are a primary or a secondary process */
> @@ -237,23 +255,28 @@ enum rte_proc_type_t
>  }
>  
>  /* Sets up rte_config structure with the pointer to shared memory config.*/
> -static void
> +static int
>  rte_config_init(void)
>  {
>       rte_config.process_type = internal_config.process_type;
>  
>       switch (rte_config.process_type){
>       case RTE_PROC_PRIMARY:
> -             rte_eal_config_create();
> +             if (rte_eal_config_create())
> +                     return -1;
>               break;
>       case RTE_PROC_SECONDARY:
> -             rte_eal_config_attach();
> +             if (rte_eal_config_attach())
> +                     return -1;
>               rte_eal_mcfg_wait_complete(rte_config.mem_config);
>               break;
>       case RTE_PROC_AUTO:
>       case RTE_PROC_INVALID:

Not for this patch, but I just noticed that this should probably use a
'default' case.

> -             rte_panic("Invalid process type\n");
> +             RTE_LOG(CRIT, EAL, "%s(): Invalid process type %d\n",
> +                             __func__, rte_config.process_type);
> +             return -1;
>       }
> +     return 0;
>  }
>  
>  /* display usage */
> @@ -595,7 +618,8 @@ static void rte_eal_init_alert(const char *msg)
>  
>       rte_srand(rte_rdtsc());
>  
> -     rte_config_init();
> +     if (rte_config_init() != 0)
> +             return -1;

Use rte_eal_init_alert to indicate why you are failing the init.

>       if (rte_mp_channel_init() < 0) {
>               rte_eal_init_alert("failed to init mp channel\n");
> @@ -652,7 +676,8 @@ static void rte_eal_init_alert(const char *msg)
>  
>       eal_check_mem_on_local_socket();
>  
> -     eal_thread_init_master(rte_config.master_lcore);
> +     if (eal_thread_init_master(rte_config.master_lcore) != 0)
> +             return -1;

Is it ever possible to recover from this?  Still needs
rte_eal_init_alert() call.

>  
>       ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>  
> @@ -666,18 +691,27 @@ static void rte_eal_init_alert(const char *msg)
>                * create communication pipes between master thread
>                * and children
>                */
> -             if (pipe(lcore_config[i].pipe_master2slave) < 0)
> -                     rte_panic("Cannot create pipe\n");
> -             if (pipe(lcore_config[i].pipe_slave2master) < 0)
> -                     rte_panic("Cannot create pipe\n");
> +             if (pipe(lcore_config[i].pipe_master2slave) < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> +                                     __func__);
> +                     return -1;
> +             }
> +             if (pipe(lcore_config[i].pipe_slave2master) < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> +                                     __func__);
> +                     return -1;
> +             }

How are you cleaning up the threads that were spawned?  Let's say this
loop will execute 5 times, and on the 3rd entry, these errors happen.
You now leave DPDK 'half-initialized' - you've spun up threads and
allocated memory.

Also, again use rte_eal_init_alert().  It was added for a reason :)

>  
>               lcore_config[i].state = WAIT;
>  
>               /* create a thread for each lcore */
>               ret = pthread_create(&lcore_config[i].thread_id, NULL,
>                                    eal_thread_loop, NULL);
> -             if (ret != 0)
> -                     rte_panic("Cannot create thread\n");
> +             if (ret != 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot create thread\n",
> +                                     __func__);
> +                     return -1;
> +             }

Same question as before.  If pthread_create is failing, there are worse
problems than aborting.

>               /* Set thread_name for aid in debugging. */
>               snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c 
> b/lib/librte_eal/bsdapp/eal/eal_thread.c
> index d602daf..5c3947c 100644
> --- a/lib/librte_eal/bsdapp/eal/eal_thread.c
> +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
> @@ -51,16 +51,22 @@
>       n = 0;
>       while (n == 0 || (n < 0 && errno == EINTR))
>               n = write(m2s, &c, 1);
> -     if (n < 0)
> -             rte_panic("cannot write on configuration pipe\n");
> +     if (n < 0) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> +                             __func__);
> +             return -1;
> +     }
>  
>       /* wait ack */
>       do {
>               n = read(s2m, &c, 1);
>       } while (n < 0 && errno == EINTR);
>  
> -     if (n <= 0)
> -             rte_panic("cannot read on configuration pipe\n");
> +     if (n <= 0) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> +                             __func__);
> +             return -1;
> +     }
>  
>       return 0;
>  }
> @@ -84,8 +90,19 @@ void eal_thread_init_master(unsigned lcore_id)
>       RTE_PER_LCORE(_lcore_id) = lcore_id;
>  
>       /* set CPU affinity */
> -     if (eal_thread_set_affinity() < 0)
> -             rte_panic("cannot set affinity\n");
> +     if (eal_thread_set_affinity() < 0) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> +             rte_move_to_panic_state();
> +     }
> +}
> +
> +/* move to panic state and do not return */
> +static __attribute__((noreturn)) void
> +defunct_and_remain_in_endless_loop(void)
> +{
> +     rte_move_to_panic_state();
> +     while (1)
> +             sleep(1);
>  }

This is worse than a panic.  Users will blame applications for appearing
to freeze.  Please leave the panics in place rather than do this.

>  /* main loop of threads */
> @@ -106,8 +123,11 @@ void eal_thread_init_master(unsigned lcore_id)
>               if (thread_id == lcore_config[lcore_id].thread_id)
>                       break;
>       }
> -     if (lcore_id == RTE_MAX_LCORE)
> -             rte_panic("cannot retrieve lcore id\n");
> +     if (lcore_id == RTE_MAX_LCORE) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot retrieve lcore id\n",
> +                             __func__);
> +             defunct_and_remain_in_endless_loop();
> +     }

I'm not even sure this check has merit, tbh.  Is there ever a chance for
an lcore thread to be spawned like this?  Probably a better patch would
just remove all the code you've inserted, but keep the check you
removed.

>       m2s = lcore_config[lcore_id].pipe_master2slave[0];
>       s2m = lcore_config[lcore_id].pipe_slave2master[1];
> @@ -116,8 +136,10 @@ void eal_thread_init_master(unsigned lcore_id)
>       RTE_PER_LCORE(_lcore_id) = lcore_id;
>  
>       /* set CPU affinity */
> -     if (eal_thread_set_affinity() < 0)
> -             rte_panic("cannot set affinity\n");
> +     if (eal_thread_set_affinity() < 0) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> +             defunct_and_remain_in_endless_loop();

How does this improve the user experience?

> +     }
>  
>       ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>  
> @@ -133,8 +155,11 @@ void eal_thread_init_master(unsigned lcore_id)
>                       n = read(m2s, &c, 1);
>               } while (n < 0 && errno == EINTR);
>  
> -             if (n <= 0)
> -                     rte_panic("cannot read on configuration pipe\n");
> +             if (n <= 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration 
> pipe\n",
> +                                     __func__);
> +                     defunct_and_remain_in_endless_loop();

Same question.  Actually this could happen on shutdown, I think?  If
there's a race where the pipe is torn down before the thread?  Not sure
if there are any ordering guarantees around that.

> +             }
>  
>               lcore_config[lcore_id].state = RUNNING;
>  
> @@ -142,11 +167,17 @@ void eal_thread_init_master(unsigned lcore_id)
>               n = 0;
>               while (n == 0 || (n < 0 && errno == EINTR))
>                       n = write(s2m, &c, 1);
> -             if (n < 0)
> -                     rte_panic("cannot write on configuration pipe\n");
> -
> -             if (lcore_config[lcore_id].f == NULL)
> -                     rte_panic("NULL function pointer\n");
> +             if (n < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration 
> pipe\n",
> +                                     __func__);
> +                     defunct_and_remain_in_endless_loop();
> +             }
> +
> +             if (lcore_config[lcore_id].f == NULL) {
> +                     RTE_LOG(CRIT, EAL, "%s(): NULL function pointer\n",
> +                                     __func__);
> +                     defunct_and_remain_in_endless_loop();
> +             }

I don't see how any of this is better for the user.  In fact, I think
this is worse because it will make portions of the application stop
working without any way to move forward.  rte_panic() will at least give
the process a chance to recover from a potentially ephemeral condition.

>               /* call the function and store the return value */
>               fct_arg = lcore_config[lcore_id].arg;
> diff --git a/lib/librte_eal/common/eal_common_launch.c 
> b/lib/librte_eal/common/eal_common_launch.c
> index fe0ba3f..6f8bd46 100644
> --- a/lib/librte_eal/common/eal_common_launch.c
> +++ b/lib/librte_eal/common/eal_common_launch.c
> @@ -14,6 +14,7 @@
>  #include <rte_pause.h>
>  #include <rte_per_lcore.h>
>  #include <rte_lcore.h>
> +#include <rte_debug.h>
>  
>  /*
>   * Wait until a lcore finished its job.
> @@ -88,3 +89,23 @@ enum rte_lcore_state_t
>               rte_eal_wait_lcore(lcore_id);
>       }
>  }
> +
> +/* panic state */
> +static int _panic_state;
> +
> +/**
> + * Check if the system is in panic state
> + * @return int
> + */
> +int rte_get_panic_state(void)
> +{
> +     return _panic_state;
> +}
> +
> +/**
> + * Move the system to be in panic state
> + */
> +void rte_move_to_panic_state(void)
> +{
> +     _panic_state = 1;
> +}
> diff --git a/lib/librte_eal/common/include/rte_debug.h 
> b/lib/librte_eal/common/include/rte_debug.h
> index 272df49..b421d33 100644
> --- a/lib/librte_eal/common/include/rte_debug.h
> +++ b/lib/librte_eal/common/include/rte_debug.h
> @@ -79,4 +79,16 @@ void __rte_panic(const char *funcname , const char 
> *format, ...)
>  }
>  #endif
>  
> +/**
> + * Check if the system is in panic state
> + * @return int
> + */
> +int rte_get_panic_state(void);
> +
> +/**
> + * Move the system to be in panic state
> + */
> +void rte_move_to_panic_state(void);

This seems to only exist as a way of triggering the run_once check in
the eal_init.  It doesn't add anything except one more state variable to
check against.  What is the purpose?

Further, it seems unrelated to removing panics.

> +
>  #endif /* _RTE_DEBUG_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c 
> b/lib/librte_eal/linuxapp/eal/eal.c
> index 21afa73..393441a 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -160,7 +160,7 @@ enum rte_iova_mode
>   * We also don't lock the whole file, so that in future we can use read-locks
>   * on other parts, e.g. memzones, to detect if there are running secondary
>   * processes. */
> -static void
> +static int
>  rte_eal_config_create(void)
>  {
>       void *rte_mem_cfg_addr;
> @@ -169,7 +169,7 @@ enum rte_iova_mode
>       const char *pathname = eal_runtime_config_path();
>  
>       if (internal_config.no_shconf)
> -             return;
> +             return 0;
>  
>       /* map the config before hugepage address so that we don't waste a page 
> */
>       if (internal_config.base_virtaddr != 0)
> @@ -179,30 +179,39 @@ enum rte_iova_mode
>       else
>               rte_mem_cfg_addr = NULL;
>  
> -     if (mem_cfg_fd < 0){
> +     if (mem_cfg_fd < 0) {
>               mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
> -             if (mem_cfg_fd < 0)
> -                     rte_panic("Cannot open '%s' for rte_mem_config\n", 
> pathname);
> +             if (mem_cfg_fd < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for 
> rte_mem_config\n",
> +                             __func__, pathname);
> +                     return -1;
> +             }
>       }
>  
>       retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
> -     if (retval < 0){
> +     if (retval < 0) {
>               close(mem_cfg_fd);
> -             rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot resize '%s' for 
> rte_mem_config\n",
> +                             __func__, pathname);
> +             return -1;
>       }
>  
>       retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
> -     if (retval < 0){
> +     if (retval < 0) {
>               close(mem_cfg_fd);
> -             rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another 
> primary "
> -                             "process running?\n", pathname);
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot create lock on '%s'."
> +                             " Is another primary process running?\n",
> +                             __func__, pathname);
> +             return -1;
>       }
>  
>       rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, 
> sizeof(*rte_config.mem_config),
>                               PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 
> 0);
>  
> -     if (rte_mem_cfg_addr == MAP_FAILED){
> -             rte_panic("Cannot mmap memory for rte_config\n");
> +     if (rte_mem_cfg_addr == MAP_FAILED) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config\n",
> +                     __func__);
> +             return -1;
>       }
>       memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
>       rte_config.mem_config = rte_mem_cfg_addr;
> @@ -211,10 +220,11 @@ enum rte_iova_mode
>        * processes could later map the config into this exact location */
>       rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
>  
> +     return 0;
>  }
>  
>  /* attach to an existing shared memory config */
> -static void
> +static int
>  rte_eal_config_attach(void)
>  {
>       struct rte_mem_config *mem_config;
> @@ -222,33 +232,40 @@ enum rte_iova_mode
>       const char *pathname = eal_runtime_config_path();
>  
>       if (internal_config.no_shconf)
> -             return;
> +             return 0;
>  
> -     if (mem_cfg_fd < 0){
> +     if (mem_cfg_fd < 0) {
>               mem_cfg_fd = open(pathname, O_RDWR);
> -             if (mem_cfg_fd < 0)
> -                     rte_panic("Cannot open '%s' for rte_mem_config\n", 
> pathname);
> +             if (mem_cfg_fd < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot open '%s' for 
> rte_mem_config\n",
> +                                             __func__, pathname);
> +                     return -1;
> +             }
>       }
>  
>       /* map it as read-only first */
>       mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
>                       PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
> -     if (mem_config == MAP_FAILED)
> -             rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n",
> -                       errno, strerror(errno));
> +     if (mem_config == MAP_FAILED) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for rte_config! 
> error %i (%s)\n",
> +                             __func__, errno, strerror(errno));
> +             return -1;
> +     }
>  
>       rte_config.mem_config = mem_config;
> +
> +     return 0;
>  }
>  
>  /* reattach the shared config at exact memory location primary process has 
> it */
> -static void
> +static int
>  rte_eal_config_reattach(void)
>  {
>       struct rte_mem_config *mem_config;
>       void *rte_mem_cfg_addr;
>  
>       if (internal_config.no_shconf)
> -             return;
> +             return 0;
>  
>       /* save the address primary process has mapped shared config to */
>       rte_mem_cfg_addr = (void *) (uintptr_t) 
> rte_config.mem_config->mem_cfg_addr;
> @@ -263,16 +280,21 @@ enum rte_iova_mode
>       if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) {
>               if (mem_config != MAP_FAILED)
>                       /* errno is stale, don't use */
> -                     rte_panic("Cannot mmap memory for rte_config at [%p], 
> got [%p]"
> -                               " - please use '--base-virtaddr' option\n",
> -                               rte_mem_cfg_addr, mem_config);
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for "
> +                                     "rte_config at [%p], got [%p] - please 
> use "
> +                                     "'--base-virtaddr' option\n",
> +                                     __func__, rte_mem_cfg_addr, mem_config);
>               else
> -                     rte_panic("Cannot mmap memory for rte_config! error %i 
> (%s)\n",
> -                               errno, strerror(errno));
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot mmap memory for "
> +                                     "rte_config! error %i (%s)\n",
> +                                     __func__, errno, strerror(errno));
> +             return -1;
>       }
>       close(mem_cfg_fd);
>  
>       rte_config.mem_config = mem_config;
> +
> +     return 0;
>  }
>  
>  /* Detect if we are a primary or a secondary process */
> @@ -296,24 +318,31 @@ enum rte_proc_type_t
>  }
>  
>  /* Sets up rte_config structure with the pointer to shared memory config.*/
> -static void
> +static int
>  rte_config_init(void)
>  {
>       rte_config.process_type = internal_config.process_type;
>  
>       switch (rte_config.process_type){
>       case RTE_PROC_PRIMARY:
> -             rte_eal_config_create();
> +             if (rte_eal_config_create() != 0)
> +                     return -1;
>               break;
>       case RTE_PROC_SECONDARY:
> -             rte_eal_config_attach();
> +             if (rte_eal_config_attach() != 0)
> +                     return -1;
>               rte_eal_mcfg_wait_complete(rte_config.mem_config);
> -             rte_eal_config_reattach();
> +             if (rte_eal_config_reattach() != 0)
> +                     return -1;
>               break;
>       case RTE_PROC_AUTO:
>       case RTE_PROC_INVALID:
> -             rte_panic("Invalid process type\n");
> +             RTE_LOG(CRIT, EAL, "%s(): Invalid process type %d\n",
> +                             __func__, rte_config.process_type);
> +             return -1;
>       }
> +
> +     return 0;
>  }
>  
>  /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
> @@ -820,7 +849,8 @@ static void rte_eal_init_alert(const char *msg)
>  
>       rte_srand(rte_rdtsc());
>  
> -     rte_config_init();
> +     if (rte_config_init() != 0)
> +             return -1;
>  
>       if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
>               rte_eal_init_alert("Cannot init logging.");
> @@ -892,6 +922,9 @@ static void rte_eal_init_alert(const char *msg)
>  
>       eal_thread_init_master(rte_config.master_lcore);
>  
> +     if (rte_get_panic_state())
> +             return -1;
> +

Please just use run_once.  That's a better way of preventing this.

>       ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>  
>       RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
> @@ -909,18 +942,27 @@ static void rte_eal_init_alert(const char *msg)
>                * create communication pipes between master thread
>                * and children
>                */
> -             if (pipe(lcore_config[i].pipe_master2slave) < 0)
> -                     rte_panic("Cannot create pipe\n");
> -             if (pipe(lcore_config[i].pipe_slave2master) < 0)
> -                     rte_panic("Cannot create pipe\n");
> +             if (pipe(lcore_config[i].pipe_master2slave) < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> +                                     __func__);
> +                     return -1;
> +             }
> +             if (pipe(lcore_config[i].pipe_slave2master) < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot create pipe\n",
> +                                     __func__);
> +                     return -1;
> +             }
>  
>               lcore_config[i].state = WAIT;
>  
>               /* create a thread for each lcore */
>               ret = pthread_create(&lcore_config[i].thread_id, NULL,
>                                    eal_thread_loop, NULL);
> -             if (ret != 0)
> -                     rte_panic("Cannot create thread\n");
> +             if (ret != 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot create thread\n",
> +                                     __func__);
> +                     return -1;
> +             }
>  
>               /* Set thread_name for aid in debugging. */
>               snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
> diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c 
> b/lib/librte_eal/linuxapp/eal/eal_thread.c
> index 08e150b..3afcee5 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_thread.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c

All of the comments from the bsd side apply here.

> @@ -51,16 +51,22 @@
>       n = 0;
>       while (n == 0 || (n < 0 && errno == EINTR))
>               n = write(m2s, &c, 1);
> -     if (n < 0)
> -             rte_panic("cannot write on configuration pipe\n");
> +     if (n < 0) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration pipe\n",
> +                             __func__);
> +             return -1;
> +     }
>  
>       /* wait ack */
>       do {
>               n = read(s2m, &c, 1);
>       } while (n < 0 && errno == EINTR);
>  
> -     if (n <= 0)
> -             rte_panic("cannot read on configuration pipe\n");
> +     if (n <= 0) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration pipe\n",
> +                             __func__);
> +             return -1;
> +     }
>  
>       return 0;
>  }
> @@ -84,8 +90,19 @@ void eal_thread_init_master(unsigned lcore_id)
>       RTE_PER_LCORE(_lcore_id) = lcore_id;
>  
>       /* set CPU affinity */
> -     if (eal_thread_set_affinity() < 0)
> -             rte_panic("cannot set affinity\n");
> +     if (eal_thread_set_affinity() < 0) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> +             rte_move_to_panic_state();
> +     }
> +}
> +
> +/* move to panic state and do not return */
> +static __attribute__((noreturn)) void
> +defunct_and_remain_in_endless_loop(void)
> +{
> +     rte_move_to_panic_state();
> +     while (1)
> +             sleep(1);
>  }
>  
>  /* main loop of threads */
> @@ -106,8 +123,11 @@ void eal_thread_init_master(unsigned lcore_id)
>               if (thread_id == lcore_config[lcore_id].thread_id)
>                       break;
>       }
> -     if (lcore_id == RTE_MAX_LCORE)
> -             rte_panic("cannot retrieve lcore id\n");
> +     if (lcore_id == RTE_MAX_LCORE) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot retrieve lcore id\n",
> +                             __func__);
> +             defunct_and_remain_in_endless_loop();
> +     }
>  
>       m2s = lcore_config[lcore_id].pipe_master2slave[0];
>       s2m = lcore_config[lcore_id].pipe_slave2master[1];
> @@ -116,8 +136,10 @@ void eal_thread_init_master(unsigned lcore_id)
>       RTE_PER_LCORE(_lcore_id) = lcore_id;
>  
>       /* set CPU affinity */
> -     if (eal_thread_set_affinity() < 0)
> -             rte_panic("cannot set affinity\n");
> +     if (eal_thread_set_affinity() < 0) {
> +             RTE_LOG(CRIT, EAL, "%s(): Cannot set affinity\n", __func__);
> +             defunct_and_remain_in_endless_loop();
> +     }
>  
>       ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
>  
> @@ -133,8 +155,11 @@ void eal_thread_init_master(unsigned lcore_id)
>                       n = read(m2s, &c, 1);
>               } while (n < 0 && errno == EINTR);
>  
> -             if (n <= 0)
> -                     rte_panic("cannot read on configuration pipe\n");
> +             if (n <= 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot read on configuration 
> pipe\n",
> +                                     __func__);
> +                     defunct_and_remain_in_endless_loop();
> +             }
>  
>               lcore_config[lcore_id].state = RUNNING;
>  
> @@ -142,11 +167,17 @@ void eal_thread_init_master(unsigned lcore_id)
>               n = 0;
>               while (n == 0 || (n < 0 && errno == EINTR))
>                       n = write(s2m, &c, 1);
> -             if (n < 0)
> -                     rte_panic("cannot write on configuration pipe\n");
> -
> -             if (lcore_config[lcore_id].f == NULL)
> -                     rte_panic("NULL function pointer\n");
> +             if (n < 0) {
> +                     RTE_LOG(CRIT, EAL, "%s(): Cannot write on configuration 
> pipe\n",
> +                                     __func__);
> +                     defunct_and_remain_in_endless_loop();
> +             }
> +
> +             if (lcore_config[lcore_id].f == NULL) {
> +                     RTE_LOG(CRIT, EAL, "%s(): NULL function pointer\n",
> +                                     __func__);
> +                     defunct_and_remain_in_endless_loop();
> +             }
>  
>               /* call the function and store the return value */
>               fct_arg = lcore_config[lcore_id].arg;

Reply via email to