GitHub user paul-guo- commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1243#discussion_r118815361
  
    --- Diff: src/backend/executor/nodeShareInputScan.c ---
    @@ -793,15 +877,78 @@ shareinput_reader_waitready(int share_id, 
PlanGenerator planGen)
                }
                else if(n==0)
                {
    -                   elog(DEBUG1, "SISC READER (shareid=%d, slice=%d): Wait 
ready time out once",
    -                                   share_id, currentSliceId);
    +                   file_exists = access(writer_lock_file, F_OK);   
    +                   if(file_exists != 0)
    +                   {
    +                           elog(DEBUG3, "Wait lock file for writer time 
out interval is %d", timeout_interval);
    +                           if(timeout_interval >= 
share_input_scan_wait_lockfile_timeout || flag == true) //If lock file never 
exists or disappeared, reader will no longer waiting for writer
    +                           {
    +                                   elog(LOG, "SISC READER (shareid=%d, 
slice=%d): Wait ready time out and break",
    +                                           share_id, currentSliceId);
    +                                   pfree(writer_lock_file);
    +                                   break;
    +                           }
    +                           timeout_interval += tval.tv_sec;
    +                   }
    +                   else
    +                   {
    +                           elog(LOG, "writer lock file of 
shareinput_reader_waitready() is %s", writer_lock_file);
    +                           flag = true;
    +                           lock_fd = open(writer_lock_file, O_RDONLY);
    +                           if(lock_fd < 0)
    +                           {
    +                                   elog(DEBUG3, "Open writer's lock file 
%s failed!, error number is %d", writer_lock_file, errno);
    +                                   continue;
    +                           }
    +                           lock = flock(lock_fd, LOCK_EX | LOCK_NB);
    +                           if(lock == -1)
    +                           {
    +                                   /*
    +                                    * Reader try to lock the lock file 
which writer created until locked the lock file successfully 
    +                     * which means that writer process quit. If reader 
lock the lock file failed, it means that writer
    +                                    * process is healthy.
    +                                    */
    +                                   elog(DEBUG3, "Lock writer's lock file 
%s failed!, error number is %d", writer_lock_file, errno);
    +                           }
    +                           else if(lock == 0)
    +                           {
    +                                   /*
    +                                    * There is one situation to consider 
about.
    +                                    * Writer need a time interval to lock 
the lock file after the lock file has been created.
    +                                    * So, if reader lock the lock file 
ahead of writer, we should unlock it.
    +                                    * If reader lock the lock file after 
writer, it means that writer process has abort.
    +                                    * We should break the loop to make 
sure reader no longer wait for writer.
    +                                    */  
    +                                   if(is_lock_firsttime == true)  
    +                                   {
    +                                           lock = flock(lock_fd, LOCK_UN); 
    +                                           is_lock_firsttime = false;
    +                                           elog(DEBUG3, "Lock writer's 
lock file %s first time successfully in SISC! Unlock it.", writer_lock_file);
    +                                           continue;
    +                                   }
    +                                   else
    +                                   {
    +                                           elog(LOG, "Lock writer's lock 
file %s successfully in SISC!", writer_lock_file);
    +                                           /* Retry to close the fd in 
case there is interruption from signal */
    +                                           while ((close(lock_fd) < 0) && 
(errno == EINTR))
    --- End diff --
    
    This is a legal condition. It should not elog(ERROR).


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to