[ 
https://issues.apache.org/jira/browse/HDFS-15798?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

huhaiyang updated HDFS-15798:
-----------------------------
    Description: 
When an EC reconstruction task fails, the decrementXmitsInProgress operation 
is performed twice.
 As a result, the XmitsInProgress counter of the DN can become negative.
{code:java}
// ErasureCodingWorker.java

/**
 * Builds a {@code StripedBlockReconstructor} for each given erasure-coding
 * reconstruction task and submits it to {@code stripedReconstructionPool}.
 *
 * <p>For every task that has valid targets, the DataNode's xmits counter is
 * incremented by the task's weighted xmits (at least 1) before the task is
 * submitted. Any {@code Throwable} raised while preparing or submitting a
 * task is caught and logged, so one failing task does not stop the loop; the
 * catch block also decrements the counter by {@code xmitsSubmitted}.
 *
 * <p>NOTE(review): the issue this excerpt is attached to (HDFS-15798) reports
 * that the decrement ends up being applied twice when a task fails. The
 * second decrement is not visible in this method - presumably it happens
 * inside the submitted task; confirm against StripedBlockReconstructor.
 *
 * @param ecTasks the reconstruction tasks to process
 */
public void processErasureCodingTasks(
    Collection<BlockECReconstructionInfo> ecTasks) {
  for (BlockECReconstructionInfo reconInfo : ecTasks) {
    // Stays 0 until the counter has actually been incremented below, so the
    // catch block passes 0 to the decrement when the failure happened earlier.
    int xmitsSubmitted = 0;
    try {
      StripedReconstructionInfo stripedReconInfo =
          new StripedReconstructionInfo(
          reconInfo.getExtendedBlock(), reconInfo.getErasureCodingPolicy(),
          reconInfo.getLiveBlockIndices(), reconInfo.getSourceDnInfos(),
          reconInfo.getTargetDnInfos(), reconInfo.getTargetStorageTypes(),
          reconInfo.getTargetStorageIDs());
      // It may throw IllegalArgumentException from task#stripedReader
      // constructor.
      final StripedBlockReconstructor task =
          new StripedBlockReconstructor(this, stripedReconInfo);
      if (task.hasValidTargets()) {
        // See HDFS-12044. We increase xmitsInProgress even if the task is
        // only enqueued, so that
        //   1) NN will not send more tasks than what DN can execute and
        //   2) DN will not throw away reconstruction tasks, and instead keeps
        //      an unbounded number of tasks in the executor's task queue.
        // The weighted value is truncated to int and clamped to at least 1.
        xmitsSubmitted = Math.max((int)(task.getXmits() * xmitWeight), 1);
        getDatanode().incrementXmitsInProcess(xmitsSubmitted); // increment
        stripedReconstructionPool.submit(task);
      } else {
        LOG.warn("No missing internal block. Skip reconstruction for task:{}",
            reconInfo);
      }
    } catch (Throwable e) {
      // Undo the increment made above (xmitsSubmitted is still 0 if the
      // failure happened before the increment). The trailing marker is the
      // issue reporter's annotation for the first of the two decrements.
      getDatanode().decrementXmitsInProgress(xmitsSubmitted); // if 1.decrement
      LOG.warn("Failed to reconstruct striped block {}",
          reconInfo.getExtendedBlock().getLocalBlock(), e);
    }
  }
}


{code}

  was:
The EC refactoring task failed, and the decrementXmitsInProgress operation will 
be performed twice
 It would be XmitsInProgress of DN has negative number
{code:java}
// code placeholder
{code}


> EC:Reconstruction task failed, and the decrementXmitsInProgress operation 
> will be performed twice
> -------------------------------------------------------------------------------------------------
>
>                 Key: HDFS-15798
>                 URL: https://issues.apache.org/jira/browse/HDFS-15798
>             Project: Hadoop HDFS
>          Issue Type: Bug
>            Reporter: huhaiyang
>            Priority: Major
>
> When an EC reconstruction task fails, the decrementXmitsInProgress operation 
> is performed twice.
>  As a result, the XmitsInProgress counter of the DN can become negative.
> {code:java}
> // ErasureCodingWorker.java
> public void processErasureCodingTasks(
>     Collection<BlockECReconstructionInfo> ecTasks) {
>   for (BlockECReconstructionInfo reconInfo : ecTasks) {
>     int xmitsSubmitted = 0;
>     try {
>       StripedReconstructionInfo stripedReconInfo =
>           new StripedReconstructionInfo(
>           reconInfo.getExtendedBlock(), reconInfo.getErasureCodingPolicy(),
>           reconInfo.getLiveBlockIndices(), reconInfo.getSourceDnInfos(),
>           reconInfo.getTargetDnInfos(), reconInfo.getTargetStorageTypes(),
>           reconInfo.getTargetStorageIDs());
>       // It may throw IllegalArgumentException from task#stripedReader
>       // constructor.
>       final StripedBlockReconstructor task =
>           new StripedBlockReconstructor(this, stripedReconInfo);
>       if (task.hasValidTargets()) {
>         // See HDFS-12044. We increase xmitsInProgress even the task is only
>         // enqueued, so that
>         //   1) NN will not send more tasks than what DN can execute and
>         //   2) DN will not throw away reconstruction tasks, and instead keeps
>         //      an unbounded number of tasks in the executor's task queue.
>         xmitsSubmitted = Math.max((int)(task.getXmits() * xmitWeight), 1);
>         getDatanode().incrementXmitsInProcess(xmitsSubmitted); // increment
>         stripedReconstructionPool.submit(task);
>       } else {
>         LOG.warn("No missing internal block. Skip reconstruction for task:{}",
>             reconInfo);
>       }
>     } catch (Throwable e) {
>       getDatanode().decrementXmitsInProgress(xmitsSubmitted); // if 1.decrement
>       LOG.warn("Failed to reconstruct striped block {}",
>           reconInfo.getExtendedBlock().getLocalBlock(), e);
>     }
>   }
> }
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to