[ 
https://issues.apache.org/jira/browse/HDDS-9043?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sadanand Shenoy reassigned HDDS-9043:
-------------------------------------

    Assignee: Sadanand Shenoy

> [snapshot] Distcp throws DuplicateFileException when files are deleted in 
> source directory
> ------------------------------------------------------------------------------------------
>
>                 Key: HDDS-9043
>                 URL: https://issues.apache.org/jira/browse/HDDS-9043
>             Project: Apache Ozone
>          Issue Type: Bug
>          Components: Ozone Manager
>            Reporter: Jyotirmoy Sinha
>            Assignee: Sadanand Shenoy
>            Priority: Major
>              Labels: ozone-snapshot
>
> Steps to reproduce:
>  # Create source vol/buck/key
>  # Create destination vol/buck
>  # Run base replication distcp from source to destination
>  # Create snapshot snap1 on both the source and destination buckets
>  # Delete the key from the source bucket, then create snapshot snap2 on the 
> source bucket
>  # Run snapshot distcp from source to destination bucket with snap1 snap2
> Filesystem after step 3 -
> {code:java}
> [root@quasar-vebabo-1 ~]# ozone fs -ls -R ofs://ozone1/vola*
> drwxrwxrwx   - systest systest          0 2023-07-19 07:19 
> ofs://ozone1/vola1/bucka1
> -rw-rw-rw-   3 systest systest        672 2023-07-19 07:19 
> ofs://ozone1/vola1/bucka1/key1
> drwxrwxrwx   - systest systest          0 2023-07-19 07:20 
> ofs://ozone1/vola2/bucka2
> -rw-rw-rw-   3 systest systest        672 2023-07-19 07:21 
> ofs://ozone1/vola2/bucka2/key1 {code}
> Filesystem after step 5 -
> {code:java}
> [root@quasar-vebabo-1 ~]# ozone fs -ls -R ofs://ozone1/vola*
> drwxrwxrwx   - systest systest          0 2023-07-19 07:19 
> ofs://ozone1/vola1/bucka1
> drwxrwxrwx   - systest systest          0 2023-07-19 07:23 
> ofs://ozone1/vola1/bucka1/.Trash
> drwxrwxrwx   - systest systest          0 2023-07-19 07:23 
> ofs://ozone1/vola1/bucka1/.Trash/systest
> drwxrwxrwx   - systest systest          0 2023-07-19 07:23 
> ofs://ozone1/vola1/bucka1/.Trash/systest/Current
> -rw-rw-rw-   3 systest systest        672 2023-07-19 07:19 
> ofs://ozone1/vola1/bucka1/.Trash/systest/Current/key1
> drwxrwxrwx   - systest systest          0 2023-07-19 07:20 
> ofs://ozone1/vola2/bucka2
> -rw-rw-rw-   3 systest systest        672 2023-07-19 07:21 
> ofs://ozone1/vola2/bucka2/key1 {code}
> Filesystem after step 6 -
> {code:java}
> [root@quasar-vebabo-1 ~]# ozone fs -ls -R ofs://ozone1/vola*
> drwxrwxrwx   - systest systest          0 2023-07-19 07:19 
> ofs://ozone1/vola1/bucka1
> drwxrwxrwx   - systest systest          0 2023-07-19 07:23 
> ofs://ozone1/vola1/bucka1/.Trash
> drwxrwxrwx   - systest systest          0 2023-07-19 07:23 
> ofs://ozone1/vola1/bucka1/.Trash/systest
> drwxrwxrwx   - systest systest          0 2023-07-19 07:23 
> ofs://ozone1/vola1/bucka1/.Trash/systest/Current
> -rw-rw-rw-   3 systest systest        672 2023-07-19 07:19 
> ofs://ozone1/vola1/bucka1/.Trash/systest/Current/key1
> drwxrwxrwx   - systest systest          0 2023-07-19 07:20 
> ofs://ozone1/vola2/bucka2
> drwxrwxrwx   - systest systest          0 2023-07-19 07:27 
> ofs://ozone1/vola2/bucka2/.Trash
> drwxrwxrwx   - systest systest          0 2023-07-19 07:27 
> ofs://ozone1/vola2/bucka2/.Trash/systest
> drwxrwxrwx   - systest systest          0 2023-07-19 07:27 
> ofs://ozone1/vola2/bucka2/.Trash/systest/Current
> -rw-rw-rw-   3 systest systest        672 2023-07-19 07:21 
> ofs://ozone1/vola2/bucka2/.Trash/systest/Current/key1 {code}
> Distcp command output -
> {code:java}
> [root@quasar-vebabo-1 ~]# hadoop distcp -update -diff snap1 snap2 
> ofs://ozone1/vola1/bucka1 ofs://ozone1/vola2/bucka2
> 23/07/19 07:26:20 INFO tools.DistCp: Input Options: 
> DistCpOptions{atomicCommit=false, syncFolder=true, deleteMissing=false, 
> ignoreFailures=false, overwrite=false, append=false, useDiff=true, 
> useRdiff=false, fromSnapshot=snap1, toSnapshot=snap2, skipCRC=false, 
> blocking=true, numListstatusThreads=0, maxMaps=20, mapBandwidth=0.0, 
> copyStrategy='uniformsize', preserveStatus=[], atomicWorkPath=null, 
> logPath=null, sourceFileListing=null, 
> sourcePaths=[ofs://ozone1/vola1/bucka1], 
> targetPath=ofs://ozone1/vola2/bucka2, filtersFile='null', blocksPerChunk=0, 
> copyBufferSize=8192, verboseLog=false, directWrite=false, useiterator=false}, 
> sourcePaths=[ofs://ozone1/vola1/bucka1], targetPathExists=true, 
> preserveRawXattrsfalse
> 23/07/19 07:27:22 INFO kms.KMSClientProvider: New token created: (Kind: 
> kms-dt, Service: 
> kms://[email protected]:9494/kms, Ident: 
> (kms-dt owner=systest, renewer=yarn, realUser=, issueDate=1689751642718, 
> maxDate=1690356442718, sequenceNumber=9, masterKeyId=2))
> 23/07/19 07:27:22 INFO security.TokenCache: Got dt for ofs://ozone1; Kind: 
> OzoneToken, Service: 
> 172.27.128.65:9862,172.27.191.208:9862,172.27.204.65:9862, Ident: (OzoneToken 
> [email protected], renewer=yarn, realUser=, 
> issueDate=2023-07-19T07:27:22.313Z, maxDate=2023-07-26T07:27:22.313Z, 
> sequenceNumber=5, masterKeyId=1, strToSign=null, signature=null, 
> awsAccessKeyId=null, omServiceId=ozone1, omCertSerialId=52311743208636877)
> 23/07/19 07:27:22 INFO security.TokenCache: Got dt for ofs://ozone1; Kind: 
> kms-dt, Service: 
> kms://[email protected];quasar-vebabo-2.quasar-vebabo.root.hwx.site:9494/kms,
>  Ident: (kms-dt owner=systest, renewer=yarn, realUser=, 
> issueDate=1689751642718, maxDate=1690356442718, sequenceNumber=9, 
> masterKeyId=2)
> 23/07/19 07:27:23 INFO tools.SimpleCopyListing: Starting: Building listing 
> using multi threaded approach for ofs://ozone1/vola1/bucka1/.snapshot/snap2
> 23/07/19 07:27:23 INFO tools.SimpleCopyListing: Building listing using multi 
> threaded approach for ofs://ozone1/vola1/bucka1/.snapshot/snap2: duration 
> 0:00.067s
> 23/07/19 07:27:23 INFO tools.SimpleCopyListing: Starting: Building listing 
> using multi threaded approach for ofs://ozone1/vola1/bucka1/.snapshot/snap2
> 23/07/19 07:27:23 INFO tools.SimpleCopyListing: Building listing using multi 
> threaded approach for ofs://ozone1/vola1/bucka1/.snapshot/snap2: duration 
> 0:00.019s
> 23/07/19 07:27:23 INFO tools.SimpleCopyListing: Starting: Building listing 
> using multi threaded approach for ofs://ozone1/vola1/bucka1/.snapshot/snap2
> 23/07/19 07:27:23 INFO tools.SimpleCopyListing: Building listing using multi 
> threaded approach for ofs://ozone1/vola1/bucka1/.snapshot/snap2: duration 
> 0:00.012s
> 23/07/19 07:27:23 INFO Configuration.deprecation: io.sort.mb is deprecated. 
> Instead, use mapreduce.task.io.sort.mb
> 23/07/19 07:27:23 INFO Configuration.deprecation: io.sort.factor is 
> deprecated. Instead, use mapreduce.task.io.sort.factor
> 23/07/19 07:27:23 ERROR tools.DistCp: Duplicate files in input path:
> org.apache.hadoop.tools.CopyListing$DuplicateFileException: File 
> ofs://ozone1/vola1/bucka1/.snapshot/snap2/.Trash/systest and 
> ofs://ozone1/vola1/bucka1/.snapshot/snap2/.Trash/systest would cause 
> duplicates. Aborting
>     at 
> org.apache.hadoop.tools.CopyListing.validateFinalListing(CopyListing.java:175)
>     at org.apache.hadoop.tools.CopyListing.buildListing(CopyListing.java:93)
>     at 
> org.apache.hadoop.tools.DistCp.createInputFileListingWithDiff(DistCp.java:397)
>     at org.apache.hadoop.tools.DistCp.prepareFileListing(DistCp.java:89)
>     at org.apache.hadoop.tools.DistCp.createAndSubmitJob(DistCp.java:216)
>     at org.apache.hadoop.tools.DistCp.execute(DistCp.java:193)
>     at org.apache.hadoop.tools.DistCp.run(DistCp.java:155)
>     at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:81)
>     at org.apache.hadoop.tools.DistCp.main(DistCp.java:445) {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to