Author: centic
Date: Mon Jul 15 05:40:56 2024
New Revision: 1919236

URL: http://svn.apache.org/viewvc?rev=1919236&view=rev
Log:
Add initial support for SOURCE_DATE_EPOCH

This allows creating reproducible binary files, without creation/modification
timestamps being set, when the environment variable SOURCE_DATE_EPOCH is set.

See https://reproducible-builds.org/docs/source-date-epoch/ for the related 
specification.

For now, we ensure that Zip-file entries have their modification time set to
1970-01-01, which seems to be enough to make simple OOXML files reproducible.

There are likely some other places where resulting files are not reproducible;
more testing will be necessary to identify other areas that should take this
into account as well.

Added:
    poi/trunk/poi/src/main/java/org/apache/poi/util/Reproducibility.java
Modified:
    
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipContentTypeManager.java
    
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
    
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPackagePropertiesMarshaller.java
    
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java
    
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java
    
poi/trunk/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java

Modified: 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipContentTypeManager.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipContentTypeManager.java?rev=1919236&r1=1919235&r2=1919236&view=diff
==============================================================================
--- 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipContentTypeManager.java
 (original)
+++ 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipContentTypeManager.java
 Mon Jul 15 05:40:56 2024
@@ -61,6 +61,8 @@ public class ZipContentTypeManager exten
 
         ZipArchiveEntry partEntry = new 
ZipArchiveEntry(CONTENT_TYPES_PART_NAME);
         try {
+            ZipHelper.adjustEntryTime(partEntry);
+
             // Referenced in ZIP
             zos.putArchiveEntry(partEntry);
             try {

Modified: 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java?rev=1919236&r1=1919235&r2=1919236&view=diff
==============================================================================
--- 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
 (original)
+++ 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
 Mon Jul 15 05:40:56 2024
@@ -18,7 +18,6 @@
 package org.apache.poi.openxml4j.opc.internal;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
@@ -38,6 +37,7 @@ import org.apache.poi.openxml4j.util.Zip
 import org.apache.poi.openxml4j.util.ZipSecureFile;
 import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.util.Internal;
+import org.apache.poi.util.Reproducibility;
 
 @Internal
 public final class ZipHelper {
@@ -69,7 +69,9 @@ public final class ZipHelper {
             return null;
         }
 
-        return new ZipArchiveEntry(corePropsRel.getTargetURI().getPath());
+        ZipArchiveEntry entry = new 
ZipArchiveEntry(corePropsRel.getTargetURI().getPath());
+        ZipHelper.adjustEntryTime(entry);
+        return entry;
     }
 
     /**
@@ -133,10 +135,10 @@ public final class ZipHelper {
             return null;
         }
     }
-    
+
     /**
      * Verifies that the given stream starts with a Zip structure.
-     * 
+     *
      * Warning - this will consume the first few bytes of the stream,
      *  you should push-back or reset the stream after use!
      */
@@ -195,7 +197,7 @@ public final class ZipHelper {
     }
 
     /**
-     * Opens the specified file as a secure zip, or returns null if no 
+     * Opens the specified file as a secure zip, or returns null if no
      *  such file exists
      *
      * @param file
@@ -211,7 +213,7 @@ public final class ZipHelper {
         if (file.isDirectory()) {
             throw new IOException("File is a directory");
         }
-        
+
         // Peek at the first few bytes to sanity check
         try (InputStream input = Files.newInputStream(file.toPath())) {
             verifyZipHeader(input);
@@ -231,4 +233,17 @@ public final class ZipHelper {
     public static ZipSecureFile openZipFile(String path) throws IOException {
         return openZipFile(new File(path));
     }
+
+    /**
+     * Sets the time of the given entry to "0" when {@link Reproducibility#isSourceDateEpoch()}
+     * reports true, otherwise leaves the entry untouched.
+     *
+     * @param entry The zip-entry to adjust, it is modified in place
+     */
+    public static void adjustEntryTime(ZipArchiveEntry entry) {
+        // constant time-field; the value of SOURCE_DATE_EPOCH itself is not applied here
+        if (Reproducibility.isSourceDateEpoch()) {
+            entry.setTime(0);
+        }
+    }
 }

Modified: 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPackagePropertiesMarshaller.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPackagePropertiesMarshaller.java?rev=1919236&r1=1919235&r2=1919236&view=diff
==============================================================================
--- 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPackagePropertiesMarshaller.java
 (original)
+++ 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPackagePropertiesMarshaller.java
 Mon Jul 15 05:40:56 2024
@@ -45,6 +45,8 @@ public final class ZipPackagePropertiesM
                 .getZipItemNameFromOPCName(part.getPartName().getURI()
                         .toString()));
         try {
+            ZipHelper.adjustEntryTime(ctEntry);
+
             // Save in ZIP
             zos.putArchiveEntry(ctEntry); // Add entry in ZIP
             try {

Modified: 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java?rev=1919236&r1=1919235&r2=1919236&view=diff
==============================================================================
--- 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java
 (original)
+++ 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java
 Mon Jul 15 05:40:56 2024
@@ -83,6 +83,8 @@ public final class ZipPartMarshaller imp
                 .getZipItemNameFromOPCName(part.getPartName().getURI()
                         .getPath()));
         try {
+            ZipHelper.adjustEntryTime(partEntry);
+
             // Create next zip entry
             zos.putArchiveEntry(partEntry);
 
@@ -187,6 +189,8 @@ public final class ZipPartMarshaller imp
         ZipArchiveEntry ctEntry = new 
ZipArchiveEntry(ZipHelper.getZipURIFromOPCName(
                 relPartName.getURI().toASCIIString()).getPath());
         try {
+            ZipHelper.adjustEntryTime(ctEntry);
+
             zos.putArchiveEntry(ctEntry);
             try {
                 return StreamHelper.saveXmlInStream(xmlOutDoc, zos);

Modified: 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java?rev=1919236&r1=1919235&r2=1919236&view=diff
==============================================================================
--- 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java
 (original)
+++ 
poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java
 Mon Jul 15 05:40:56 2024
@@ -35,7 +35,6 @@ import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Spliterator;
 
-import org.apache.commons.compress.archivers.ArchiveOutputStream;
 import org.apache.commons.compress.archivers.zip.Zip64Mode;
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
@@ -424,7 +423,7 @@ public class SXSSFWorkbook implements Wo
     }
 
     protected void injectData(ZipEntrySource zipEntrySource, OutputStream out) 
throws IOException {
-        ArchiveOutputStream zos = createArchiveOutputStream(out);
+        ZipArchiveOutputStream zos = createArchiveOutputStream(out);
         try {
             Enumeration<? extends ZipArchiveEntry> en = 
zipEntrySource.getEntries();
             while (en.hasMoreElements()) {
@@ -785,7 +784,7 @@ public class SXSSFWorkbook implements Wo
          * has been created. Support for the remove method may be added in the 
future
          * if someone can figure out a reliable implementation.
          *
-         * @throws UnsupportedOperationException
+         * @throws UnsupportedOperationException Always thrown in this 
implementation
          */
         @Override
         public void remove() throws IllegalStateException {

Added: poi/trunk/poi/src/main/java/org/apache/poi/util/Reproducibility.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi/src/main/java/org/apache/poi/util/Reproducibility.java?rev=1919236&view=auto
==============================================================================
--- poi/trunk/poi/src/main/java/org/apache/poi/util/Reproducibility.java (added)
+++ poi/trunk/poi/src/main/java/org/apache/poi/util/Reproducibility.java Mon 
Jul 15 05:40:56 2024
@@ -0,0 +1,78 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.util;
+
+import java.io.IOException;
+
+import org.apache.commons.io.function.IORunnable;
+
+/**
+ * Helper class that supports producing so-called
+ * "reproducible" output.
+ *
+ * That is, multiple runs of the same steps should
+ * produce the same byte-for-byte result.
+ *
+ * This usually means that, among other sources of "randomness",
+ * timestamps should be avoided.
+ *
+ * The class holds a static flag that is initialized from the presence
+ * of the environment variable SOURCE_DATE_EPOCH.
+ *
+ * See https://reproducible-builds.org/ for more details.
+ */
+public class Reproducibility {
+    // True when SOURCE_DATE_EPOCH is present in the environment; only its
+    // presence is checked here, the value is not parsed. The flag is used
+    // to write timestamp "0" for entries in Zip files.
+    // Not final: runWithSourceDateEpoch() overrides it temporarily (no synchronization).
+    // Spec: https://reproducible-builds.org/docs/source-date-epoch/
+    private static boolean IS_SOURCE_DATE_EPOCH =
+            System.getenv("SOURCE_DATE_EPOCH") != null;
+
+    /**
+     * Check whether SOURCE_DATE_EPOCH is set or currently overridden as set.
+     *
+     * @return True if set, false otherwise
+     */
+    public static boolean isSourceDateEpoch() {
+        return IS_SOURCE_DATE_EPOCH;
+    }
+
+    /**
+     * Execute a runnable with SOURCE_DATE_EPOCH treated as set.
+     *
+     * The flag is forced to true for the call; the previous value is restored in a finally block.
+     * This is mostly only used in tests to check reproducibility of documents.
+     *
+     * @param r A runnable which executes the wanted steps with
+     *          SOURCE_DATE_EPOCH defined
+     *
+     * @throws IOException if executing the runnable throws an IOException
+     * @throws RuntimeException if executing the runnable throws a RuntimeException
+     */
+    public static void runWithSourceDateEpoch(IORunnable r) throws IOException 
{
+        boolean before = IS_SOURCE_DATE_EPOCH;
+        IS_SOURCE_DATE_EPOCH = true;
+        try {
+            r.run();
+        } finally {
+            IS_SOURCE_DATE_EPOCH = before;
+        }
+    }
+}

Modified: 
poi/trunk/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java?rev=1919236&r1=1919235&r2=1919236&view=diff
==============================================================================
--- 
poi/trunk/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java
 (original)
+++ 
poi/trunk/poi/src/test/java/org/apache/poi/ss/usermodel/BaseTestBugzillaIssues.java
 Mon Jul 15 05:40:56 2024
@@ -42,7 +42,6 @@ import org.apache.commons.io.output.Unsy
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.ss.ITestDataProvider;
 import org.apache.poi.ss.SpreadsheetVersion;
 import org.apache.poi.ss.formula.FormulaParseException;
@@ -51,6 +50,7 @@ import org.apache.poi.ss.util.CellRangeA
 import org.apache.poi.ss.util.CellRangeAddressList;
 import org.apache.poi.ss.util.PaneInformation;
 import org.apache.poi.ss.util.SheetUtil;
+import org.apache.poi.util.Reproducibility;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
@@ -1815,70 +1815,23 @@ public abstract class BaseTestBugzillaIs
             cell.setCellValue("Ernie & Bert are cool!");
             cell.setCellFormula("A1 & \" are cool!\"");
 
-            try (UnsynchronizedByteArrayOutputStream out1 = 
UnsynchronizedByteArrayOutputStream.builder().get();
-                 UnsynchronizedByteArrayOutputStream out2 = 
UnsynchronizedByteArrayOutputStream.builder().get()) {
-                wb.write(out1);
-                wb.write(out2);
-
-                out1.flush();
-                out2.flush();
-
-                // to avoid flaky tests if the documents are written at 
slightly different timestamps
-                // we clear some bytes which contain timestamps
-                assertArrayEquals(
-                        removeTimestamp(out1.toByteArray()),
-                        removeTimestamp(out2.toByteArray()));
-            }
-        }
-    }
-
-    private byte[] removeTimestamp(byte[] bytes) {
-        if (FileMagic.valueOf(bytes) == FileMagic.OOXML) {
-            // This removes the timestamp in the header of the ZIP-Format
-            // see "Local file header" at 
https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
-            bytes[10] = 0;
-            bytes[11] = 0;
-            bytes[12] = 0;
-            bytes[13] = 0;
-
-            // there is a timestamp for every entry, so try to replace a few 
more byte-positions
-            // to reduce flakiness of this test, however we likely do not yet 
cover all entries
-            bytes[390] = 0;
-            bytes[391] = 0;
-            bytes[674] = 0;
-            bytes[676] = 0;
-            bytes[883] = 0;
-            bytes[1207] = 0;
-            bytes[1208] = 0;
-            bytes[1432] = 0;
-            bytes[1433] = 0;
-            bytes[1434] = 0;
-            bytes[1817] = 0;
-            bytes[1818] = 0;
-            bytes[2098] = 0;
-            bytes[2099] = 0;
-            bytes[2762] = 0;
-            bytes[2763] = 0;
-            bytes[2382] = 0;
-            bytes[2383] = 0;
-            bytes[2827] = 0;
-            bytes[2828] = 0;
-            bytes[2884] = 0;
-            bytes[2885] = 0;
-            bytes[2946] = 0;
-            bytes[2947] = 0;
-            bytes[3009] = 0;
-            bytes[3010] = 0;
-            bytes[3075] = 0;
-            bytes[3076] = 0;
-            bytes[3134] = 0;
-            bytes[3135] = 0;
-            bytes[3195] = 0;
-            bytes[3196] = 0;
-            bytes[3267] = 0;
-            bytes[3268] = 0;
+            Reproducibility.runWithSourceDateEpoch(
+                    () -> {
+                        try (UnsynchronizedByteArrayOutputStream out1 = 
UnsynchronizedByteArrayOutputStream.builder().get();
+                                UnsynchronizedByteArrayOutputStream out2 = 
UnsynchronizedByteArrayOutputStream.builder().get()) {
+                            wb.write(out1);
+                            wb.write(out2);
+
+                            out1.flush();
+                            out2.flush();
+
+                            // SOURCE_DATE_EPOCH handling fixes the zip entry times, so two
+                            // writes of the same workbook must be byte-for-byte identical
+                            assertArrayEquals(
+                                    out1.toByteArray(),
+                                    out2.toByteArray());
+                        }
+                    });
         }
-
-        return bytes;
     }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to