RepositoryExternal.mk                               |    1 
 external/clucene/UnpackedTarball_clucene.mk         |    1 
 external/clucene/patches/clucene-reprobuild.patch.1 |   61 ++++++++++++++++++++
 helpcompiler/source/HelpIndexer.cxx                 |    5 +
 4 files changed, 68 insertions(+)

New commits:
commit 018070500f7f001233f5622d656fc29494620d3f
Author:     Thorsten Behrens <thorsten.behr...@allotropia.de>
AuthorDate: Thu Nov 2 18:17:50 2023 +0100
Commit:     Thorsten Behrens <thorsten.behr...@allotropia.de>
CommitDate: Thu Dec 14 03:09:08 2023 +0100

    reprobuild: don't write timestamps to clucene index files
    
    Our embedded clucene by default writes the current time in
    milliseconds into the index version fields, in an attempt to
    randomise them. Clearly this is not needed for our static help, and it
    also prevents builds from being reproducible.
    
    Change-Id: I011388b5bc72b5d86bc1900f5439036ede60c020
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158845
    Tested-by: Jenkins
    Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160668

diff --git a/RepositoryExternal.mk b/RepositoryExternal.mk
index 9659ec7c163f..da11a7d153f2 100644
--- a/RepositoryExternal.mk
+++ b/RepositoryExternal.mk
@@ -2847,6 +2847,7 @@ ifneq ($(SYSTEM_CLUCENE),)
 define gb_LinkTarget__use_clucene
 $(call gb_LinkTarget_add_defs,$(1),\
        $(filter-out -I% -isystem%,$(subst -isystem /,-isystem/,$(CLUCENE_CFLAGS))) \
+       -DSYSTEM_CLUCENE \
 )
 
 $(call gb_LinkTarget_set_include,$(1),\
diff --git a/external/clucene/UnpackedTarball_clucene.mk b/external/clucene/UnpackedTarball_clucene.mk
index 7a1eed67d2f7..76fdfe6aca56 100644
--- a/external/clucene/UnpackedTarball_clucene.mk
+++ b/external/clucene/UnpackedTarball_clucene.mk
@@ -54,6 +54,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,clucene,\
        external/clucene/patches/nullstring.patch \
        external/clucene/patches/binary_function.patch \
        external/clucene/patches/clucene-pure-virtual.patch \
+       external/clucene/patches/clucene-reprobuild.patch.1 \
 ))
 
 ifneq ($(OS),WNT)
diff --git a/external/clucene/patches/clucene-reprobuild.patch.1 b/external/clucene/patches/clucene-reprobuild.patch.1
new file mode 100644
index 000000000000..2c5a0b95135f
--- /dev/null
+++ b/external/clucene/patches/clucene-reprobuild.patch.1
@@ -0,0 +1,61 @@
+diff -ur clucene.org/src/core/CLucene/index/IndexWriter.cpp clucene/src/core/CLucene/index/IndexWriter.cpp
+--- clucene.org/src/core/CLucene/index/IndexWriter.cpp 2023-11-02 17:31:00.110168174 +0100
++++ clucene/src/core/CLucene/index/IndexWriter.cpp     2023-11-02 17:33:22.507665912 +0100
+@@ -366,6 +366,10 @@
+   }
+ }
+ 
++void IndexWriter::setSegmentInfoStartVersion(int64_t startVersion) {
++  this->segmentInfos->setStartVersion(startVersion);
++}
++
+ int32_t IndexWriter::getMaxBufferedDocs() {
+   ensureOpen();
+   return docWriter->getMaxBufferedDocs();
+diff -ur clucene.org/src/core/CLucene/index/IndexWriter.h clucene/src/core/CLucene/index/IndexWriter.h
+--- clucene.org/src/core/CLucene/index/IndexWriter.h   2023-11-02 17:31:00.113501525 +0100
++++ clucene/src/core/CLucene/index/IndexWriter.h       2023-11-02 17:33:43.547787510 +0100
+@@ -336,6 +336,12 @@
+   int64_t getWriteLockTimeout();
+ 
+   /**
++   * Sets the 0th segmentinfo version. Default is current system time
++   * in milliseconds
++   */
++  void setSegmentInfoStartVersion(int64_t startVersion);
++
++  /**
+    * Sets the maximum time to wait for a commit lock (in milliseconds).
+    */
+   void setCommitLockTimeout(int64_t commitLockTimeout);
+diff -ur clucene.org/src/core/CLucene/index/SegmentInfos.cpp clucene/src/core/CLucene/index/SegmentInfos.cpp
+--- clucene.org/src/core/CLucene/index/SegmentInfos.cpp        2023-11-02 17:31:00.110168174 +0100
++++ clucene/src/core/CLucene/index/SegmentInfos.cpp    2023-11-02 18:04:43.855243418 +0100
+@@ -662,6 +662,10 @@
+         return IndexFileNames::fileNameFromGeneration( IndexFileNames::SEGMENTS, "", nextGeneration );
+   }
+ 
++  void SegmentInfos::setStartVersion(int64_t version) {
++      this->version = version;
++  }
++
+   void SegmentInfos::clearto(size_t from, size_t end){
+       size_t range = end - from;
+       if ( (infos.size() - from) >= range) { // Make sure we actually need to remove
+diff -ur clucene.org/src/core/CLucene/index/_SegmentInfos.h clucene/src/core/CLucene/index/_SegmentInfos.h
+--- clucene.org/src/core/CLucene/index/_SegmentInfos.h 2023-11-02 17:31:00.106834824 +0100
++++ clucene/src/core/CLucene/index/_SegmentInfos.h     2023-11-02 18:04:51.178598463 +0100
+@@ -347,6 +347,13 @@
+               */
+               std::string getNextSegmentFileName();
+ 
++              /**
++              * Set version value to start from
++
++          Defaults to current time in milliseconds
++              */
++        void setStartVersion(int64_t version);
++
+               /* public vector-like operations */
+               //delete and clears objects 'from' from to 'to'
+               void clearto(size_t to, size_t end);
diff --git a/helpcompiler/source/HelpIndexer.cxx b/helpcompiler/source/HelpIndexer.cxx
index 44506bddfc31..65e46743b482 100644
--- a/helpcompiler/source/HelpIndexer.cxx
+++ b/helpcompiler/source/HelpIndexer.cxx
@@ -106,6 +106,11 @@ bool HelpIndexer::indexDocuments()
                                                                    analyzer.get(), true);
 #endif
 
+#ifndef SYSTEM_CLUCENE
+        // avoid random values in index file, making help indices reproducible
+        writer->setSegmentInfoStartVersion(0);
+#endif
+
         //Double limit of tokens allowed, otherwise we'll get a too-many-tokens
         //exception for ja help. Could alternative ignore the exception and get
         //truncated results as per java-Lucene apparently

Reply via email to