[GitHub] [lucene-solr] madrob commented on a change in pull request #1550: LUCENE-9383: benchmark module: Gradle conversion (complete)

GitBox Mon, 01 Jun 2020 07:47:48 -0700


madrob commented on a change in pull request #1550:
URL: https://github.com/apache/lucene-solr/pull/1550#discussion_r433278450




##########
File path: lucene/benchmark/build.gradle
##########
@@ -15,27 +15,138 @@
  * limitations under the License.
  */
 
-
-apply plugin: 'java-library'
+apply plugin: 'java'
+// NOT a 'java-library'.  Maybe 'application' but seems too limiting.
 
 description = 'System for benchmarking Lucene'
 
 dependencies {  
-  api project(':lucene:core')
-
-  implementation project(':lucene:analysis:common')
-  implementation project(':lucene:facet')
-  implementation project(':lucene:highlighter')
-  implementation project(':lucene:queries')
-  implementation project(':lucene:spatial-extras')
-  implementation project(':lucene:queryparser')
-
-  implementation "org.apache.commons:commons-compress"
-  implementation "com.ibm.icu:icu4j"
-  implementation "org.locationtech.spatial4j:spatial4j"
-  implementation("net.sourceforge.nekohtml:nekohtml", {
+  compile project(':lucene:core')
+
+  compile project(':lucene:analysis:common')
+  compile project(':lucene:facet')
+  compile project(':lucene:highlighter')
+  compile project(':lucene:queries')
+  compile project(':lucene:spatial-extras')
+  compile project(':lucene:queryparser')
+
+  compile "org.apache.commons:commons-compress"
+  compile "com.ibm.icu:icu4j"
+  compile "org.locationtech.spatial4j:spatial4j"
+  compile("net.sourceforge.nekohtml:nekohtml", {
     exclude module: "xml-apis"
   })
 
-  testImplementation project(':lucene:test-framework')
+  runtime project(':lucene:analysis:icu')
+
+  testCompile project(':lucene:test-framework')
+}
+
+ext {
+  tempDir = file("temp")
+  workDir = file("work")
+}
+
+task run(type: JavaExec) {
+  description "Run a perf test (optional: -PtaskAlg=conf/your-algorithm-file -PmaxHeapSize=1G)"
+  main 'org.apache.lucene.benchmark.byTask.Benchmark'
+  classpath sourceSets.main.runtimeClasspath
+  // allow these to be specified on the CLI via -PtaskAlg=  for example
+  def taskAlg = project.properties['taskAlg'] ?: 'conf/micro-standard.alg'
+  args = [taskAlg]
+
+  maxHeapSize = project.properties['maxHeapSize'] ?: '1G'
+
+  String stdOutStr = project.properties['standardOutput']
+  if (stdOutStr != null) {
+    standardOutput = new File(stdOutStr).newOutputStream()
+  }
+
+  debugOptions {
+    enabled = false
+    port = 5005
+    suspend = true
+  }
+}
+
+/* Old "collation" Ant target:
+gradle getTop100kWikiWordFiles run -PtaskAlg=conf/collation.alg -PstandardOutput=work/collation.benchmark.output.txt
+perl -CSD scripts/collation.bm2jira.pl work/collation.benchmark.output.txt
+ */
+
+/* Old "shingle" Ant target:
+gradle reuters run -PtaskAlg=conf/shingle.alg -PstandardOutput=work/shingle.benchmark.output.txt
+perl -CSD scripts/shingle.bm2jira.pl work/shingle.benchmark.output.txt
+ */
+
+// The remaining tasks just get / extract / prepare data
+
+task getEnWiki(type: Download) {
+  src "https://home.apache.org/~dsmiley/data/enwiki-20070527-pages-articles.xml.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    ant.bunzip2(src: dest, dest: tempDir) // will chop off .bz2
+  }
+}
+
+task getGeoNames(type: Download) {
+  // note: latest data is at: https://download.geonames.org/export/dump/allCountries.zip
+  //       and then randomize with: gsort -R -S 1500M file.txt > file_random.txt
+  //       and then compress with: bzip2 -9 -k file_random.txt
+  src "https://home.apache.org/~dsmiley/data/geonames_20130921_randomOrder_allCountries.txt.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    ant.bunzip2(src: dest, dest: tempDir) // will chop off .bz2
+  }
+}
+
+task getReuters(type: Download) {
+  // note: there is no HTTPS url and we don't care because this is merely test/perf data
+  src "http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+}
+task extractReuters(type: Copy) {
+  dependsOn getReuters
+  from(tarTree(getReuters.dest)) { // can expand a .gz on the fly
+    exclude '*.txt'
+  }
+  into file("$workDir/reuters")
+}
+task reuters(type: JavaExec) {
+  dependsOn extractReuters
+  def input = extractReuters.outputs.files[0]
+  def output = "$workDir/reuters-out"
+  inputs.dir(input)
+  outputs.dir(output)
+  main = 'org.apache.lucene.benchmark.utils.ExtractReuters'
+  classpath = sourceSets.main.runtimeClasspath
+  jvmArgs = ['-Xmx1G']

Review comment:
       Use `maxHeapSize = '1G'` here instead of `jvmArgs = ['-Xmx1G']`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

[GitHub] [lucene-solr] madrob commented on a change in pull request #1550: LUCENE-9383: benchmark module: Gradle conversion (complete)

Reply via email to