dweiss commented on a change in pull request #1550:
URL: https://github.com/apache/lucene-solr/pull/1550#discussion_r433824063



##########
File path: lucene/benchmark/build.gradle
##########
@@ -15,27 +15,138 @@
  * limitations under the License.
  */
 
-
-apply plugin: 'java-library'
+apply plugin: 'java'
+// NOT a 'java-library'.  Maybe 'application' but seems too limiting.
 
 description = 'System for benchmarking Lucene'
 
 dependencies {  
-  api project(':lucene:core')
-
-  implementation project(':lucene:analysis:common')
-  implementation project(':lucene:facet')
-  implementation project(':lucene:highlighter')
-  implementation project(':lucene:queries')
-  implementation project(':lucene:spatial-extras')
-  implementation project(':lucene:queryparser')
-
-  implementation "org.apache.commons:commons-compress"
-  implementation "com.ibm.icu:icu4j"
-  implementation "org.locationtech.spatial4j:spatial4j"
-  implementation("net.sourceforge.nekohtml:nekohtml", {
+  compile project(':lucene:core')
+
+  compile project(':lucene:analysis:common')
+  compile project(':lucene:facet')
+  compile project(':lucene:highlighter')
+  compile project(':lucene:queries')
+  compile project(':lucene:spatial-extras')
+  compile project(':lucene:queryparser')
+
+  compile "org.apache.commons:commons-compress"
+  compile "com.ibm.icu:icu4j"
+  compile "org.locationtech.spatial4j:spatial4j"
+  compile("net.sourceforge.nekohtml:nekohtml", {
     exclude module: "xml-apis"
   })
 
-  testImplementation project(':lucene:test-framework')
+  runtime project(':lucene:analysis:icu')
+
+  testCompile project(':lucene:test-framework')
+}
+
+ext {
+  tempDir = file("temp")
+  workDir = file("work")
+}
+
+task run(type: JavaExec) {
+  description "Run a perf test (optional: -PtaskAlg=conf/your-algorithm-file 
-PmaxHeapSize=1G)"
+  main 'org.apache.lucene.benchmark.byTask.Benchmark'
+  classpath sourceSets.main.runtimeClasspath
+  // allow these to be specified on the CLI via -PtaskAlg=  for example
+  def taskAlg = project.properties['taskAlg'] ?: 'conf/micro-standard.alg'
+  args = [taskAlg]
+
+  maxHeapSize = project.properties['maxHeapSize'] ?: '1G'
+
+  String stdOutStr = project.properties['standardOutput']
+  if (stdOutStr != null) {
+    standardOutput = new File(stdOutStr).newOutputStream()
+  }
+
+  debugOptions {
+    enabled = false
+    port = 5005
+    suspend = true
+  }
+}
+
+/* Old "collation" Ant target:
+gradle getTop100kWikiWordFiles run -PtaskAlg=conf/collation.alg 
-PstandardOutput=work/collation.benchmark.output.txt
+perl -CSD scripts/collation.bm2jira.pl work/collation.benchmark.output.txt
+ */
+
+/* Old "shingle" Ant target:
+gradle reuters run -PtaskAlg=conf/shingle.alg 
-PstandardOutput=work/shingle.benchmark.output.txt
+perl -CSD scripts/shingle.bm2jira.pl work/shingle.benchmark.output.txt
+ */
+
+// The remaining tasks just get / extract / prepare data
+
+task getEnWiki(type: Download) {
+  src 
"https://home.apache.org/~dsmiley/data/enwiki-20070527-pages-articles.xml.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    ant.bunzip2(src: dest, dest: tempDir) // will chop off .bz2
+  }
+}
+
+task getGeoNames(type: Download) {
+  // note: latest data is at: 
https://download.geonames.org/export/dump/allCountries.zip
+  //       and then randomize with: gsort -R -S 1500M file.txt > 
file_random.txt
+  //       and then compress with: bzip2 -9 -k file_random.txt
+  src 
"https://home.apache.org/~dsmiley/data/geonames_20130921_randomOrder_allCountries.txt.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    ant.bunzip2(src: dest, dest: tempDir) // will chop off .bz2
+  }
+}
+
+task getReuters(type: Download) {
+  // note: there is no HTTPS url and we don't care because this is merely 
test/perf data
+  src 
"http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+}
+task extractReuters(type: Copy) {

Review comment:
       Add a blank line between tasks for clarity?

##########
File path: lucene/benchmark/build.gradle
##########
@@ -15,27 +15,138 @@
  * limitations under the License.
  */
 
-
-apply plugin: 'java-library'
+apply plugin: 'java'
+// NOT a 'java-library'.  Maybe 'application' but seems too limiting.
 
 description = 'System for benchmarking Lucene'
 
 dependencies {  
-  api project(':lucene:core')
-
-  implementation project(':lucene:analysis:common')
-  implementation project(':lucene:facet')
-  implementation project(':lucene:highlighter')
-  implementation project(':lucene:queries')
-  implementation project(':lucene:spatial-extras')
-  implementation project(':lucene:queryparser')
-
-  implementation "org.apache.commons:commons-compress"
-  implementation "com.ibm.icu:icu4j"
-  implementation "org.locationtech.spatial4j:spatial4j"
-  implementation("net.sourceforge.nekohtml:nekohtml", {
+  compile project(':lucene:core')
+
+  compile project(':lucene:analysis:common')
+  compile project(':lucene:facet')
+  compile project(':lucene:highlighter')
+  compile project(':lucene:queries')
+  compile project(':lucene:spatial-extras')
+  compile project(':lucene:queryparser')
+
+  compile "org.apache.commons:commons-compress"
+  compile "com.ibm.icu:icu4j"
+  compile "org.locationtech.spatial4j:spatial4j"
+  compile("net.sourceforge.nekohtml:nekohtml", {
     exclude module: "xml-apis"
   })
 
-  testImplementation project(':lucene:test-framework')
+  runtime project(':lucene:analysis:icu')
+
+  testCompile project(':lucene:test-framework')

Review comment:
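       Similar to the comment on `compile`: `testCompile` is also a deprecated configuration name; use `testImplementation` instead.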

##########
File path: lucene/benchmark/build.gradle
##########
@@ -15,27 +15,138 @@
  * limitations under the License.
  */
 
-
-apply plugin: 'java-library'
+apply plugin: 'java'
+// NOT a 'java-library'.  Maybe 'application' but seems too limiting.
 
 description = 'System for benchmarking Lucene'
 
 dependencies {  
-  api project(':lucene:core')
-
-  implementation project(':lucene:analysis:common')
-  implementation project(':lucene:facet')
-  implementation project(':lucene:highlighter')
-  implementation project(':lucene:queries')
-  implementation project(':lucene:spatial-extras')
-  implementation project(':lucene:queryparser')
-
-  implementation "org.apache.commons:commons-compress"
-  implementation "com.ibm.icu:icu4j"
-  implementation "org.locationtech.spatial4j:spatial4j"
-  implementation("net.sourceforge.nekohtml:nekohtml", {
+  compile project(':lucene:core')

Review comment:
       This is wrong: `compile` is a deprecated configuration name. Please take a look at:
   
   https://docs.gradle.org/current/userguide/java_plugin.html#sec:java_plugin_and_dependency_management
   
   I think all of these should be `implementation` dependencies.

##########
File path: lucene/benchmark/build.gradle
##########
@@ -15,27 +15,138 @@
  * limitations under the License.
  */
 
-
-apply plugin: 'java-library'
+apply plugin: 'java'
+// NOT a 'java-library'.  Maybe 'application' but seems too limiting.
 
 description = 'System for benchmarking Lucene'
 
 dependencies {  
-  api project(':lucene:core')
-
-  implementation project(':lucene:analysis:common')
-  implementation project(':lucene:facet')
-  implementation project(':lucene:highlighter')
-  implementation project(':lucene:queries')
-  implementation project(':lucene:spatial-extras')
-  implementation project(':lucene:queryparser')
-
-  implementation "org.apache.commons:commons-compress"
-  implementation "com.ibm.icu:icu4j"
-  implementation "org.locationtech.spatial4j:spatial4j"
-  implementation("net.sourceforge.nekohtml:nekohtml", {
+  compile project(':lucene:core')
+
+  compile project(':lucene:analysis:common')
+  compile project(':lucene:facet')
+  compile project(':lucene:highlighter')
+  compile project(':lucene:queries')
+  compile project(':lucene:spatial-extras')
+  compile project(':lucene:queryparser')
+
+  compile "org.apache.commons:commons-compress"
+  compile "com.ibm.icu:icu4j"
+  compile "org.locationtech.spatial4j:spatial4j"
+  compile("net.sourceforge.nekohtml:nekohtml", {
     exclude module: "xml-apis"
   })
 
-  testImplementation project(':lucene:test-framework')
+  runtime project(':lucene:analysis:icu')
+
+  testCompile project(':lucene:test-framework')
+}
+
+ext {
+  tempDir = file("temp")
+  workDir = file("work")
+}
+
+task run(type: JavaExec) {
+  description "Run a perf test (optional: -PtaskAlg=conf/your-algorithm-file 
-PmaxHeapSize=1G)"
+  main 'org.apache.lucene.benchmark.byTask.Benchmark'
+  classpath sourceSets.main.runtimeClasspath
+  // allow these to be specified on the CLI via -PtaskAlg=  for example
+  def taskAlg = project.properties['taskAlg'] ?: 'conf/micro-standard.alg'

Review comment:
       Use the global function `propertyOrDefault`, which accepts more than just `project.properties`.

##########
File path: lucene/benchmark/build.gradle
##########
@@ -15,27 +15,138 @@
  * limitations under the License.
  */
 
-
-apply plugin: 'java-library'
+apply plugin: 'java'
+// NOT a 'java-library'.  Maybe 'application' but seems too limiting.
 
 description = 'System for benchmarking Lucene'
 
 dependencies {  
-  api project(':lucene:core')
-
-  implementation project(':lucene:analysis:common')
-  implementation project(':lucene:facet')
-  implementation project(':lucene:highlighter')
-  implementation project(':lucene:queries')
-  implementation project(':lucene:spatial-extras')
-  implementation project(':lucene:queryparser')
-
-  implementation "org.apache.commons:commons-compress"
-  implementation "com.ibm.icu:icu4j"
-  implementation "org.locationtech.spatial4j:spatial4j"
-  implementation("net.sourceforge.nekohtml:nekohtml", {
+  compile project(':lucene:core')
+
+  compile project(':lucene:analysis:common')
+  compile project(':lucene:facet')
+  compile project(':lucene:highlighter')
+  compile project(':lucene:queries')
+  compile project(':lucene:spatial-extras')
+  compile project(':lucene:queryparser')
+
+  compile "org.apache.commons:commons-compress"
+  compile "com.ibm.icu:icu4j"
+  compile "org.locationtech.spatial4j:spatial4j"
+  compile("net.sourceforge.nekohtml:nekohtml", {
     exclude module: "xml-apis"
   })
 
-  testImplementation project(':lucene:test-framework')
+  runtime project(':lucene:analysis:icu')
+
+  testCompile project(':lucene:test-framework')
+}
+
+ext {
+  tempDir = file("temp")
+  workDir = file("work")
+}
+
+task run(type: JavaExec) {
+  description "Run a perf test (optional: -PtaskAlg=conf/your-algorithm-file 
-PmaxHeapSize=1G)"
+  main 'org.apache.lucene.benchmark.byTask.Benchmark'
+  classpath sourceSets.main.runtimeClasspath
+  // allow these to be specified on the CLI via -PtaskAlg=  for example
+  def taskAlg = project.properties['taskAlg'] ?: 'conf/micro-standard.alg'
+  args = [taskAlg]
+
+  maxHeapSize = project.properties['maxHeapSize'] ?: '1G'
+
+  String stdOutStr = project.properties['standardOutput']
+  if (stdOutStr != null) {
+    standardOutput = new File(stdOutStr).newOutputStream()
+  }
+
+  debugOptions {
+    enabled = false
+    port = 5005
+    suspend = true
+  }
+}
+
+/* Old "collation" Ant target:
+gradle getTop100kWikiWordFiles run -PtaskAlg=conf/collation.alg 
-PstandardOutput=work/collation.benchmark.output.txt
+perl -CSD scripts/collation.bm2jira.pl work/collation.benchmark.output.txt
+ */
+
+/* Old "shingle" Ant target:
+gradle reuters run -PtaskAlg=conf/shingle.alg 
-PstandardOutput=work/shingle.benchmark.output.txt
+perl -CSD scripts/shingle.bm2jira.pl work/shingle.benchmark.output.txt
+ */
+
+// The remaining tasks just get / extract / prepare data
+
+task getEnWiki(type: Download) {
+  src 
"https://home.apache.org/~dsmiley/data/enwiki-20070527-pages-articles.xml.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    ant.bunzip2(src: dest, dest: tempDir) // will chop off .bz2
+  }
+}
+
+task getGeoNames(type: Download) {
+  // note: latest data is at: 
https://download.geonames.org/export/dump/allCountries.zip
+  //       and then randomize with: gsort -R -S 1500M file.txt > 
file_random.txt
+  //       and then compress with: bzip2 -9 -k file_random.txt
+  src 
"https://home.apache.org/~dsmiley/data/geonames_20130921_randomOrder_allCountries.txt.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    ant.bunzip2(src: dest, dest: tempDir) // will chop off .bz2
+  }
+}
+
+task getReuters(type: Download) {
+  // note: there is no HTTPS url and we don't care because this is merely 
test/perf data
+  src 
"http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+}
+task extractReuters(type: Copy) {
+  dependsOn getReuters
+  from(tarTree(getReuters.dest)) { // can expand a .gz on the fly
+    exclude '*.txt'
+  }
+  into file("$workDir/reuters")
+}
+task reuters(type: JavaExec) {
+  dependsOn extractReuters
+  def input = extractReuters.outputs.files[0]

Review comment:
       A few problems here. This block runs at configuration time, so you shouldn't read the outputs of another task right away (yes, it will work, but it's not right). Besides, inputs and outputs don't need to be declared for these tasks at all, so I'd leave out `inputs.dir` and `outputs.dir` entirely; the task will then simply always execute.
   
   `args` should either be set in `doFirst` or use an absolute location (not the resolved outputs of the task this one depends on).

##########
File path: lucene/benchmark/build.gradle
##########
@@ -15,27 +15,138 @@
  * limitations under the License.
  */
 
-
-apply plugin: 'java-library'
+apply plugin: 'java'
+// NOT a 'java-library'.  Maybe 'application' but seems too limiting.
 
 description = 'System for benchmarking Lucene'
 
 dependencies {  
-  api project(':lucene:core')
-
-  implementation project(':lucene:analysis:common')
-  implementation project(':lucene:facet')
-  implementation project(':lucene:highlighter')
-  implementation project(':lucene:queries')
-  implementation project(':lucene:spatial-extras')
-  implementation project(':lucene:queryparser')
-
-  implementation "org.apache.commons:commons-compress"
-  implementation "com.ibm.icu:icu4j"
-  implementation "org.locationtech.spatial4j:spatial4j"
-  implementation("net.sourceforge.nekohtml:nekohtml", {
+  compile project(':lucene:core')
+
+  compile project(':lucene:analysis:common')
+  compile project(':lucene:facet')
+  compile project(':lucene:highlighter')
+  compile project(':lucene:queries')
+  compile project(':lucene:spatial-extras')
+  compile project(':lucene:queryparser')
+
+  compile "org.apache.commons:commons-compress"
+  compile "com.ibm.icu:icu4j"
+  compile "org.locationtech.spatial4j:spatial4j"
+  compile("net.sourceforge.nekohtml:nekohtml", {
     exclude module: "xml-apis"
   })
 
-  testImplementation project(':lucene:test-framework')
+  runtime project(':lucene:analysis:icu')
+
+  testCompile project(':lucene:test-framework')
+}
+
+ext {
+  tempDir = file("temp")
+  workDir = file("work")
+}
+
+task run(type: JavaExec) {
+  description "Run a perf test (optional: -PtaskAlg=conf/your-algorithm-file 
-PmaxHeapSize=1G)"
+  main 'org.apache.lucene.benchmark.byTask.Benchmark'
+  classpath sourceSets.main.runtimeClasspath
+  // allow these to be specified on the CLI via -PtaskAlg=  for example
+  def taskAlg = project.properties['taskAlg'] ?: 'conf/micro-standard.alg'
+  args = [taskAlg]
+
+  maxHeapSize = project.properties['maxHeapSize'] ?: '1G'
+
+  String stdOutStr = project.properties['standardOutput']
+  if (stdOutStr != null) {
+    standardOutput = new File(stdOutStr).newOutputStream()
+  }
+
+  debugOptions {
+    enabled = false
+    port = 5005
+    suspend = true
+  }
+}
+
+/* Old "collation" Ant target:
+gradle getTop100kWikiWordFiles run -PtaskAlg=conf/collation.alg 
-PstandardOutput=work/collation.benchmark.output.txt
+perl -CSD scripts/collation.bm2jira.pl work/collation.benchmark.output.txt
+ */
+
+/* Old "shingle" Ant target:
+gradle reuters run -PtaskAlg=conf/shingle.alg 
-PstandardOutput=work/shingle.benchmark.output.txt
+perl -CSD scripts/shingle.bm2jira.pl work/shingle.benchmark.output.txt
+ */
+
+// The remaining tasks just get / extract / prepare data
+
+task getEnWiki(type: Download) {
+  src 
"https://home.apache.org/~dsmiley/data/enwiki-20070527-pages-articles.xml.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    ant.bunzip2(src: dest, dest: tempDir) // will chop off .bz2
+  }
+}
+
+task getGeoNames(type: Download) {
+  // note: latest data is at: 
https://download.geonames.org/export/dump/allCountries.zip
+  //       and then randomize with: gsort -R -S 1500M file.txt > 
file_random.txt
+  //       and then compress with: bzip2 -9 -k file_random.txt
+  src 
"https://home.apache.org/~dsmiley/data/geonames_20130921_randomOrder_allCountries.txt.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    ant.bunzip2(src: dest, dest: tempDir) // will chop off .bz2
+  }
+}
+
+task getReuters(type: Download) {
+  // note: there is no HTTPS url and we don't care because this is merely 
test/perf data
+  src 
"http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+}
+task extractReuters(type: Copy) {
+  dependsOn getReuters
+  from(tarTree(getReuters.dest)) { // can expand a .gz on the fly
+    exclude '*.txt'
+  }
+  into file("$workDir/reuters")
+}
+task reuters(type: JavaExec) {
+  dependsOn extractReuters
+  def input = extractReuters.outputs.files[0]
+  def output = "$workDir/reuters-out"
+  inputs.dir(input)
+  outputs.dir(output)
+  main = 'org.apache.lucene.benchmark.utils.ExtractReuters'
+  classpath = sourceSets.main.runtimeClasspath
+  jvmArgs = ['-Xmx1G']
+  args = [input, output]
+
+  doFirst {
+    file(output).deleteDir()
+    println "Extracting reuters to $output"
+  }
+}
+
+task getTop100kWikiWordFiles(type: Download) {
+  src 
"https://home.apache.org/~rmuir/wikipedia/top.100k.words.de.en.fr.uk.wikipedia.2009-11.tar.bz2";
+  dest file("$tempDir/${src.file.split('/').last()}")
+  overwrite false
+  compress false
+
+  doLast {
+    copy {

Review comment:
       Use `sync` rather than `copy`?

##########
File path: lucene/benchmark/build.gradle
##########
@@ -15,27 +15,138 @@
  * limitations under the License.
  */
 
-
-apply plugin: 'java-library'
+apply plugin: 'java'
+// NOT a 'java-library'.  Maybe 'application' but seems too limiting.
 
 description = 'System for benchmarking Lucene'
 
 dependencies {  
-  api project(':lucene:core')
-
-  implementation project(':lucene:analysis:common')
-  implementation project(':lucene:facet')
-  implementation project(':lucene:highlighter')
-  implementation project(':lucene:queries')
-  implementation project(':lucene:spatial-extras')
-  implementation project(':lucene:queryparser')
-
-  implementation "org.apache.commons:commons-compress"
-  implementation "com.ibm.icu:icu4j"
-  implementation "org.locationtech.spatial4j:spatial4j"
-  implementation("net.sourceforge.nekohtml:nekohtml", {
+  compile project(':lucene:core')
+
+  compile project(':lucene:analysis:common')
+  compile project(':lucene:facet')
+  compile project(':lucene:highlighter')
+  compile project(':lucene:queries')
+  compile project(':lucene:spatial-extras')
+  compile project(':lucene:queryparser')
+
+  compile "org.apache.commons:commons-compress"
+  compile "com.ibm.icu:icu4j"
+  compile "org.locationtech.spatial4j:spatial4j"
+  compile("net.sourceforge.nekohtml:nekohtml", {
     exclude module: "xml-apis"
   })
 
-  testImplementation project(':lucene:test-framework')
+  runtime project(':lucene:analysis:icu')
+
+  testCompile project(':lucene:test-framework')
+}
+
+ext {

Review comment:
       Only declare externalized (`ext`) properties if they really have to be externalized (read from somewhere outside the script). Here it's fine to just declare local variables, for example:
   
   def tempDir = project.file("temp")




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org

Reply via email to