[solr] branch main updated: SOLR-16028: Enable spotless on langid module

krisden Mon, 21 Feb 2022 09:55:52 -0800

This is an automated email from the ASF dual-hosted git repository.

krisden pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git



The following commit(s) were added to refs/heads/main by this push:
     new 335fb28  SOLR-16028: Enable spotless on langid module
335fb28 is described below

commit 335fb288a780e9be51891886807df9eb3bf16873
Author: Kevin Risden <[email protected]>
AuthorDate: Sat Feb 19 10:42:45 2022 -0500

    SOLR-16028: Enable spotless on langid module
---
 gradle/validation/spotless.gradle                  |   1 -
 .../solr/update/processor/DetectedLanguage.java    |  10 +-
 ...angDetectLanguageIdentifierUpdateProcessor.java |  33 +--
 ...ctLanguageIdentifierUpdateProcessorFactory.java |  51 ++--
 .../apache/solr/update/processor/LangIdParams.java |  60 +++--
 .../LanguageIdentifierUpdateProcessor.java         | 202 ++++++++++------
 .../OpenNLPLangDetectUpdateProcessor.java          |  37 +--
 .../OpenNLPLangDetectUpdateProcessorFactory.java   |  37 +--
 .../update/processor/SolrInputDocumentReader.java  |  64 +++--
 .../TikaLanguageIdentifierUpdateProcessor.java     |  33 +--
 ...kaLanguageIdentifierUpdateProcessorFactory.java |  40 ++--
 ...nguageIdentifierUpdateProcessorFactoryTest.java | 126 ++++++++--
 ...geIdentifierUpdateProcessorFactoryTestCase.java | 266 ++++++++++++++++-----
 ...penNLPLangDetectUpdateProcessorFactoryTest.java |  64 ++++-
 .../processor/SolrInputDocumentReaderTest.java     |  39 +--
 ...nguageIdentifierUpdateProcessorFactoryTest.java |  44 ++--
 16 files changed, 730 insertions(+), 377 deletions(-)

diff --git a/gradle/validation/spotless.gradle 
b/gradle/validation/spotless.gradle
index c1c068e..a69520c 100644
--- a/gradle/validation/spotless.gradle
+++ b/gradle/validation/spotless.gradle
@@ -47,7 +47,6 @@ configure(project(":solr").subprojects) { prj ->
           case ":solr:modules:gcs-repository":
           case ":solr:modules:hadoop-auth":
           case ":solr:modules:hdfs":
-          case ":solr:modules:langid":
           case ":solr:modules:scripting":
           case ":solr:modules:sql":
           case ":solr:core":
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/DetectedLanguage.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/DetectedLanguage.java
index e8e6fbe..07d4e75 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/DetectedLanguage.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/DetectedLanguage.java
@@ -16,20 +16,19 @@
  */
 package org.apache.solr.update.processor;
 
-/**
- * Bean holding a language and a detection certainty 
- */
+/** Bean holding a language and a detection certainty */
 public class DetectedLanguage {
   private final String langCode;
   private final Double certainty;
-  
+
   DetectedLanguage(String lang, Double certainty) {
     this.langCode = lang;
     this.certainty = certainty;
   }
-  
+
   /**
    * Returns the detected language code
+   *
    * @return language code as a string
    */
   public String getLangCode() {
@@ -38,6 +37,7 @@ public class DetectedLanguage {
 
   /**
    * Returns the detected certainty for this language
+   *
    * @return certainty as a value between 0.0 and 1.0 where 1.0 is 100% certain
    */
   public Double getCertainty() {
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java
index 3206656..608627e 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessor.java
@@ -16,42 +16,42 @@
  */
 package org.apache.solr.update.processor;
 
+import com.cybozu.labs.langdetect.Detector;
+import com.cybozu.labs.langdetect.DetectorFactory;
+import com.cybozu.labs.langdetect.LangDetectException;
+import com.cybozu.labs.langdetect.Language;
 import java.io.IOException;
 import java.io.Reader;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
-
-import com.cybozu.labs.langdetect.Detector;
-import com.cybozu.labs.langdetect.DetectorFactory;
-import com.cybozu.labs.langdetect.LangDetectException;
-import com.cybozu.labs.langdetect.Language;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Identifies the language of a set of input fields using 
https://github.com/shuyo/language-detection
- * <p>
- * See <a 
href="https://solr.apache.org/guide/language-detection.html">Detecting 
Languages During
+ * Identifies the language of a set of input fields using
+ * https://github.com/shuyo/language-detection
+ *
+ * <p>See <a 
href="https://solr.apache.org/guide/language-detection.html">Detecting 
Languages During
  * Indexing</a> in the Solr Ref Guide
+ *
  * @since 3.5
  */
 public class LangDetectLanguageIdentifierUpdateProcessor extends 
LanguageIdentifierUpdateProcessor {
 
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  public LangDetectLanguageIdentifierUpdateProcessor(SolrQueryRequest req,
-      SolrQueryResponse rsp, UpdateRequestProcessor next) {
+  public LangDetectLanguageIdentifierUpdateProcessor(
+      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor 
next) {
     super(req, rsp, next);
   }
 
   /**
-   * Detects language(s) from a reader, typically based on some fields in 
SolrInputDocument
-   * Classes wishing to implement their own language detection module should 
override this method.
+   * Detects language(s) from a reader, typically based on some fields in 
SolrInputDocument Classes
+   * wishing to implement their own language detection module should override 
this method.
    *
    * @param solrDocReader A reader serving the text from the document to detect
    * @return List of detected language(s) according to RFC-3066
@@ -62,16 +62,17 @@ public class LangDetectLanguageIdentifierUpdateProcessor 
extends LanguageIdentif
       Detector detector = DetectorFactory.create();
       detector.setMaxTextLength(maxTotalChars);
 
-      // TODO Work around bug in LangDetect 1.1 which does not expect a -1 
return value at end of stream,
+      // TODO Work around bug in LangDetect 1.1 which does not expect a -1 
return value at end of
+      // stream,
       // but instead only looks at ready()
       if (solrDocReader instanceof SolrInputDocumentReader) {
-        ((SolrInputDocumentReader)solrDocReader).setEodReturnValue(0);
+        ((SolrInputDocumentReader) solrDocReader).setEodReturnValue(0);
       }
       detector.append(solrDocReader);
 
       ArrayList<Language> langlist = detector.getProbabilities();
       ArrayList<DetectedLanguage> solrLangList = new ArrayList<>();
-      for (Language l: langlist) {
+      for (Language l : langlist) {
         solrLangList.add(new DetectedLanguage(l.lang, l.prob));
       }
       return solrLangList;
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
index 7a0db3a..99e04f0 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
@@ -16,6 +16,8 @@
  */
 package org.apache.solr.update.processor;
 
+import com.cybozu.labs.langdetect.DetectorFactory;
+import com.cybozu.labs.langdetect.LangDetectException;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
@@ -23,7 +25,6 @@ import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
-
 import org.apache.commons.io.IOUtils;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -33,16 +34,13 @@ import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.util.SolrPluginUtils;
 import org.apache.solr.util.plugin.SolrCoreAware;
 
-import com.cybozu.labs.langdetect.DetectorFactory;
-import com.cybozu.labs.langdetect.LangDetectException;
-
 /**
  * Identifies the language of a set of input fields using
  * http://code.google.com/p/language-detection
- * <p>
- * The UpdateProcessorChain config entry can take a number of parameters
- * which may also be passed as HTTP parameters on the update request
- * and override the defaults. Here is the simplest processor config possible:
+ *
+ * <p>The UpdateProcessorChain config entry can take a number of parameters 
which may also be passed
+ * as HTTP parameters on the update request and override the defaults. Here is 
the simplest
+ * processor config possible:
  *
  * <pre class="prettyprint" >
  * &lt;processor 
class=&quot;org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory&quot;&gt;
@@ -50,32 +48,35 @@ import com.cybozu.labs.langdetect.LangDetectException;
  *   &lt;str name=&quot;langid.langField&quot;&gt;language_s&lt;/str&gt;
  * &lt;/processor&gt;
  * </pre>
- * See <a 
href="https://solr.apache.org/guide/language-detection.html">https://solr.apache.org/guide/language-detection.html</a>
+ *
+ * See <a
+ * 
href="https://solr.apache.org/guide/language-detection.html">https://solr.apache.org/guide/language-detection.html</a>
+ *
  * @since 3.5
  */
-public class LangDetectLanguageIdentifierUpdateProcessorFactory extends
-        UpdateRequestProcessorFactory implements SolrCoreAware, LangIdParams {
+public class LangDetectLanguageIdentifierUpdateProcessorFactory
+    extends UpdateRequestProcessorFactory implements SolrCoreAware, 
LangIdParams {
 
   protected SolrParams defaults;
   protected SolrParams appends;
   protected SolrParams invariants;
 
   @Override
-  public void inform(SolrCore core) {
-  }
+  public void inform(SolrCore core) {}
 
   /**
-   * The UpdateRequestProcessor may be initialized in solrconfig.xml similarly
-   * to a RequestHandler, with defaults, appends and invariants.
+   * The UpdateRequestProcessor may be initialized in solrconfig.xml similarly 
to a RequestHandler,
+   * with defaults, appends and invariants.
+   *
    * @param args a NamedList with the configuration parameters
    */
   @Override
-  public void init(NamedList<?> args )
-  {
+  public void init(NamedList<?> args) {
     try {
       loadData();
     } catch (Exception e) {
-      throw new RuntimeException("Couldn't load profile data, will return 
empty languages always!", e);
+      throw new RuntimeException(
+          "Couldn't load profile data, will return empty languages always!", 
e);
     }
     if (args != null) {
       Object o;
@@ -97,16 +98,15 @@ public class 
LangDetectLanguageIdentifierUpdateProcessorFactory extends
   }
 
   @Override
-  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
-                                            SolrQueryResponse rsp, 
UpdateRequestProcessor next) {
+  public UpdateRequestProcessor getInstance(
+      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor 
next) {
     // Process defaults, appends and invariants if we got a request
-    if(req != null) {
+    if (req != null) {
       SolrPluginUtils.setDefaults(req, defaults, appends, invariants);
     }
     return new LangDetectLanguageIdentifierUpdateProcessor(req, rsp, next);
   }
 
-
   // DetectorFactory is totally global, so we only want to do this once... 
ever!!!
   static boolean loaded;
 
@@ -125,8 +125,11 @@ public class 
LangDetectLanguageIdentifierUpdateProcessorFactory extends
     loaded = true;
     List<String> profileData = new ArrayList<>();
     for (String language : languages) {
-      InputStream stream = 
LangDetectLanguageIdentifierUpdateProcessor.class.getResourceAsStream("langdetect-profiles/"
 + language);
-      BufferedReader reader = new BufferedReader(new InputStreamReader(stream, 
StandardCharsets.UTF_8));
+      InputStream stream =
+          
LangDetectLanguageIdentifierUpdateProcessor.class.getResourceAsStream(
+              "langdetect-profiles/" + language);
+      BufferedReader reader =
+          new BufferedReader(new InputStreamReader(stream, 
StandardCharsets.UTF_8));
       profileData.add(new String(IOUtils.toCharArray(reader)));
       reader.close();
     }
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/LangIdParams.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/LangIdParams.java
index 4dc04ee..3eb55b0 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/LangIdParams.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/LangIdParams.java
@@ -19,31 +19,43 @@ package org.apache.solr.update.processor;
 public interface LangIdParams {
 
   String LANGUAGE_ID = "langid";
-  String DOCID_PARAM =  LANGUAGE_ID + ".idField";
+  String DOCID_PARAM = LANGUAGE_ID + ".idField";
+
+  String FIELDS_PARAM = LANGUAGE_ID + ".fl"; // Field list to detect from
+  String LANG_FIELD = LANGUAGE_ID + ".langField"; // Main language detected
+  String LANGS_FIELD = LANGUAGE_ID + ".langsField"; // All languages detected 
(multiValued)
+  String FALLBACK = LANGUAGE_ID + ".fallback"; // Fallback lang code
+  String FALLBACK_FIELDS = LANGUAGE_ID + ".fallbackFields"; // Comma-sep list 
of fallback fields
+  String OVERWRITE =
+      LANGUAGE_ID + ".overwrite"; // Overwrite if existing language value in 
LANG_FIELD
+  String THRESHOLD = LANGUAGE_ID + ".threshold"; // Detection threshold
+  String ENFORCE_SCHEMA =
+      LANGUAGE_ID + ".enforceSchema"; // Enforces that output fields exist in 
schema
 
-  String FIELDS_PARAM = LANGUAGE_ID + ".fl";                 // Field list to 
detect from
-  String LANG_FIELD = LANGUAGE_ID + ".langField";            // Main language 
detected
-  String LANGS_FIELD = LANGUAGE_ID + ".langsField";          // All languages 
detected (multiValued)
-  String FALLBACK =  LANGUAGE_ID + ".fallback";              // Fallback lang 
code  
-  String FALLBACK_FIELDS =  LANGUAGE_ID + ".fallbackFields"; // Comma-sep list 
of fallback fields
-  String OVERWRITE  = LANGUAGE_ID + ".overwrite";            // Overwrite if 
existing language value in LANG_FIELD
-  String THRESHOLD  = LANGUAGE_ID + ".threshold";            // Detection 
threshold
-  String ENFORCE_SCHEMA =  LANGUAGE_ID + ".enforceSchema";   // Enforces that 
output fields exist in schema
   @Deprecated(since = "9.0.0")
-  String LANG_WHITELIST = LANGUAGE_ID + ".whitelist";        // Old property 
name for allowed languages
-  String LANG_ALLOWLIST = LANGUAGE_ID + ".allowlist";        // Allowed 
languages
-  String LCMAP =  LANGUAGE_ID + ".lcmap";                    // Maps detected 
langcode to other value
-  String MAP_ENABLE =  LANGUAGE_ID + ".map";                 // Turns on or 
off the field mapping
-  String MAP_FL =  LANGUAGE_ID + ".map.fl";                  // Field list for 
mapping
-  String MAP_OVERWRITE =  LANGUAGE_ID + ".map.overwrite";    // Whether to 
overwrite existing fields
-  String MAP_KEEP_ORIG =  LANGUAGE_ID + ".map.keepOrig";     // Keep original 
field after mapping
-  String MAP_INDIVIDUAL =  LANGUAGE_ID + ".map.individual";  // Detect 
language per individual field
-  String MAP_INDIVIDUAL_FL =  LANGUAGE_ID + ".map.individual.fl";// Field list 
of fields to redetect language for
-  String MAP_LCMAP =  LANGUAGE_ID + ".map.lcmap";            // Enables 
mapping multiple langs to same output field
-  String MAP_PATTERN =  LANGUAGE_ID + ".map.pattern";        // RegEx pattern 
to match field name
-  String MAP_REPLACE =  LANGUAGE_ID + ".map.replace";        // Replace pattern
-  String MAX_FIELD_VALUE_CHARS = LANGUAGE_ID + ".maxFieldValueChars";   // 
Maximum number of characters to use per field for language detection
-  String MAX_TOTAL_CHARS = LANGUAGE_ID + ".maxTotalChars";   // Maximum number 
of characters to use per all concatenated fields for language detection
+  String LANG_WHITELIST = LANGUAGE_ID + ".whitelist"; // Old property name for 
allowed languages
+
+  String LANG_ALLOWLIST = LANGUAGE_ID + ".allowlist"; // Allowed languages
+  String LCMAP = LANGUAGE_ID + ".lcmap"; // Maps detected langcode to other 
value
+  String MAP_ENABLE = LANGUAGE_ID + ".map"; // Turns on or off the field 
mapping
+  String MAP_FL = LANGUAGE_ID + ".map.fl"; // Field list for mapping
+  String MAP_OVERWRITE = LANGUAGE_ID + ".map.overwrite"; // Whether to 
overwrite existing fields
+  String MAP_KEEP_ORIG = LANGUAGE_ID + ".map.keepOrig"; // Keep original field 
after mapping
+  String MAP_INDIVIDUAL = LANGUAGE_ID + ".map.individual"; // Detect language 
per individual field
+  String MAP_INDIVIDUAL_FL =
+      LANGUAGE_ID + ".map.individual.fl"; // Field list of fields to redetect 
language for
+  String MAP_LCMAP =
+      LANGUAGE_ID + ".map.lcmap"; // Enables mapping multiple langs to same 
output field
+  String MAP_PATTERN = LANGUAGE_ID + ".map.pattern"; // RegEx pattern to match 
field name
+  String MAP_REPLACE = LANGUAGE_ID + ".map.replace"; // Replace pattern
+  String MAX_FIELD_VALUE_CHARS =
+      LANGUAGE_ID
+          + ".maxFieldValueChars"; // Maximum number of characters to use per 
field for language
+  // detection
+  String MAX_TOTAL_CHARS =
+      LANGUAGE_ID
+          + ".maxTotalChars"; // Maximum number of characters to use per all 
concatenated fields for
+  // language detection
 
   String DOCID_FIELD_DEFAULT = "id";
   String DOCID_LANGFIELD_DEFAULT = null;
@@ -53,7 +65,7 @@ public interface LangIdParams {
   int MAX_FIELD_VALUE_CHARS_DEFAULT = 10000;
   int MAX_TOTAL_CHARS_DEFAULT = 20000;
 
-  // TODO: This default threshold accepts even "uncertain" detections. 
+  // TODO: This default threshold accepts even "uncertain" detections.
   // Increase &langid.threshold above 0.5 to return only certain detections
   Double DOCID_THRESHOLD_DEFAULT = 0.5;
 }
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
index ff630f6..ad744e8 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
@@ -25,7 +25,6 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.regex.Pattern;
-
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.SolrInputDocument;
@@ -39,17 +38,16 @@ import org.apache.solr.update.AddUpdateCommand;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /**
- * <p>
- *   Identifies the language of a set of input fields.
- *   Also supports mapping of field names based on detected language.
- * </p>
- * See <a 
href="https://solr.apache.org/guide/language-detection.html">Detecting 
Languages During Indexing</a> in reference guide
+ * Identifies the language of a set of input fields. Also supports mapping of 
field names based on
+ * detected language. See <a 
href="https://solr.apache.org/guide/language-detection.html">Detecting
+ * Languages During Indexing</a> in reference guide
+ *
  * @since 3.5
  * @lucene.experimental
  */
-public abstract class LanguageIdentifierUpdateProcessor extends 
UpdateRequestProcessor implements LangIdParams {
+public abstract class LanguageIdentifierUpdateProcessor extends 
UpdateRequestProcessor
+    implements LangIdParams {
 
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -74,8 +72,8 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
   protected HashSet<String> langAllowlist;
   protected HashSet<String> mapIndividualFieldsSet;
   protected HashSet<String> allMapFieldsSet;
-  protected HashMap<String,String> lcMap;
-  protected HashMap<String,String> mapLcMap;
+  protected HashMap<String, String> lcMap;
+  protected HashMap<String, String> mapLcMap;
   protected IndexSchema schema;
   protected int maxFieldValueChars;
   protected int maxTotalChars;
@@ -84,8 +82,8 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
   protected final Pattern tikaSimilarityPattern = 
Pattern.compile(".*\\((.*?)\\)");
   protected final Pattern langPattern = Pattern.compile("\\{lang\\}");
 
-  public LanguageIdentifierUpdateProcessor(SolrQueryRequest req,
-                                           SolrQueryResponse rsp, 
UpdateRequestProcessor next) {
+  public LanguageIdentifierUpdateProcessor(
+      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor 
next) {
     super(next);
     schema = req.getSchema();
 
@@ -96,34 +94,40 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
     if (params != null) {
       // Document-centric langId params
       setEnabled(params.getBool(LANGUAGE_ID, true));
-      if(params.get(FIELDS_PARAM, "").length() > 0) {
+      if (params.get(FIELDS_PARAM, "").length() > 0) {
         inputFields = params.get(FIELDS_PARAM, "").split(",");
       }
       langField = params.get(LANG_FIELD, DOCID_LANGFIELD_DEFAULT);
       langsField = params.get(LANGS_FIELD, DOCID_LANGSFIELD_DEFAULT);
       SchemaField uniqueKeyField = schema.getUniqueKeyField();
-      docIdField = params.get(DOCID_PARAM, uniqueKeyField == null ? 
DOCID_FIELD_DEFAULT : uniqueKeyField.getName());
+      docIdField =
+          params.get(
+              DOCID_PARAM, uniqueKeyField == null ? DOCID_FIELD_DEFAULT : 
uniqueKeyField.getName());
       fallbackValue = params.get(FALLBACK);
-      if(params.get(FALLBACK_FIELDS, "").length() > 0) {
+      if (params.get(FALLBACK_FIELDS, "").length() > 0) {
         fallbackFields = params.get(FALLBACK_FIELDS).split(",");
       }
       overwrite = params.getBool(OVERWRITE, false);
       langAllowlist = new HashSet<>();
       threshold = params.getDouble(THRESHOLD, DOCID_THRESHOLD_DEFAULT);
       String legacyAllowList = params.get(LANG_WHITELIST, "");
-      if(legacyAllowList.length() > 0) {
+      if (legacyAllowList.length() > 0) {
         // nowarn compile time string concatenation
-        log.warn(LANG_WHITELIST + " parameter is deprecated; use " + 
LANG_ALLOWLIST + " instead."); // nowarn
+        log.warn(
+            LANG_WHITELIST
+                + " parameter is deprecated; use "
+                + LANG_ALLOWLIST
+                + " instead."); // nowarn
       }
-      if(params.get(LANG_ALLOWLIST, legacyAllowList).length() > 0) {
-        for(String lang : params.get(LANG_ALLOWLIST, "").split(",")) {
+      if (params.get(LANG_ALLOWLIST, legacyAllowList).length() > 0) {
+        for (String lang : params.get(LANG_ALLOWLIST, "").split(",")) {
           langAllowlist.add(lang);
         }
       }
 
       // Mapping params (field centric)
       enableMapping = params.getBool(MAP_ENABLE, false);
-      if(params.get(MAP_FL, "").length() > 0) {
+      if (params.get(MAP_FL, "").length() > 0) {
         mapFields = params.get(MAP_FL, "").split(",");
       } else {
         mapFields = inputFields;
@@ -134,7 +138,7 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
 
       // Process individual fields
       String[] mapIndividualFields = {};
-      if(params.get(MAP_INDIVIDUAL_FL, "").length() > 0) {
+      if (params.get(MAP_INDIVIDUAL_FL, "").length() > 0) {
         mapIndividualFields = params.get(MAP_INDIVIDUAL_FL, "").split(",");
       } else {
         mapIndividualFields = mapFields;
@@ -142,16 +146,16 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
       mapIndividualFieldsSet = new 
HashSet<>(Arrays.asList(mapIndividualFields));
       // Compile a union of the lists of fields to map
       allMapFieldsSet = new HashSet<>(Arrays.asList(mapFields));
-      if(Arrays.equals(mapFields, mapIndividualFields)) {
+      if (Arrays.equals(mapFields, mapIndividualFields)) {
         allMapFieldsSet.addAll(mapIndividualFieldsSet);
       }
 
       // Normalize detected langcode onto normalized langcode
       lcMap = new HashMap<>();
-      if(params.get(LCMAP) != null) {
-        for(String mapping : params.get(LCMAP).split("[, ]")) {
+      if (params.get(LCMAP) != null) {
+        for (String mapping : params.get(LCMAP).split("[, ]")) {
           String[] keyVal = mapping.split(":");
-          if(keyVal.length == 2) {
+          if (keyVal.length == 2) {
             lcMap.put(keyVal[0], keyVal[1]);
           } else {
             log.error("Unsupported format for langid.lcmap: {}. Skipping this 
mapping.", mapping);
@@ -161,13 +165,14 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
 
       // Language Code mapping
       mapLcMap = new HashMap<>();
-      if(params.get(MAP_LCMAP) != null) {
-        for(String mapping : params.get(MAP_LCMAP).split("[, ]")) {
+      if (params.get(MAP_LCMAP) != null) {
+        for (String mapping : params.get(MAP_LCMAP).split("[, ]")) {
           String[] keyVal = mapping.split(":");
-          if(keyVal.length == 2) {
+          if (keyVal.length == 2) {
             mapLcMap.put(keyVal[0], keyVal[1]);
           } else {
-            log.error("Unsupported format for langid.map.lcmap: {}. Skipping 
this mapping.", mapping);
+            log.error(
+                "Unsupported format for langid.map.lcmap: {}. Skipping this 
mapping.", mapping);
           }
         }
       }
@@ -180,27 +185,36 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
       if (maxFieldValueChars > maxTotalChars) {
         if (maxTotalChars == MAX_TOTAL_CHARS_DEFAULT) {
           // If the user specified only maxFieldValueChars, make maxTotalChars 
the same as it
-          log.warn("{} ({}) is less than {} ({}).  Setting {} to {}."
-              , MAX_FIELD_VALUE_CHARS, maxFieldValueChars, MAX_TOTAL_CHARS
-              , maxTotalChars, MAX_TOTAL_CHARS, maxFieldValueChars);
+          log.warn(
+              "{} ({}) is less than {} ({}).  Setting {} to {}.",
+              MAX_FIELD_VALUE_CHARS,
+              maxFieldValueChars,
+              MAX_TOTAL_CHARS,
+              maxTotalChars,
+              MAX_TOTAL_CHARS,
+              maxFieldValueChars);
           maxTotalChars = maxFieldValueChars;
         } else {
           // If the user specified maxTotalChars, make maxFieldValueChars the 
same as it
-          log.warn("{} ({}) is less than {} ({}).  Setting {} to {}."
-              , MAX_FIELD_VALUE_CHARS, maxFieldValueChars, MAX_TOTAL_CHARS
-              , maxTotalChars, MAX_FIELD_VALUE_CHARS, maxTotalChars );
+          log.warn(
+              "{} ({}) is less than {} ({}).  Setting {} to {}.",
+              MAX_FIELD_VALUE_CHARS,
+              maxFieldValueChars,
+              MAX_TOTAL_CHARS,
+              maxTotalChars,
+              MAX_FIELD_VALUE_CHARS,
+              maxTotalChars);
           maxFieldValueChars = maxTotalChars;
         }
       }
     }
     log.debug("LangId configured");
 
-
     if (inputFields.length == 0) {
-      throw new SolrException(ErrorCode.BAD_REQUEST,
-              "Missing or faulty configuration of 
LanguageIdentifierUpdateProcessor. Input fields must be specified as a comma 
separated list");
+      throw new SolrException(
+          ErrorCode.BAD_REQUEST,
+          "Missing or faulty configuration of 
LanguageIdentifierUpdateProcessor. Input fields must be specified as a comma 
separated list");
     }
-
   }
 
   @Override
@@ -215,6 +229,7 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
 
   /**
    * This is the main process method called from processAdd()
+   *
    * @param doc the SolrInputDocument to modify
    */
   protected void process(SolrInputDocument doc) {
@@ -222,20 +237,25 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
     HashSet<String> docLangs = new HashSet<>();
     String fallbackLang = getFallbackLang(doc, fallbackFields, fallbackValue);
 
-    if(langField == null || !doc.containsKey(langField) || 
(doc.containsKey(langField) && overwrite)) {
+    if (langField == null
+        || !doc.containsKey(langField)
+        || (doc.containsKey(langField) && overwrite)) {
       List<DetectedLanguage> languagelist = detectLanguage(doc);
       docLang = resolveLanguage(languagelist, fallbackLang);
       docLangs.add(docLang);
       if (log.isDebugEnabled()) {
-        log.debug("Detected main document language from fields {}: {}", 
Arrays.toString(inputFields), docLang);
+        log.debug(
+            "Detected main document language from fields {}: {}",
+            Arrays.toString(inputFields),
+            docLang);
       }
 
-      if(doc.containsKey(langField) && overwrite) {
+      if (doc.containsKey(langField) && overwrite) {
         if (log.isDebugEnabled()) {
           log.debug("Overwritten old value {}", doc.getFieldValue(langField));
         }
       }
-      if(langField != null && langField.length() != 0) {
+      if (langField != null && langField.length() != 0) {
         doc.setField(langField, docLang);
       }
     } else {
@@ -245,15 +265,17 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
       log.debug("Field {} already contained value {}, not overwriting.", 
langField, docLang);
     }
 
-    if(enableMapping) {
+    if (enableMapping) {
       for (String fieldName : allMapFieldsSet) {
-        if(doc.containsKey(fieldName)) {
+        if (doc.containsKey(fieldName)) {
           String fieldLang;
-          if(mapIndividual && mapIndividualFieldsSet.contains(fieldName)) {
-            List<DetectedLanguage> languagelist = 
detectLanguage(solrDocReader(doc, new String[]{fieldName}));
+          if (mapIndividual && mapIndividualFieldsSet.contains(fieldName)) {
+            List<DetectedLanguage> languagelist =
+                detectLanguage(solrDocReader(doc, new String[] {fieldName}));
             fieldLang = resolveLanguage(languagelist, docLang);
             docLangs.add(fieldLang);
-            log.debug("Mapping field {} using individually detected language 
{}", fieldName, fieldLang);
+            log.debug(
+                "Mapping field {} using individually detected language {}", 
fieldName, fieldLang);
           } else {
             fieldLang = docLang;
             log.debug("Mapping field {} using document global language {}", 
fieldName, fieldLang);
@@ -266,40 +288,46 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
             }
             SolrInputField inField = doc.getField(fieldName);
             doc.setField(mappedOutputField, inField.getValue());
-            if(!mapKeepOrig) {
+            if (!mapKeepOrig) {
               log.debug("Removing old field {}", fieldName);
               doc.removeField(fieldName);
             }
           } else {
-            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
"Invalid output field mapping for "
-                    + fieldName + " field and language: " + fieldLang);
+            throw new SolrException(
+                SolrException.ErrorCode.BAD_REQUEST,
+                "Invalid output field mapping for "
+                    + fieldName
+                    + " field and language: "
+                    + fieldLang);
           }
         }
       }
     }
 
     // Set the languages field to an array of all detected languages
-    if(langsField != null && langsField.length() != 0) {
+    if (langsField != null && langsField.length() != 0) {
       doc.setField(langsField, docLangs.toArray());
     }
   }
 
   /**
    * Decides the fallback language, either from content of fallback field or 
fallback value
+   *
    * @param doc the Solr document
    * @param fallbackFields an array of strings with field names containing 
fallback language codes
    * @param fallbackValue a language code to use in case no fallbackFields are 
found
    */
-  private String getFallbackLang(SolrInputDocument doc, String[] 
fallbackFields, String fallbackValue) {
+  private String getFallbackLang(
+      SolrInputDocument doc, String[] fallbackFields, String fallbackValue) {
     String lang = null;
-    for(String field : fallbackFields) {
-      if(doc.containsKey(field)) {
+    for (String field : fallbackFields) {
+      if (doc.containsKey(field)) {
         lang = (String) doc.getFieldValue(field);
         log.debug("Language fallback to field {}", field);
         break;
       }
     }
-    if(lang == null) {
+    if (lang == null) {
       log.debug("Language fallback to value {}", fallbackValue);
       lang = fallbackValue;
     }
@@ -308,6 +336,7 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
 
   /**
    * Detects language(s) from all configured fields
+   *
    * @param doc The solr document
    * @return List of detected language(s) according to RFC-3066
    */
@@ -316,8 +345,9 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
   }
 
   /**
-   * Detects language(s) from a reader, typically based on some fields in 
SolrInputDocument
-   * Classes wishing to implement their own language detection module should 
override this method.
+   * Detects language(s) from a reader, typically based on some fields in 
SolrInputDocument Classes
+   * wishing to implement their own language detection module should override 
this method.
+   *
    * @param solrDocReader A reader serving the text from the document to detect
    * @return List of detected language(s) according to RFC-3066
    */
@@ -325,6 +355,7 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
 
   /**
    * Chooses a language based on the list of candidates detected
+   *
    * @param language language code as a string
    * @param fallbackLang the language code to use as a fallback
    * @return a string of the chosen language
@@ -337,37 +368,42 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
 
   /**
    * Chooses a language based on the list of candidates detected
+   *
    * @param languages a List of DetectedLanguages with certainty score
    * @param fallbackLang the language code to use as a fallback
    * @return a string of the chosen language
    */
   protected String resolveLanguage(List<DetectedLanguage> languages, String 
fallbackLang) {
     String langStr;
-    if(languages.size() == 0) {
+    if (languages.size() == 0) {
       log.debug("No language detected, using fallback {}", fallbackLang);
       langStr = fallbackLang;
     } else {
       DetectedLanguage lang = languages.get(0);
       String normalizedLang = normalizeLangCode(lang.getLangCode());
-      if(langAllowlist.isEmpty() || langAllowlist.contains(normalizedLang)) {
+      if (langAllowlist.isEmpty() || langAllowlist.contains(normalizedLang)) {
         if (log.isDebugEnabled()) {
           log.debug("Language detected {} with certainty {}", normalizedLang, 
lang.getCertainty());
         }
-        if(lang.getCertainty() >= threshold) {
+        if (lang.getCertainty() >= threshold) {
           langStr = normalizedLang;
         } else {
-          log.debug("Detected language below threshold {}, using fallback {}", 
threshold, fallbackLang);
+          log.debug(
+              "Detected language below threshold {}, using fallback {}", 
threshold, fallbackLang);
           langStr = fallbackLang;
         }
       } else {
         if (log.isDebugEnabled()) {
-          log.debug("Detected a language not in allowlist ({}), using fallback 
{}", lang.getLangCode(), fallbackLang);
+          log.debug(
+              "Detected a language not in allowlist ({}), using fallback {}",
+              lang.getLangCode(),
+              fallbackLang);
         }
         langStr = fallbackLang;
       }
     }
 
-    if(langStr == null || langStr.length() == 0) {
+    if (langStr == null || langStr.length() == 0) {
       log.warn("Language resolved to null or empty string. Fallback not 
configured?");
       langStr = "";
     }
@@ -377,6 +413,7 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
 
   /**
    * Looks up language code in map (langid.lcmap) and returns mapped value
+   *
    * @param langCode the language code string returned from detector
    * @return the normalized/mapped language code
    */
@@ -390,10 +427,10 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
   }
 
   /**
-   * Returns the name of the field to map the current contents into, so that 
they are properly analyzed.  For instance
-   * if the currentField is "text" and the code is "en", the new field would 
by default be "text_en".
-   * This method also performs custom regex pattern replace if configured. If 
enforceSchema=true
-   * and the resulting field name doesn't exist, then null is returned.
+   * Returns the name of the field to map the current contents into, so that 
they are properly
+   * analyzed. For instance if the currentField is "text" and the code is 
"en", the new field would
+   * by default be "text_en". This method also performs custom regex pattern 
replace if configured.
+   * If enforceSchema=true and the resulting field name doesn't exist, then 
null is returned.
    *
    * @param currentField The current field name
    * @param language the language code
@@ -401,18 +438,29 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
    */
   protected String getMappedField(String currentField, String language) {
     String lc = mapLcMap.containsKey(language) ? mapLcMap.get(language) : 
language;
-    String newFieldName = 
langPattern.matcher(mapPattern.matcher(currentField).replaceFirst(mapReplaceStr)).replaceFirst(lc);
-    if(enforceSchema && schema.getFieldOrNull(newFieldName) == null) {
-      log.warn("Unsuccessful field name mapping from {} to {}, field does not 
exist and enforceSchema=true; skipping mapping.", currentField, newFieldName);
+    String newFieldName =
+        langPattern
+            
.matcher(mapPattern.matcher(currentField).replaceFirst(mapReplaceStr))
+            .replaceFirst(lc);
+    if (enforceSchema && schema.getFieldOrNull(newFieldName) == null) {
+      log.warn(
+          "Unsuccessful field name mapping from {} to {}, field does not exist 
and enforceSchema=true; skipping mapping.",
+          currentField,
+          newFieldName);
       return null;
     } else {
-      log.debug("Doing mapping from {} with language {} to field {}", 
currentField, language, newFieldName);
+      log.debug(
+          "Doing mapping from {} with language {} to field {}",
+          currentField,
+          language,
+          newFieldName);
     }
     return newFieldName;
   }
 
   /**
    * Tells if this processor is enabled or not
+   *
    * @return true if enabled, else false
    */
   public boolean isEnabled() {
@@ -424,8 +472,9 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
   }
 
   /**
-   * Returns a reader that streams String content from fields.
-   * This is more memory efficient than building a full string buffer
+   * Returns a reader that streams String content from fields. This is more 
memory efficient than
+   * building a full string buffer
+   *
    * @param doc the solr document
    * @param fields the field names to read
    * @return a reader over the fields
@@ -434,10 +483,7 @@ public abstract class LanguageIdentifierUpdateProcessor 
extends UpdateRequestPro
     return new SolrInputDocumentReader(doc, fields, maxTotalChars, 
maxFieldValueChars, " ");
   }
 
-  /**
-   * Concatenates content from input fields defined in langid.fl.
-   * For test purposes only
-   */
+  /** Concatenates content from input fields defined in langid.fl. For test 
purposes only */
   protected String concatFields(SolrInputDocument doc) {
     return SolrInputDocumentReader.asString(solrDocReader(doc, inputFields));
   }
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java
index ab17133..8f6b611 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessor.java
@@ -23,21 +23,20 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
-
+import opennlp.tools.langdetect.Language;
+import opennlp.tools.langdetect.LanguageDetectorME;
+import opennlp.tools.langdetect.LanguageDetectorModel;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import opennlp.tools.langdetect.Language;
-import opennlp.tools.langdetect.LanguageDetectorME;
-import opennlp.tools.langdetect.LanguageDetectorModel;
-
 /**
- * Identifies the language of a set of input fields using <a 
href="https://opennlp.apache.org/">Apache OpenNLP</a>.
- * <p>
- * See "Language Detector" section of
- * <a 
href="https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html">https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html</a>
+ * Identifies the language of a set of input fields using <a
+ * href="https://opennlp.apache.org/">Apache OpenNLP</a>.
+ *
+ * <p>See "Language Detector" section of <a
+ * 
href="https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html">https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html</a>
  */
 public class OpenNLPLangDetectUpdateProcessor extends 
LanguageIdentifierUpdateProcessor {
 
@@ -45,10 +44,13 @@ public class OpenNLPLangDetectUpdateProcessor extends 
LanguageIdentifierUpdatePr
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   /** Maps ISO 639-3 (3-letter language code) to ISO 639-1 (2-letter language 
code) */
-  private static final Map<String,String> ISO639_MAP = make_ISO639_map();
-  
-  public OpenNLPLangDetectUpdateProcessor(SolrQueryRequest req, 
SolrQueryResponse rsp,
-      UpdateRequestProcessor next, LanguageDetectorModel model) {
+  private static final Map<String, String> ISO639_MAP = make_ISO639_map();
+
+  public OpenNLPLangDetectUpdateProcessor(
+      SolrQueryRequest req,
+      SolrQueryResponse rsp,
+      UpdateRequestProcessor next,
+      LanguageDetectorModel model) {
     super(req, rsp, next);
     this.model = model;
   }
@@ -60,8 +62,9 @@ public class OpenNLPLangDetectUpdateProcessor extends 
LanguageIdentifierUpdatePr
     if (content.length() != 0) {
       LanguageDetectorME ldme = new LanguageDetectorME(model);
       Language[] langs = ldme.predictLanguages(content);
-      for(Language language: langs){
-        languages.add(new DetectedLanguage(ISO639_MAP.get(language.getLang()), 
language.getConfidence()));
+      for (Language language : langs) {
+        languages.add(
+            new DetectedLanguage(ISO639_MAP.get(language.getLang()), 
language.getConfidence()));
       }
     } else {
       log.debug("No input text to detect language from, returning empty list");
@@ -69,8 +72,8 @@ public class OpenNLPLangDetectUpdateProcessor extends 
LanguageIdentifierUpdatePr
     return languages;
   }
 
-  private static Map<String,String> make_ISO639_map() {
-    Map<String,String> map = new HashMap<>();
+  private static Map<String, String> make_ISO639_map() {
+    Map<String, String> map = new HashMap<>();
     for (String lang : Locale.getISOLanguages()) {
       Locale locale = new Locale(lang);
       map.put(locale.getISO3Language(), locale.getLanguage());
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java
index 14e9fa9..109fff0 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactory.java
@@ -18,7 +18,7 @@ package org.apache.solr.update.processor;
 
 import java.io.IOException;
 import java.io.InputStream;
-
+import opennlp.tools.langdetect.LanguageDetectorModel;
 import org.apache.commons.io.IOUtils;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -29,14 +29,13 @@ import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.util.SolrPluginUtils;
 import org.apache.solr.util.plugin.SolrCoreAware;
 
-import opennlp.tools.langdetect.LanguageDetectorModel;
-
 /**
- * Identifies the language of a set of input fields using <a 
href="https://opennlp.apache.org/">Apache OpenNLP</a>.
- * <p>
- * The UpdateProcessorChain config entry can take a number of parameters
- * which may also be passed as HTTP parameters on the update request
- * and override the defaults. Here is the simplest processor config possible:
+ * Identifies the language of a set of input fields using <a
+ * href="https://opennlp.apache.org/">Apache OpenNLP</a>.
+ *
+ * <p>The UpdateProcessorChain config entry can take a number of parameters 
which may also be passed
+ * as HTTP parameters on the update request and override the defaults. Here is 
the simplest
+ * processor config possible:
  *
  * <pre class="prettyprint" >
  * &lt;processor 
class=&quot;org.apache.solr.update.processor.OpenNLPLangDetectUpdateProcessorFactory&quot;&gt;
@@ -45,12 +44,14 @@ import opennlp.tools.langdetect.LanguageDetectorModel;
  *   &lt;str name="langid.model"&gt;langdetect-183.bin&lt;/str&gt;
  * &lt;/processor&gt;
  * </pre>
- * See <a 
href="https://solr.apache.org/guide/language-detection.html#configuring-opennlp-language-detection">https://solr.apache.org/guide/language-detection.html#configuring-opennlp-language-detection</a>
+ *
+ * See <a
+ * 
href="https://solr.apache.org/guide/language-detection.html#configuring-opennlp-language-detection">https://solr.apache.org/guide/language-detection.html#configuring-opennlp-language-detection</a>
  *
  * @since 7.3.0
  */
 public class OpenNLPLangDetectUpdateProcessorFactory extends 
UpdateRequestProcessorFactory
-  implements SolrCoreAware {
+    implements SolrCoreAware {
 
   private static final String MODEL_PARAM = "langid.model";
   private String modelFile;
@@ -61,8 +62,7 @@ public class OpenNLPLangDetectUpdateProcessorFactory extends 
UpdateRequestProces
   private SolrResourceLoader solrResourceLoader;
 
   @Override
-  public void init(NamedList<?> args )
-  {
+  public void init(NamedList<?> args) {
     if (args != null) {
       Object o;
       o = args.get("defaults");
@@ -91,7 +91,8 @@ public class OpenNLPLangDetectUpdateProcessorFactory extends 
UpdateRequestProces
         } else {
           modelFile = defaults.get(MODEL_PARAM);
           if (modelFile == null) {
-            throw new RuntimeException("Couldn't load language model, will 
return empty languages always!");
+            throw new RuntimeException(
+                "Couldn't load language model, will return empty languages 
always!");
           }
         }
       }
@@ -99,7 +100,8 @@ public class OpenNLPLangDetectUpdateProcessorFactory extends 
UpdateRequestProces
   }
 
   @Override
-  public UpdateRequestProcessor getInstance(SolrQueryRequest req, 
SolrQueryResponse rsp, UpdateRequestProcessor next) {
+  public UpdateRequestProcessor getInstance(
+      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor 
next) {
     // Process defaults, appends and invariants if we got a request
     if (req != null) {
       SolrPluginUtils.setDefaults(req, defaults, appends, invariants);
@@ -109,19 +111,18 @@ public class OpenNLPLangDetectUpdateProcessorFactory 
extends UpdateRequestProces
 
   private void loadModel() throws IOException {
     InputStream is = null;
-    try{
+    try {
       if (modelFile != null) {
         is = solrResourceLoader.openResource(modelFile);
         model = new LanguageDetectorModel(is);
       }
-    }
-    finally{
+    } finally {
       IOUtils.closeQuietly(is);
     }
   }
 
   @Override
-  public void inform(SolrCore core){
+  public void inform(SolrCore core) {
     solrResourceLoader = core.getResourceLoader();
     try {
       loadModel();
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/SolrInputDocumentReader.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/SolrInputDocumentReader.java
index ed839de..84fb505 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/SolrInputDocumentReader.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/SolrInputDocumentReader.java
@@ -23,7 +23,6 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
-
 import org.apache.commons.io.IOUtils;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
@@ -33,7 +32,9 @@ import org.slf4j.LoggerFactory;
 
 /**
  * Reader on top of SolrInputDocument that can "stream" a document as a 
character stream in a memory
- * efficient way, to avoid potentially large intermediate string buffers 
containing whole document content.
+ * efficient way, to avoid potentially large intermediate string buffers 
containing whole document
+ * content.
+ *
  * @lucene.experimental
  */
 public class SolrInputDocumentReader extends Reader {
@@ -51,23 +52,26 @@ public class SolrInputDocumentReader extends Reader {
   private int currentFieldValueIdx = 0;
   private int currentFieldValueOffset = 0;
   private boolean eod = false;
-  // Normally a Reader will return -1 at end of document, but to work around 
LangDetect's bug, we allow another value
+  // Normally a Reader will return -1 at end of document, but to work around 
LangDetect's bug, we
+  // allow another value
   private int eodReturnValue = -1;
 
   /**
-   * Creates a character-stream reader that streams all String fields in the 
document with space as separator 
+   * Creates a character-stream reader that streams all String fields in the 
document with space as
+   * separator
    *
    * @param doc Solr input document
    * @param maxCharsPerFieldValue max chars to consume per field value
    * @param maxTotalChars max chars to consume total
    */
-  public SolrInputDocumentReader(SolrInputDocument doc, int maxTotalChars, int 
maxCharsPerFieldValue) {
+  public SolrInputDocumentReader(
+      SolrInputDocument doc, int maxTotalChars, int maxCharsPerFieldValue) {
     this(doc, getStringFields(doc), maxTotalChars, maxCharsPerFieldValue, " ");
   }
-  
+
   /**
-   * Creates a character-stream reader that reads the listed fields in order, 
with
-   * max lengths as specified.
+   * Creates a character-stream reader that reads the listed fields in order, 
with max lengths as
+   * specified.
    *
    * @param doc Solr input document
    * @param fields list of field names to include
@@ -75,12 +79,17 @@ public class SolrInputDocumentReader extends Reader {
    * @param maxCharsPerFieldValue max chars to consume per field value
    * @param maxTotalChars max chars to consume total
    */
-  public SolrInputDocumentReader(SolrInputDocument doc, String[] fields, int 
maxTotalChars,
-                                 int maxCharsPerFieldValue, String 
fieldValueSep) {
+  public SolrInputDocumentReader(
+      SolrInputDocument doc,
+      String[] fields,
+      int maxTotalChars,
+      int maxCharsPerFieldValue,
+      String fieldValueSep) {
     this.doc = doc;
     this.fields = fields;
     this.fieldValueSep = fieldValueSep;
-    if (fields == null || fields.length == 0) throw new 
IllegalArgumentException("fields cannot be empty");
+    if (fields == null || fields.length == 0)
+      throw new IllegalArgumentException("fields cannot be empty");
     this.maxTotalChars = maxTotalChars;
     this.maxCharsPerFieldValue = maxCharsPerFieldValue;
   }
@@ -116,13 +125,13 @@ public class SolrInputDocumentReader extends Reader {
   }
 
   private int nextDocChunk(StringBuilder sb, int maxChunkLength) {
-    if (currentFieldIdx > fields.length-1) {
+    if (currentFieldIdx > fields.length - 1) {
       return returnEod();
     }
 
     int startFieldValueIdx = currentFieldValueIdx;
     int startFieldValueOffset = currentFieldValueOffset;
-    
+
     do {
       SolrInputField f = doc.getField(fields[currentFieldIdx]);
       if (f == null) {
@@ -139,7 +148,7 @@ public class SolrInputDocumentReader extends Reader {
         startFieldValueIdx = 0;
         if (sb.length() > 0) {
           if (maxChunkLength - sb.length() < fieldValueSep.length()) {
-            sb.append(fieldValueSep.substring(0,maxChunkLength - sb.length()));
+            sb.append(fieldValueSep.substring(0, maxChunkLength - 
sb.length()));
           } else {
             sb.append(fieldValueSep);
           }
@@ -162,7 +171,7 @@ public class SolrInputDocumentReader extends Reader {
       } else {
         incField(sb);
       }
-    } while (currentFieldIdx <= fields.length-1 && sb.length() < 
maxChunkLength);
+    } while (currentFieldIdx <= fields.length - 1 && sb.length() < 
maxChunkLength);
     return sb.length() == 0 ? eodReturnValue : sb.length();
   }
 
@@ -186,7 +195,9 @@ public class SolrInputDocumentReader extends Reader {
   }
 
   @Override
-  public void close() throws IOException { /* ignored */ }
+  public void close() throws IOException {
+    /* ignored */
+  }
 
   @Override
   public boolean ready() throws IOException {
@@ -194,8 +205,9 @@ public class SolrInputDocumentReader extends Reader {
   }
 
   /**
-   * Choose another return value than -1 for end of document reached.
-   * <b>Warning: Only to work around buggy consumers such as LangDetect 1.1</b>
+   * Choose another return value than -1 for end of document reached. 
<b>Warning: Only to work
+   * around buggy consumers such as LangDetect 1.1</b>
+   *
    * @param eodReturnValue integer which defaults to -1
    */
   public void setEodReturnValue(int eodReturnValue) {
@@ -203,22 +215,26 @@ public class SolrInputDocumentReader extends Reader {
   }
 
   /**
-   * Gets the whole reader as a String 
+   * Gets the whole reader as a String
+   *
    * @return string of concatenated fields
    */
   public static String asString(Reader reader) {
     try {
       return IOUtils.toString(reader);
     } catch (IOException e) {
-      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Failed 
reading doc content from reader", e);
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, "Failed reading doc content 
from reader", e);
     }
   }
-  
+
   protected static String[] getStringFields(SolrInputDocument doc) {
     Iterable<SolrInputField> iterable = () -> doc.iterator();
-        List<String> strFields = StreamSupport.stream(iterable.spliterator(), 
false)
+    List<String> strFields =
+        StreamSupport.stream(iterable.spliterator(), false)
             .filter(f -> f.getFirstValue() instanceof String)
-            .map(SolrInputField::getName).collect(Collectors.toList());
-        return strFields.toArray(new String[0]);
+            .map(SolrInputField::getName)
+            .collect(Collectors.toList());
+    return strFields.toArray(new String[0]);
   }
 }
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
index 5537780..64c57e7 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java
@@ -20,7 +20,6 @@ import java.io.Reader;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.List;
-
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.tika.language.LanguageIdentifier;
@@ -28,19 +27,20 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Identifies the language of a set of input fields using Tika's
- * LanguageIdentifier.
- * The tika-core-x.y.jar must be on the classpath
- * <p>
- * See <a 
href="https://solr.apache.org/guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/language-detection.html#configuring-tika-language-detection</a>
+ * Identifies the language of a set of input fields using Tika's 
LanguageIdentifier. The
+ * tika-core-x.y.jar must be on the classpath
+ *
+ * <p>See <a
+ * 
href="https://solr.apache.org/guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/language-detection.html#configuring-tika-language-detection</a>
+ *
  * @since 3.5
  */
 public class TikaLanguageIdentifierUpdateProcessor extends 
LanguageIdentifierUpdateProcessor {
 
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  public TikaLanguageIdentifierUpdateProcessor(SolrQueryRequest req,
-      SolrQueryResponse rsp, UpdateRequestProcessor next) {
+  public TikaLanguageIdentifierUpdateProcessor(
+      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor 
next) {
     super(req, rsp, next);
   }
 
@@ -51,16 +51,21 @@ public class TikaLanguageIdentifierUpdateProcessor extends 
LanguageIdentifierUpd
     if (content.length() != 0) {
       LanguageIdentifier identifier = new LanguageIdentifier(content);
       // FIXME: Hack - we get the distance from toString and calculate our own 
certainty score
-      Double distance = 
Double.parseDouble(tikaSimilarityPattern.matcher(identifier.toString()).replaceFirst("$1"));
-      // This formula gives: 0.02 => 0.8, 0.1 => 0.5 which is a better 
sweetspot than isReasonablyCertain()
+      Double distance =
+          Double.parseDouble(
+              
tikaSimilarityPattern.matcher(identifier.toString()).replaceFirst("$1"));
+      // This formula gives: 0.02 => 0.8, 0.1 => 0.5 which is a better 
sweetspot than
+      // isReasonablyCertain()
       Double certainty = 1 - (5 * distance);
-      if (certainty < 0)
-        certainty = 0d;
+      if (certainty < 0) certainty = 0d;
       DetectedLanguage language = new 
DetectedLanguage(identifier.getLanguage(), certainty);
       languages.add(language);
       if (log.isDebugEnabled()) {
-        log.debug("Language detected as {} with a certainty of {} (Tika 
distance={})"
-            , language, language.getCertainty(), identifier);
+        log.debug(
+            "Language detected as {} with a certainty of {} (Tika 
distance={})",
+            language,
+            language.getCertainty(),
+            identifier);
       }
     } else {
       log.debug("No input text to detect language from, returning empty list");
diff --git 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
index 4c79dd5..a1ea4bf 100644
--- 
a/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
+++ 
b/solr/modules/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactory.java
@@ -25,12 +25,12 @@ import org.apache.solr.util.SolrPluginUtils;
 import org.apache.solr.util.plugin.SolrCoreAware;
 
 /**
- * Identifies the language of a set of input fields using Tika's
- * LanguageIdentifier. The tika-core-x.y.jar must be on the classpath
- * <p>
- * The UpdateProcessorChain config entry can take a number of parameters
- * which may also be passed as HTTP parameters on the update request
- * and override the defaults. Here is the simplest processor config possible:
+ * Identifies the language of a set of input fields using Tika's 
LanguageIdentifier. The
+ * tika-core-x.y.jar must be on the classpath
+ *
+ * <p>The UpdateProcessorChain config entry can take a number of parameters 
which may also be passed
+ * as HTTP parameters on the update request and override the defaults. Here is 
the simplest
+ * processor config possible:
  *
  * <pre class="prettyprint" >
  * &lt;processor 
class=&quot;org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory&quot;&gt;
@@ -38,28 +38,30 @@ import org.apache.solr.util.plugin.SolrCoreAware;
  *   &lt;str name=&quot;langid.langField&quot;&gt;language_s&lt;/str&gt;
  * &lt;/processor&gt;
  * </pre>
- * See <a 
href="https://solr.apache.org/guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/language-detection.html#configuring-tika-language-detection</a>
+ *
+ * See <a
+ * 
href="https://solr.apache.org/guide/language-detection.html#configuring-tika-language-detection">https://solr.apache.org/guide/language-detection.html#configuring-tika-language-detection</a>
+ *
  * @since 3.5
  */
-public class TikaLanguageIdentifierUpdateProcessorFactory extends
-        UpdateRequestProcessorFactory implements SolrCoreAware, LangIdParams {
+public class TikaLanguageIdentifierUpdateProcessorFactory extends 
UpdateRequestProcessorFactory
+    implements SolrCoreAware, LangIdParams {
 
   protected SolrParams defaults;
   protected SolrParams appends;
   protected SolrParams invariants;
 
   @Override
-  public void inform(SolrCore core) {
-  }
+  public void inform(SolrCore core) {}
 
   /**
-   * The UpdateRequestProcessor may be initialized in solrconfig.xml similarly
-   * to a RequestHandler, with defaults, appends and invariants.
+   * The UpdateRequestProcessor may be initialized in solrconfig.xml similarly 
to a RequestHandler,
+   * with defaults, appends and invariants.
+   *
    * @param args a NamedList with the configuration parameters
    */
   @Override
-  public void init(NamedList<?> args )
-  {
+  public void init(NamedList<?> args) {
     if (args != null) {
       Object o;
       o = args.get("defaults");
@@ -80,14 +82,12 @@ public class TikaLanguageIdentifierUpdateProcessorFactory 
extends
   }
 
   @Override
-  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
-                                            SolrQueryResponse rsp, 
UpdateRequestProcessor next) {
+  public UpdateRequestProcessor getInstance(
+      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor 
next) {
     // Process defaults, appends and invariants if we got a request
-    if(req != null) {
+    if (req != null) {
       SolrPluginUtils.setDefaults(req, defaults, appends, invariants);
     }
     return new TikaLanguageIdentifierUpdateProcessor(req, rsp, next);
   }
-
-
 }
diff --git 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java
 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java
index e7d3c15..722205c 100644
--- 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java
+++ 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactoryTest.java
@@ -20,12 +20,15 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.junit.Test;
 
-public class LangDetectLanguageIdentifierUpdateProcessorFactoryTest extends 
LanguageIdentifierUpdateProcessorFactoryTestCase {
+public class LangDetectLanguageIdentifierUpdateProcessorFactoryTest
+    extends LanguageIdentifierUpdateProcessorFactoryTestCase {
   @Override
-  protected LanguageIdentifierUpdateProcessor 
createLangIdProcessor(ModifiableSolrParams parameters) throws Exception {
-    return new 
LangDetectLanguageIdentifierUpdateProcessor(_parser.buildRequestFrom(h.getCore(),
 parameters, null), resp, null);
+  protected LanguageIdentifierUpdateProcessor 
createLangIdProcessor(ModifiableSolrParams parameters)
+      throws Exception {
+    return new LangDetectLanguageIdentifierUpdateProcessor(
+        _parser.buildRequestFrom(h.getCore(), parameters, null), resp, null);
   }
-  
+
   // this one actually works better it seems with short docs
   @Override
   protected SolrInputDocument tooShortDoc() {
@@ -33,29 +36,114 @@ public class 
LangDetectLanguageIdentifierUpdateProcessorFactoryTest extends Lang
     doc.addField("text", "");
     return doc;
   }
-  
+
   /* we don't return 'un' for the super-short one (this detector things 
hungarian?).
    * replace this with japanese
    */
-  @Test @Override
+  @Test
+  @Override
   public void testLangIdGlobal() throws Exception {
     ModifiableSolrParams parameters = new ModifiableSolrParams();
     parameters.add("langid.fl", "name,subject");
     parameters.add("langid.langField", "language_s");
     parameters.add("langid.fallback", "un");
     liProcessor = createLangIdProcessor(parameters);
-    
-    assertLang("no", "id", "1no", "name", "Lucene", "subject", "Lucene er et 
fri/åpen kildekode programvarebibliotek for informasjonsgjenfinning, 
opprinnelig utviklet i programmeringsspråket Java av Doug Cutting. Lucene 
støttes av Apache Software Foundation og utgis under Apache-lisensen.");
-    assertLang("en", "id", "2en", "name", "Lucene", "subject", "Apache Lucene 
is a free/open source information retrieval software library, originally 
created in Java by Doug Cutting. It is supported by the Apache Software 
Foundation and is released under the Apache Software License.");
-    assertLang("sv", "id", "3sv", "name", "Maven", "subject", "Apache Maven är 
ett verktyg utvecklat av Apache Software Foundation och används inom 
systemutveckling av datorprogram i programspråket Java. Maven används för att 
automatiskt paketera (bygga) programfilerna till en distribuerbar enhet. Maven 
används inom samma område som Apache Ant men dess byggfiler är deklarativa till 
skillnad ifrån Ants skriptbaserade.");
-    assertLang("es", "id", "4es", "name", "Lucene", "subject", "Lucene es un 
API de código abierto para recuperación de información, originalmente 
implementada en Java por Doug Cutting. Está apoyado por el Apache Software 
Foundation y se distribuye bajo la Apache Software License. Lucene tiene 
versiones para otros lenguajes incluyendo Delphi, Perl, C#, C++, Python, Ruby y 
PHP.");
-    assertLang("ja", "id", "5ja", "name", "Japanese", "subject", 
"日本語（にほんご、にっぽんご）は主として、日本で使用されてきた言語である。日本国は法令上、公用語を明記していないが、事実上の公用語となっており、学校教育の「国語」で教えられる。");
-    assertLang("th", "id", "6th", "name", "บทความคัดสรรเดือนนี้", "subject", 
"อันเนอลีส มารี อันเนอ ฟรังค์ หรือมักรู้จักในภาษาไทยว่า แอนน์ แฟรงค์ 
เป็นเด็กหญิงชาวยิว เกิดที่เมืองแฟรงก์เฟิร์ต ประเทศเยอรมนี 
เธอมีชื่อเสียงโด่งดังในฐานะผู้เขียนบันทึกประจำวันซึ่งต่อมาได้รับการตีพิมพ์เป็นหนังสือ
 บรรยายเหตุการณ์ขณะหลบซ่อนตัวจากการล่าชาวยิวในประเทศเนเธอร์แลนด์ 
ระหว่างที่ถูกเยอรมนีเข้าค
 รอบครองในช่วงสงครามโลกครั้งที่สอง");
-    assertLang("ru", "id", "7ru", "name", "Lucene", "subject", "The Apache 
Lucene — это свободная библиотека для высокоскоростного полнотекстового поиска, 
написанная на Java. Может быть использована для поиска в интернете и других 
областях компьютерной лингвистики (аналитическая философия).");
-    assertLang("de", "id", "8de", "name", "Lucene", "subject", "Lucene ist ein 
Freie-Software-Projekt der Apache Software Foundation, das eine Suchsoftware 
erstellt. Durch die hohe Leistungsfähigkeit und Skalierbarkeit können die 
Lucene-Werkzeuge für beliebige Projektgrößen und Anforderungen eingesetzt 
werden. So setzt beispielsweise Wikipedia Lucene für die Volltextsuche ein. 
Zudem verwenden die beiden Desktop-Suchprogramme Beagle und Strigi eine C#- 
bzw. C++- Portierung von Lucene als  [...]
-    assertLang("fr", "id", "9fr", "name", "Lucene", "subject", "Lucene est un 
moteur de recherche libre écrit en Java qui permet d'indexer et de rechercher 
du texte. C'est un projet open source de la fondation Apache mis à disposition 
sous licence Apache. Il est également disponible pour les langages Ruby, Perl, 
C++, PHP.");
-    assertLang("nl", "id", "10nl", "name", "Lucene", "subject", "Lucene is een 
gratis open source, tekst gebaseerde information retrieval API van origine 
geschreven in Java door Doug Cutting. Het wordt ondersteund door de Apache 
Software Foundation en is vrijgegeven onder de Apache Software Licentie. Lucene 
is ook beschikbaar in andere programeertalen zoals Perl, C#, C++, Python, Ruby 
en PHP.");
-    assertLang("it", "id", "11it", "name", "Lucene", "subject", "Lucene è una 
API gratuita ed open source per il reperimento di informazioni inizialmente 
implementata in Java da Doug Cutting. È supportata dall'Apache Software 
Foundation ed è resa disponibile con l'Apache License. Lucene è stata 
successivamente reimplementata in Perl, C#, C++, Python, Ruby e PHP.");
-    assertLang("pt", "id", "12pt", "name", "Lucene", "subject", "Apache 
Lucene, ou simplesmente Lucene, é um software de busca e uma API de indexação 
de documentos, escrito na linguagem de programação Java. É um software de 
código aberto da Apache Software Foundation licenciado através da licença 
Apache.");
+
+    assertLang(
+        "no",
+        "id",
+        "1no",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene er et fri/åpen kildekode programvarebibliotek for 
informasjonsgjenfinning, opprinnelig utviklet i programmeringsspråket Java av 
Doug Cutting. Lucene støttes av Apache Software Foundation og utgis under 
Apache-lisensen.");
+    assertLang(
+        "en",
+        "id",
+        "2en",
+        "name",
+        "Lucene",
+        "subject",
+        "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License.");
+    assertLang(
+        "sv",
+        "id",
+        "3sv",
+        "name",
+        "Maven",
+        "subject",
+        "Apache Maven är ett verktyg utvecklat av Apache Software Foundation 
och används inom systemutveckling av datorprogram i programspråket Java. Maven 
används för att automatiskt paketera (bygga) programfilerna till en 
distribuerbar enhet. Maven används inom samma område som Apache Ant men dess 
byggfiler är deklarativa till skillnad ifrån Ants skriptbaserade.");
+    assertLang(
+        "es",
+        "id",
+        "4es",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene es un API de código abierto para recuperación de información, 
originalmente implementada en Java por Doug Cutting. Está apoyado por el Apache 
Software Foundation y se distribuye bajo la Apache Software License. Lucene 
tiene versiones para otros lenguajes incluyendo Delphi, Perl, C#, C++, Python, 
Ruby y PHP.");
+    assertLang(
+        "ja",
+        "id",
+        "5ja",
+        "name",
+        "Japanese",
+        "subject",
+        
"日本語（にほんご、にっぽんご）は主として、日本で使用されてきた言語である。日本国は法令上、公用語を明記していないが、事実上の公用語となっており、学校教育の「国語」で教えられる。");
+    assertLang(
+        "th",
+        "id",
+        "6th",
+        "name",
+        "บทความคัดสรรเดือนนี้",
+        "subject",
+        "อันเนอลีส มารี อันเนอ ฟรังค์ หรือมักรู้จักในภาษาไทยว่า แอนน์ แฟรงค์ 
เป็นเด็กหญิงชาวยิว เกิดที่เมืองแฟรงก์เฟิร์ต ประเทศเยอรมนี 
เธอมีชื่อเสียงโด่งดังในฐานะผู้เขียนบันทึกประจำวันซึ่งต่อมาได้รับการตีพิมพ์เป็นหนังสือ
 บรรยายเหตุการณ์ขณะหลบซ่อนตัวจากการล่าชาวยิวในประเทศเนเธอร์แลนด์ 
ระหว่างที่ถูกเยอรมนีเข้าครอบครองในช่วงสงครามโลกครั้งที่สอง");
+    assertLang(
+        "ru",
+        "id",
+        "7ru",
+        "name",
+        "Lucene",
+        "subject",
+        "The Apache Lucene — это свободная библиотека для высокоскоростного 
полнотекстового поиска, написанная на Java. Может быть использована для поиска 
в интернете и других областях компьютерной лингвистики (аналитическая 
философия).");
+    assertLang(
+        "de",
+        "id",
+        "8de",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene ist ein Freie-Software-Projekt der Apache Software Foundation, 
das eine Suchsoftware erstellt. Durch die hohe Leistungsfähigkeit und 
Skalierbarkeit können die Lucene-Werkzeuge für beliebige Projektgrößen und 
Anforderungen eingesetzt werden. So setzt beispielsweise Wikipedia Lucene für 
die Volltextsuche ein. Zudem verwenden die beiden Desktop-Suchprogramme Beagle 
und Strigi eine C#- bzw. C++- Portierung von Lucene als Indexer.");
+    assertLang(
+        "fr",
+        "id",
+        "9fr",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene est un moteur de recherche libre écrit en Java qui permet 
d'indexer et de rechercher du texte. C'est un projet open source de la 
fondation Apache mis à disposition sous licence Apache. Il est également 
disponible pour les langages Ruby, Perl, C++, PHP.");
+    assertLang(
+        "nl",
+        "id",
+        "10nl",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene is een gratis open source, tekst gebaseerde information 
retrieval API van origine geschreven in Java door Doug Cutting. Het wordt 
ondersteund door de Apache Software Foundation en is vrijgegeven onder de 
Apache Software Licentie. Lucene is ook beschikbaar in andere programeertalen 
zoals Perl, C#, C++, Python, Ruby en PHP.");
+    assertLang(
+        "it",
+        "id",
+        "11it",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene è una API gratuita ed open source per il reperimento di 
informazioni inizialmente implementata in Java da Doug Cutting. È supportata 
dall'Apache Software Foundation ed è resa disponibile con l'Apache License. 
Lucene è stata successivamente reimplementata in Perl, C#, C++, Python, Ruby e 
PHP.");
+    assertLang(
+        "pt",
+        "id",
+        "12pt",
+        "name",
+        "Lucene",
+        "subject",
+        "Apache Lucene, ou simplesmente Lucene, é um software de busca e uma 
API de indexação de documentos, escrito na linguagem de programação Java. É um 
software de código aberto da Apache Software Foundation licenciado através da 
licença Apache.");
   }
 }
diff --git 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
index 4b19900..95a3a3b 100644
--- 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
+++ 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
@@ -18,7 +18,6 @@ package org.apache.solr.update.processor;
 
 import java.util.ArrayList;
 import java.util.List;
-
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
@@ -37,7 +36,10 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
 
   @BeforeClass
   public static void beforeClass() throws Exception {
-    initCore("solrconfig-languageidentifier.xml", "schema.xml", 
getFile("langid/solr").getAbsolutePath());
+    initCore(
+        "solrconfig-languageidentifier.xml",
+        "schema.xml",
+        getFile("langid/solr").getAbsolutePath());
     SolrCore core = h.getCore();
     UpdateRequestProcessorChain chained = 
core.getUpdateProcessingChain("lang_id_tika");
     assertNotNull(chained);
@@ -62,30 +64,163 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     parameters.add("langid.langField", "language_s");
     parameters.add("langid.fallback", "un");
     liProcessor = createLangIdProcessor(parameters);
-    
-    assertLang("no", "id", "1no", "name", "Lucene", "subject", "Lucene er et 
fri/åpen kildekode programvarebibliotek for informasjonsgjenfinning, 
opprinnelig utviklet i programmeringsspråket Java av Doug Cutting. Lucene 
støttes av Apache Software Foundation og utgis under Apache-lisensen.");
-    assertLang("en", "id", "2en", "name", "Lucene", "subject", "Apache Lucene 
is a free/open source information retrieval software library, originally 
created in Java by Doug Cutting. It is supported by the Apache Software 
Foundation and is released under the Apache Software License.");
-    assertLang("sv", "id", "3sv", "name", "Maven", "subject", "Apache Maven är 
ett verktyg utvecklat av Apache Software Foundation och används inom 
systemutveckling av datorprogram i programspråket Java. Maven används för att 
automatiskt paketera (bygga) programfilerna till en distribuerbar enhet. Maven 
används inom samma område som Apache Ant men dess byggfiler är deklarativa till 
skillnad ifrån Ants skriptbaserade.");
-    assertLang("es", "id", "4es", "name", "Español", "subject", "El español, 
como las otras lenguas romances, es una continuación moderna del latín hablado 
(denominado latín vulgar), desde el siglo III, que tras el desmembramiento del 
Imperio romano fue divergiendo de las otras variantes del latín que se hablaban 
en las distintas provincias del antiguo Imperio, dando lugar mediante una lenta 
evolución a las distintas lenguas romances. Debido a su propagación por 
América, el español es, c [...]
+
+    assertLang(
+        "no",
+        "id",
+        "1no",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene er et fri/åpen kildekode programvarebibliotek for 
informasjonsgjenfinning, opprinnelig utviklet i programmeringsspråket Java av 
Doug Cutting. Lucene støttes av Apache Software Foundation og utgis under 
Apache-lisensen.");
+    assertLang(
+        "en",
+        "id",
+        "2en",
+        "name",
+        "Lucene",
+        "subject",
+        "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License.");
+    assertLang(
+        "sv",
+        "id",
+        "3sv",
+        "name",
+        "Maven",
+        "subject",
+        "Apache Maven är ett verktyg utvecklat av Apache Software Foundation 
och används inom systemutveckling av datorprogram i programspråket Java. Maven 
används för att automatiskt paketera (bygga) programfilerna till en 
distribuerbar enhet. Maven används inom samma område som Apache Ant men dess 
byggfiler är deklarativa till skillnad ifrån Ants skriptbaserade.");
+    assertLang(
+        "es",
+        "id",
+        "4es",
+        "name",
+        "Español",
+        "subject",
+        "El español, como las otras lenguas romances, es una continuación 
moderna del latín hablado (denominado latín vulgar), desde el siglo III, que 
tras el desmembramiento del Imperio romano fue divergiendo de las otras 
variantes del latín que se hablaban en las distintas provincias del antiguo 
Imperio, dando lugar mediante una lenta evolución a las distintas lenguas 
romances. Debido a su propagación por América, el español es, con diferencia, 
la lengua romance que ha logrado mayor di [...]
     assertLang("un", "id", "5un", "name", "a", "subject", "b");
-    assertLang("th", "id", "6th", "name", "บทความคัดสรรเดือนนี้", "subject", 
"อันเนอลีส มารี อันเนอ ฟรังค์ หรือมักรู้จักในภาษาไทยว่า แอนน์ แฟรงค์ 
เป็นเด็กหญิงชาวยิว เกิดที่เมืองแฟรงก์เฟิร์ต ประเทศเยอรมนี 
เธอมีชื่อเสียงโด่งดังในฐานะผู้เขียนบันทึกประจำวันซึ่งต่อมาได้รับการตีพิมพ์เป็นหนังสือ
 บรรยายเหตุการณ์ขณะหลบซ่อนตัวจากการล่าชาวยิวในประเทศเนเธอร์แลนด์ 
ระหว่างที่ถูกเยอรมนีเข้าค
 รอบครองในช่วงสงครามโลกครั้งที่สอง");
-    assertLang("ru", "id", "7ru", "name", "Lucene", "subject", "The Apache 
Lucene — это свободная библиотека для высокоскоростного полнотекстового поиска, 
написанная на Java. Может быть использована для поиска в интернете и других 
областях компьютерной лингвистики (аналитическая философия).");
-    assertLang("de", "id", "8de", "name", "Lucene", "subject", "Lucene ist ein 
Freie-Software-Projekt der Apache Software Foundation, das eine Suchsoftware 
erstellt. Durch die hohe Leistungsfähigkeit und Skalierbarkeit können die 
Lucene-Werkzeuge für beliebige Projektgrößen und Anforderungen eingesetzt 
werden. So setzt beispielsweise Wikipedia Lucene für die Volltextsuche ein. 
Zudem verwenden die beiden Desktop-Suchprogramme Beagle und Strigi eine C#- 
bzw. C++- Portierung von Lucene als  [...]
-    assertLang("fr", "id", "9fr", "name", "Lucene", "subject", "Lucene est un 
moteur de recherche libre écrit en Java qui permet d'indexer et de rechercher 
du texte. C'est un projet open source de la fondation Apache mis à disposition 
sous licence Apache. Il est également disponible pour les langages Ruby, Perl, 
C++, PHP.");
-    assertLang("nl", "id", "10nl", "name", "Lucene", "subject", "Lucene is een 
gratis open source, tekst gebaseerde information retrieval API van origine 
geschreven in Java door Doug Cutting. Het wordt ondersteund door de Apache 
Software Foundation en is vrijgegeven onder de Apache Software Licentie. Lucene 
is ook beschikbaar in andere programeertalen zoals Perl, C#, C++, Python, Ruby 
en PHP.");
-    assertLang("it", "id", "11it", "name", "Lucene", "subject", "Lucene è una 
API gratuita ed open source per il reperimento di informazioni inizialmente 
implementata in Java da Doug Cutting. È supportata dall'Apache Software 
Foundation ed è resa disponibile con l'Apache License. Lucene è stata 
successivamente reimplementata in Perl, C#, C++, Python, Ruby e PHP.");
-    assertLang("pt", "id", "12pt", "name", "Lucene", "subject", "Apache 
Lucene, ou simplesmente Lucene, é um software de busca e uma API de indexação 
de documentos, escrito na linguagem de programação Java. É um software de 
código aberto da Apache Software Foundation licenciado através da licença 
Apache.");
+    assertLang(
+        "th",
+        "id",
+        "6th",
+        "name",
+        "บทความคัดสรรเดือนนี้",
+        "subject",
+        "อันเนอลีส มารี อันเนอ ฟรังค์ หรือมักรู้จักในภาษาไทยว่า แอนน์ แฟรงค์ 
เป็นเด็กหญิงชาวยิว เกิดที่เมืองแฟรงก์เฟิร์ต ประเทศเยอรมนี 
เธอมีชื่อเสียงโด่งดังในฐานะผู้เขียนบันทึกประจำวันซึ่งต่อมาได้รับการตีพิมพ์เป็นหนังสือ
 บรรยายเหตุการณ์ขณะหลบซ่อนตัวจากการล่าชาวยิวในประเทศเนเธอร์แลนด์ 
ระหว่างที่ถูกเยอรมนีเข้าครอบครองในช่วงสงครามโลกครั้งที่สอง");
+    assertLang(
+        "ru",
+        "id",
+        "7ru",
+        "name",
+        "Lucene",
+        "subject",
+        "The Apache Lucene — это свободная библиотека для высокоскоростного 
полнотекстового поиска, написанная на Java. Может быть использована для поиска 
в интернете и других областях компьютерной лингвистики (аналитическая 
философия).");
+    assertLang(
+        "de",
+        "id",
+        "8de",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene ist ein Freie-Software-Projekt der Apache Software Foundation, 
das eine Suchsoftware erstellt. Durch die hohe Leistungsfähigkeit und 
Skalierbarkeit können die Lucene-Werkzeuge für beliebige Projektgrößen und 
Anforderungen eingesetzt werden. So setzt beispielsweise Wikipedia Lucene für 
die Volltextsuche ein. Zudem verwenden die beiden Desktop-Suchprogramme Beagle 
und Strigi eine C#- bzw. C++- Portierung von Lucene als Indexer.");
+    assertLang(
+        "fr",
+        "id",
+        "9fr",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene est un moteur de recherche libre écrit en Java qui permet 
d'indexer et de rechercher du texte. C'est un projet open source de la 
fondation Apache mis à disposition sous licence Apache. Il est également 
disponible pour les langages Ruby, Perl, C++, PHP.");
+    assertLang(
+        "nl",
+        "id",
+        "10nl",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene is een gratis open source, tekst gebaseerde information 
retrieval API van origine geschreven in Java door Doug Cutting. Het wordt 
ondersteund door de Apache Software Foundation en is vrijgegeven onder de 
Apache Software Licentie. Lucene is ook beschikbaar in andere programeertalen 
zoals Perl, C#, C++, Python, Ruby en PHP.");
+    assertLang(
+        "it",
+        "id",
+        "11it",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene è una API gratuita ed open source per il reperimento di 
informazioni inizialmente implementata in Java da Doug Cutting. È supportata 
dall'Apache Software Foundation ed è resa disponibile con l'Apache License. 
Lucene è stata successivamente reimplementata in Perl, C#, C++, Python, Ruby e 
PHP.");
+    assertLang(
+        "pt",
+        "id",
+        "12pt",
+        "name",
+        "Lucene",
+        "subject",
+        "Apache Lucene, ou simplesmente Lucene, é um software de busca e uma 
API de indexação de documentos, escrito na linguagem de programação Java. É um 
software de código aberto da Apache Software Foundation licenciado através da 
licença Apache.");
     // New in Tika1.0
-    assertLang("ca", "id", "13ca", "name", "Catalan", "subject", "El català 
posseeix dos estàndards principals: el regulat per l'Institut d'Estudis 
Catalans, o estàndard general, que pren com a base l'ortografia establerta per 
Pompeu Fabra amb els trets gramaticals i ortogràfics característics del català 
central; i el regulat per l'Acadèmia Valenciana de la Llengua, estàndard 
d'àmbit restringit, centrat en l'estandardització del valencià i que pren com a 
base les Normes de Castelló, és a [...]
-    assertLang("be", "id", "14be", "name", "Belarusian", "subject", "Наступнай 
буйной дзяржавай на беларускай зямлі было Вялікае княства Літоўскае, Рускае і 
Жамойцкае (ВКЛ). Падчас стварэння і пачатковага развіцця гэтай дзяржавы 
найбуйнейшым і асноўным яе цэнтрам быў Новагародак. Акрамя сучасных земляў 
Беларусі, у склад гэтай дзяржавы ўваходзілі таксама землі сучаснай Літвы, 
паўночная частка сучаснай Украіны і частка сучаснай Расіі.");
-    assertLang("eo", "id", "15eo", "name", "Esperanto", "subject", "La 
vortprovizo de Esperanto devenas plejparte el la okcidenteŭropaj lingvoj, dum 
ĝia sintakso kaj morfologio montras ankaŭ slavlingvan influon. La morfemoj ne 
ŝanĝiĝas kaj oni povas ilin preskaŭ senlime kombini, kreante diverssignifajn 
vortojn, Esperanto do havas multajn kunaĵojn kun la analizaj lingvoj, al kiuj 
apartenas ekzemple la ĉina; kontraŭe la interna strukturo de Esperanto 
certagrade respegulas la aglutinajn lin [...]
-    assertLang("gl", "id", "16gl", "name", "Galician", "subject", "A cifra de 
falantes medrou axiña durante as décadas seguintes, nun principio no Imperio 
ruso e na Europa oriental, logo na Europa occidental, América, China e no 
Xapón. Nos primeiros anos do movemento, os esperantistas mantiñan contacto por 
correspondencia, pero en 1905 o primeiro Congreso Universal de Esperanto 
levouse a cabo na cidade francesa de Boulogne-sur-Mer. Dende entón, os 
congresos mundiais organizáronse nos cin [...]
-    assertLang("ro", "id", "17ro", "name", "Romanian", "subject", "La momentul 
destrămării Uniunii Sovietice și a înlăturării regimului comunist instalat în 
România (1989), țara a inițiat o serie de reforme economice și politice. După 
un deceniu de probleme economice, România a introdus noi reforme economice de 
ordin general (precum cota unică de impozitare, în 2005) și a aderat la Uniunea 
Europeană la 1 ianuarie 2007.");
-    assertLang("sk", "id", "18sk", "name", "Slovakian", "subject", "Boli 
vytvorené dva národné parlamenty - Česká národná rada a Slovenská národná rada 
a spoločný jednokomorový česko-slovenský parlament bol premenovaný z Národného 
zhromaždenia na Federálne zhromaždenie s dvoma komorami - Snemovňou ľudu a 
Snemovňu národov.");
-    assertLang("sl", "id", "19sl", "name", "Slovenian", "subject", "Slovenska 
Wikipedija je različica spletne enciklopedije Wikipedije v slovenskem jeziku. 
Projekt slovenske Wikipedije se je začel 26. februarja 2002 z ustanovitvijo 
njene spletne strani, njen pobudnik pa je bil uporabnik Jani Melik.");
-    assertLang("uk", "id", "20uk", "name", "Ukrainian", "subject", 
"Народно-господарський комплекс країни включає такі види промисловості як важке 
машинобудування, чорна та кольорова металургія, суднобудування, виробництво 
автобусів, легкових та вантажних автомобілів, тракторів та іншої 
сільськогосподарської техніки, тепловозів, верстатів, турбін, авіаційних 
двигунів та літаків, обладнання для електростанцій, нафто-газової та хімічної 
промисловості тощо. Крім того, Україна є потужним вир [...]
+    assertLang(
+        "ca",
+        "id",
+        "13ca",
+        "name",
+        "Catalan",
+        "subject",
+        "El català posseeix dos estàndards principals: el regulat per 
l'Institut d'Estudis Catalans, o estàndard general, que pren com a base 
l'ortografia establerta per Pompeu Fabra amb els trets gramaticals i 
ortogràfics característics del català central; i el regulat per l'Acadèmia 
Valenciana de la Llengua, estàndard d'àmbit restringit, centrat en 
l'estandardització del valencià i que pren com a base les Normes de Castelló, 
és a dir, l'ortografia de Pompeu Fabra però més adaptada a la [...]
+    assertLang(
+        "be",
+        "id",
+        "14be",
+        "name",
+        "Belarusian",
+        "subject",
+        "Наступнай буйной дзяржавай на беларускай зямлі было Вялікае княства 
Літоўскае, Рускае і Жамойцкае (ВКЛ). Падчас стварэння і пачатковага развіцця 
гэтай дзяржавы найбуйнейшым і асноўным яе цэнтрам быў Новагародак. Акрамя 
сучасных земляў Беларусі, у склад гэтай дзяржавы ўваходзілі таксама землі 
сучаснай Літвы, паўночная частка сучаснай Украіны і частка сучаснай Расіі.");
+    assertLang(
+        "eo",
+        "id",
+        "15eo",
+        "name",
+        "Esperanto",
+        "subject",
+        "La vortprovizo de Esperanto devenas plejparte el la okcidenteŭropaj 
lingvoj, dum ĝia sintakso kaj morfologio montras ankaŭ slavlingvan influon. La 
morfemoj ne ŝanĝiĝas kaj oni povas ilin preskaŭ senlime kombini, kreante 
diverssignifajn vortojn, Esperanto do havas multajn kunaĵojn kun la analizaj 
lingvoj, al kiuj apartenas ekzemple la ĉina; kontraŭe la interna strukturo de 
Esperanto certagrade respegulas la aglutinajn lingvojn, kiel la japanan, 
svahilan aŭ turkan.");
+    assertLang(
+        "gl",
+        "id",
+        "16gl",
+        "name",
+        "Galician",
+        "subject",
+        "A cifra de falantes medrou axiña durante as décadas seguintes, nun 
principio no Imperio ruso e na Europa oriental, logo na Europa occidental, 
América, China e no Xapón. Nos primeiros anos do movemento, os esperantistas 
mantiñan contacto por correspondencia, pero en 1905 o primeiro Congreso 
Universal de Esperanto levouse a cabo na cidade francesa de Boulogne-sur-Mer. 
Dende entón, os congresos mundiais organizáronse nos cinco continentes ano tras 
ano agás durante as dúas Guerras M [...]
+    assertLang(
+        "ro",
+        "id",
+        "17ro",
+        "name",
+        "Romanian",
+        "subject",
+        "La momentul destrămării Uniunii Sovietice și a înlăturării regimului 
comunist instalat în România (1989), țara a inițiat o serie de reforme 
economice și politice. După un deceniu de probleme economice, România a 
introdus noi reforme economice de ordin general (precum cota unică de 
impozitare, în 2005) și a aderat la Uniunea Europeană la 1 ianuarie 2007.");
+    assertLang(
+        "sk",
+        "id",
+        "18sk",
+        "name",
+        "Slovakian",
+        "subject",
+        "Boli vytvorené dva národné parlamenty - Česká národná rada a 
Slovenská národná rada a spoločný jednokomorový česko-slovenský parlament bol 
premenovaný z Národného zhromaždenia na Federálne zhromaždenie s dvoma komorami 
- Snemovňou ľudu a Snemovňu národov.");
+    assertLang(
+        "sl",
+        "id",
+        "19sl",
+        "name",
+        "Slovenian",
+        "subject",
+        "Slovenska Wikipedija je različica spletne enciklopedije Wikipedije v 
slovenskem jeziku. Projekt slovenske Wikipedije se je začel 26. februarja 2002 
z ustanovitvijo njene spletne strani, njen pobudnik pa je bil uporabnik Jani 
Melik.");
+    assertLang(
+        "uk",
+        "id",
+        "20uk",
+        "name",
+        "Ukrainian",
+        "subject",
+        "Народно-господарський комплекс країни включає такі види промисловості 
як важке машинобудування, чорна та кольорова металургія, суднобудування, 
виробництво автобусів, легкових та вантажних автомобілів, тракторів та іншої 
сільськогосподарської техніки, тепловозів, верстатів, турбін, авіаційних 
двигунів та літаків, обладнання для електростанцій, нафто-газової та хімічної 
промисловості тощо. Крім того, Україна є потужним виробником електроенергії. 
Україна має розвинуте сільське госп [...]
   }
-    
+
   @Test
   public void testMapFieldName() throws Exception {
     ModifiableSolrParams parameters = new ModifiableSolrParams();
@@ -93,7 +228,7 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     parameters.add("langid.map.lcmap", "jp:s zh:cjk ko:cjk");
     parameters.set("langid.enforceSchema", "false");
     liProcessor = createLangIdProcessor(parameters);
-    
+
     assertEquals("test_no", liProcessor.getMappedField("test", "no"));
     assertEquals("test_en", liProcessor.getMappedField("test", "en"));
     assertEquals("test_s", liProcessor.getMappedField("test", "jp"));
@@ -139,11 +274,11 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     parameters.add("langid.enforceSchema", "false");
     parameters.add("langid.map", "true");
     liProcessor = createLangIdProcessor(parameters);
-    
+
     doc = englishDoc();
     assertEquals("en", process(doc).getFieldValue("language"));
     assertEquals("en", process(doc).getFieldValue("languages"));
-    
+
     doc = englishDoc();
     doc.setField("language", "no");
     assertEquals("no", process(doc).getFieldValue("language"));
@@ -152,8 +287,8 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
   }
 
   /**
-   * Test not only 1st value taken into account (empty string),
-   * but all other values of 'text_multivalue' field ('en').
+   * Test not only 1st value taken into account (empty string), but all other 
values of
+   * 'text_multivalue' field ('en').
    */
   @Test
   public void testPreExistingMultiValue() throws Exception {
@@ -165,11 +300,11 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     parameters.add("langid.enforceSchema", "false");
     parameters.add("langid.map", "true");
     liProcessor = createLangIdProcessor(parameters);
-    
+
     doc = englishDoc();
     assertEquals("en", process(doc).getFieldValue("language"));
     assertEquals("en", process(doc).getFieldValue("languages"));
-    
+
     doc = englishDoc();
     doc.setField("language", "no");
     assertEquals("no", process(doc).getFieldValue("language"));
@@ -178,8 +313,8 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
   }
 
   /**
-   * Test not only 1st value taken into account (ru text),
-   * but all values of 'text_multivalue' field ('ru' and 'en').
+   * Test not only 1st value taken into account (ru text), but all values of 
'text_multivalue' field
+   * ('ru' and 'en').
    */
   @Test
   public void testPreExistingMultiValueMixedLang() throws Exception {
@@ -211,7 +346,7 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     parameters.add("langid.langField", "language");
     parameters.add("langid.enforceSchema", "false");
     liProcessor = createLangIdProcessor(parameters);
-    
+
     doc = tooShortDoc();
     assertEquals("", process(doc).getFieldValue("language"));
   }
@@ -239,7 +374,7 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     parameters.add("langid.fallback", "fbVal");
     parameters.add("langid.enforceSchema", "false");
     liProcessor = createLangIdProcessor(parameters);
-      
+
     // Verify fallback to field fb (noop field does not exist and is skipped)
     doc = tooShortDoc();
     doc.addField("fb", "fbField");
@@ -247,9 +382,9 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
 
     // Verify fallback to fallback value since no fallback fields exist
     doc = tooShortDoc();
-    assertEquals("fbVal", process(doc).getFieldValue("language"));  
+    assertEquals("fbVal", process(doc).getFieldValue("language"));
   }
-  
+
   @Test
   public void testResolveLanguage() throws Exception {
     List<DetectedLanguage> langs;
@@ -265,14 +400,14 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
 
     // One detected language
     langs.add(new DetectedLanguage("one", 1.0));
-    assertEquals("one", liProcessor.resolveLanguage(langs, "fallback"));    
+    assertEquals("one", liProcessor.resolveLanguage(langs, "fallback"));
 
     // One detected language under default threshold
     langs = new ArrayList<>();
     langs.add(new DetectedLanguage("under", 0.1));
-    assertEquals("fallback", liProcessor.resolveLanguage(langs, "fallback"));  
  
+    assertEquals("fallback", liProcessor.resolveLanguage(langs, "fallback"));
   }
-  
+
   @Test
   public void testKeepOrig() throws Exception {
     ModifiableSolrParams parameters = new ModifiableSolrParams();
@@ -289,7 +424,7 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     assertEquals("en", mappedNoOrig.getFieldValue("language"));
     assertTrue(mappedNoOrig.containsKey("text_en"));
     assertFalse(mappedNoOrig.containsKey("text"));
-    
+
     // keepOrig true
     parameters.set("langid.map.keepOrig", "true");
     liProcessor = createLangIdProcessor(parameters);
@@ -298,7 +433,7 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     assertTrue(mappedKeepOrig.containsKey("text_en"));
     assertTrue(mappedKeepOrig.containsKey("text"));
     assertEquals(englishDoc().getFieldValue("text"), 
mappedKeepOrig.getFieldValue("text_en"));
-    
+
     // keepOrig and map individual
     parameters.set("langid.map.individual", "true");
     parameters.set("langid.fl", "text,text2");
@@ -309,7 +444,8 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     assertTrue(mappedIndividual.containsKey("text"));
     assertTrue(mappedIndividual.containsKey("text2_ru"));
     assertTrue(mappedIndividual.containsKey("text2"));
-    assertEquals(languagePerFieldDoc().getFieldValue("text"), 
mappedIndividual.getFieldValue("text_en"));
+    assertEquals(
+        languagePerFieldDoc().getFieldValue("text"), 
mappedIndividual.getFieldValue("text_en"));
   }
 
   @Test
@@ -327,32 +463,45 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     assertTrue(mappedIndividual.containsKey("text_en"));
     assertTrue(mappedIndividual.containsKey("text2_ru"));
   }
-  
+
   // Various utility methods
-  
+
   private SolrInputDocument englishDoc() {
     SolrInputDocument doc = new SolrInputDocument();
-    doc.addField("text", "Apache Lucene is a free/open source information 
retrieval software library, originally created in Java by Doug Cutting. It is 
supported by the Apache Software Foundation and is released under the Apache 
Software License.");
-    doc.addField("text_multivalue", new String[]{"", "Apache Lucene is a 
free/open source information retrieval software library, originally created in 
Java by Doug Cutting. It is supported by the Apache Software Foundation and is 
released under the Apache Software License."});
+    doc.addField(
+        "text",
+        "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License.");
+    doc.addField(
+        "text_multivalue",
+        new String[] {
+          "",
+          "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License."
+        });
     return doc;
   }
 
   private SolrInputDocument languagePerFieldDoc() {
     SolrInputDocument doc = englishDoc();
-    doc.addField("text2", "The Apache Lucene — это свободная библиотека для 
высокоскоростного полнотекстового поиска, написанная на Java. Может быть 
использована для поиска в интернете и других областях компьютерной лингвистики 
(аналитическая философия).");
+    doc.addField(
+        "text2",
+        "The Apache Lucene — это свободная библиотека для высокоскоростного 
полнотекстового поиска, написанная на Java. Может быть использована для поиска 
в интернете и других областях компьютерной лингвистики (аналитическая 
философия).");
     return doc;
   }
-  
+
   /**
    * Construct document containing multi-value fields in different languages.
+   *
    * @return solr input document
    */
   private SolrInputDocument mixedEnglishRussianDoc() {
     SolrInputDocument doc = new SolrInputDocument();
-    doc.addField("text_multivalue", new String[]{"The Apache Lucene — это 
свободная библиотека для высокоскоростного полнотекстового поиска, написанная 
на Java. Может быть использована для поиска в интернете и других областях 
компьютерной лингвистики (аналитическая философия).",
-                                                 "Apache Lucene is a free/open 
source information retrieval software library, originally created in Java by 
Doug Cutting. It is supported by the Apache Software Foundation and is released 
under the Apache Software License.",
-        "Solr (pronounced \"solar\") is an open source enterprise search 
platform from the Apache Lucene project. Its major features include full-text 
search, hit highlighting, faceted search, dynamic clustering, database 
integration, and rich document (e.g., Word, PDF) handling."
-    });
+    doc.addField(
+        "text_multivalue",
+        new String[] {
+          "The Apache Lucene — это свободная библиотека для высокоскоростного 
полнотекстового поиска, написанная на Java. Может быть использована для поиска 
в интернете и других областях компьютерной лингвистики (аналитическая 
философия).",
+          "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License.",
+          "Solr (pronounced \"solar\") is an open source enterprise search 
platform from the Apache Lucene project. Its major features include full-text 
search, hit highlighting, faceted search, dynamic clustering, database 
integration, and rich document (e.g., Word, PDF) handling."
+        });
     return doc;
   }
 
@@ -362,23 +511,24 @@ public abstract class 
LanguageIdentifierUpdateProcessorFactoryTestCase extends S
     return doc;
   }
 
-  protected abstract LanguageIdentifierUpdateProcessor 
createLangIdProcessor(ModifiableSolrParams parameters) throws Exception;
+  protected abstract LanguageIdentifierUpdateProcessor createLangIdProcessor(
+      ModifiableSolrParams parameters) throws Exception;
 
   protected void assertLang(String langCode, String... fieldsAndValues) throws 
Exception {
-    if(liProcessor == null)
+    if (liProcessor == null)
       throw new Exception("Processor must be initialized before calling 
assertLang()");
     SolrInputDocument doc = sid(fieldsAndValues);
     assertEquals(langCode, process(doc).getFieldValue(liProcessor.langField));
   }
-  
+
   private SolrInputDocument sid(String... fieldsAndValues) {
     SolrInputDocument doc = new SolrInputDocument();
-    for (int i = 0; i < fieldsAndValues.length; i+=2) {
-      doc.addField(fieldsAndValues[i], fieldsAndValues[i+1]);
+    for (int i = 0; i < fieldsAndValues.length; i += 2) {
+      doc.addField(fieldsAndValues[i], fieldsAndValues[i + 1]);
     }
     return doc;
   }
-  
+
   /*
   Utility test method to process a clone of a document
    */
diff --git 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactoryTest.java
 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactoryTest.java
index 3016324..e09d8d8 100644
--- 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactoryTest.java
+++ 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/OpenNLPLangDetectUpdateProcessorFactoryTest.java
@@ -24,22 +24,26 @@ import org.apache.solr.request.SolrQueryRequest;
 import org.junit.Test;
 
 @ThreadLeakLingering(linger = 0)
-public class OpenNLPLangDetectUpdateProcessorFactoryTest extends 
LanguageIdentifierUpdateProcessorFactoryTestCase {
+public class OpenNLPLangDetectUpdateProcessorFactoryTest
+    extends LanguageIdentifierUpdateProcessorFactoryTestCase {
   private static final String TEST_MODEL = 
"opennlp-langdetect.eng-swe-spa-rus-deu.bin";
-  
+
   @Override
-  protected OpenNLPLangDetectUpdateProcessor 
createLangIdProcessor(ModifiableSolrParams parameters) throws Exception {
-    if (parameters.get("langid.model") == null) { // handle superclass tests 
that don't provide the model filename
+  protected OpenNLPLangDetectUpdateProcessor 
createLangIdProcessor(ModifiableSolrParams parameters)
+      throws Exception {
+    if (parameters.get("langid.model")
+        == null) { // handle superclass tests that don't provide the model 
filename
       parameters.set("langid.model", TEST_MODEL);
     }
-    if (parameters.get("langid.threshold") == null) { // handle superclass 
tests that don't provide confidence threshold
+    if (parameters.get("langid.threshold")
+        == null) { // handle superclass tests that don't provide confidence 
threshold
       parameters.set("langid.threshold", "0.3");
     }
     SolrQueryRequest req = _parser.buildRequestFrom(h.getCore(), new 
ModifiableSolrParams(), null);
     OpenNLPLangDetectUpdateProcessorFactory factory = new 
OpenNLPLangDetectUpdateProcessorFactory();
     factory.init(parameters.toNamedList());
     factory.inform(h.getCore());
-    return (OpenNLPLangDetectUpdateProcessor)factory.getInstance(req, resp, 
null);
+    return (OpenNLPLangDetectUpdateProcessor) factory.getInstance(req, resp, 
null);
   }
 
   // this one actually works better it seems with short docs
@@ -50,7 +54,8 @@ public class OpenNLPLangDetectUpdateProcessorFactoryTest 
extends LanguageIdentif
     return doc;
   }
 
-  @Test @Override
+  @Test
+  @Override
   public void testLangIdGlobal() throws Exception {
     ModifiableSolrParams parameters = new ModifiableSolrParams();
     parameters.add("langid.fl", "name,subject");
@@ -59,10 +64,45 @@ public class OpenNLPLangDetectUpdateProcessorFactoryTest 
extends LanguageIdentif
     parameters.add("langid.threshold", "0.3");
     liProcessor = createLangIdProcessor(parameters);
 
-    assertLang("en", "id", "1en", "name", "Lucene", "subject", "Apache Lucene 
is a free/open source information retrieval software library, originally 
created in Java by Doug Cutting. It is supported by the Apache Software 
Foundation and is released under the Apache Software License.");
-    assertLang("sv", "id", "2sv", "name", "Maven", "subject", "Apache Maven är 
ett verktyg utvecklat av Apache Software Foundation och används inom 
systemutveckling av datorprogram i programspråket Java. Maven används för att 
automatiskt paketera (bygga) programfilerna till en distribuerbar enhet. Maven 
används inom samma område som Apache Ant men dess byggfiler är deklarativa till 
skillnad ifrån Ants skriptbaserade.");
-    assertLang("es", "id", "3es", "name", "Lucene", "subject", "Lucene es un 
API de código abierto para recuperación de información, originalmente 
implementada en Java por Doug Cutting. Está apoyado por el Apache Software 
Foundation y se distribuye bajo la Apache Software License. Lucene tiene 
versiones para otros lenguajes incluyendo Delphi, Perl, C#, C++, Python, Ruby y 
PHP.");
-    assertLang("ru", "id", "4ru", "name", "Lucene", "subject", "The Apache 
Lucene — это свободная библиотека для высокоскоростного полнотекстового поиска, 
написанная на Java. Может быть использована для поиска в интернете и других 
областях компьютерной лингвистики (аналитическая философия).");
-    assertLang("de", "id", "5de", "name", "Lucene", "subject", "Lucene ist ein 
Freie-Software-Projekt der Apache Software Foundation, das eine Suchsoftware 
erstellt. Durch die hohe Leistungsfähigkeit und Skalierbarkeit können die 
Lucene-Werkzeuge für beliebige Projektgrößen und Anforderungen eingesetzt 
werden. So setzt beispielsweise Wikipedia Lucene für die Volltextsuche ein. 
Zudem verwenden die beiden Desktop-Suchprogramme Beagle und Strigi eine C#- 
bzw. C++- Portierung von Lucene als  [...]
+    assertLang(
+        "en",
+        "id",
+        "1en",
+        "name",
+        "Lucene",
+        "subject",
+        "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License.");
+    assertLang(
+        "sv",
+        "id",
+        "2sv",
+        "name",
+        "Maven",
+        "subject",
+        "Apache Maven är ett verktyg utvecklat av Apache Software Foundation 
och används inom systemutveckling av datorprogram i programspråket Java. Maven 
används för att automatiskt paketera (bygga) programfilerna till en 
distribuerbar enhet. Maven används inom samma område som Apache Ant men dess 
byggfiler är deklarativa till skillnad ifrån Ants skriptbaserade.");
+    assertLang(
+        "es",
+        "id",
+        "3es",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene es un API de código abierto para recuperación de información, 
originalmente implementada en Java por Doug Cutting. Está apoyado por el Apache 
Software Foundation y se distribuye bajo la Apache Software License. Lucene 
tiene versiones para otros lenguajes incluyendo Delphi, Perl, C#, C++, Python, 
Ruby y PHP.");
+    assertLang(
+        "ru",
+        "id",
+        "4ru",
+        "name",
+        "Lucene",
+        "subject",
+        "The Apache Lucene — это свободная библиотека для высокоскоростного 
полнотекстового поиска, написанная на Java. Может быть использована для поиска 
в интернете и других областях компьютерной лингвистики (аналитическая 
философия).");
+    assertLang(
+        "de",
+        "id",
+        "5de",
+        "name",
+        "Lucene",
+        "subject",
+        "Lucene ist ein Freie-Software-Projekt der Apache Software Foundation, 
das eine Suchsoftware erstellt. Durch die hohe Leistungsfähigkeit und 
Skalierbarkeit können die Lucene-Werkzeuge für beliebige Projektgrößen und 
Anforderungen eingesetzt werden. So setzt beispielsweise Wikipedia Lucene für 
die Volltextsuche ein. Zudem verwenden die beiden Desktop-Suchprogramme Beagle 
und Strigi eine C#- bzw. C++- Portierung von Lucene als Indexer.");
   }
 }
diff --git 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/SolrInputDocumentReaderTest.java
 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/SolrInputDocumentReaderTest.java
index 5e28a52..b6ceda7 100644
--- 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/SolrInputDocumentReaderTest.java
+++ 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/SolrInputDocumentReaderTest.java
@@ -16,16 +16,15 @@
  */
 package org.apache.solr.update.processor;
 
-import java.util.Arrays;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
+import java.util.Arrays;
 import org.apache.solr.common.SolrInputDocument;
 import org.junit.Before;
 import org.junit.Test;
 
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
 public class SolrInputDocumentReaderTest {
   private SolrInputDocument doc;
   private String[] allFields;
@@ -41,19 +40,14 @@ public class SolrInputDocumentReaderTest {
     doc.addField("f4", "12345678901234567890");
     allFields = new String[] {"f1", "f2", "f3", "f4"};
   }
-  
+
   @Test
   public void readChunked() throws Exception {
-    SolrInputDocumentReader reader = new SolrInputDocumentReader(
-        doc,
-        allFields,
-        20,
-        18,
-        " - ");
+    SolrInputDocumentReader reader = new SolrInputDocumentReader(doc, 
allFields, 20, 18, " - ");
     assertTrue(reader.ready());
     char[] chars = new char[1000];
     int len;
-    assertEquals(9, len=reader.read(chars, 0, 9));
+    assertEquals(9, len = reader.read(chars, 0, 9));
     assertArrEqu("a b c - m", chars, len);
     len += reader.read(chars, 9, 2);
     assertArrEqu("a b c - mul", chars, len);
@@ -66,13 +60,7 @@ public class SolrInputDocumentReaderTest {
 
   @Test
   public void maxFieldValueLength() throws Exception {
-    SolrInputDocumentReader reader = new SolrInputDocumentReader(
-        doc,
-        allFields,
-        21,
-        2,
-        " - "
-    );
+    SolrInputDocumentReader reader = new SolrInputDocumentReader(doc, 
allFields, 21, 2, " - ");
     assertTrue(reader.ready());
     char[] chars = new char[1000];
     int len = reader.read(chars, 0, 22);
@@ -82,18 +70,14 @@ public class SolrInputDocumentReaderTest {
 
   @Test
   public void allStrFields() throws Exception {
-    SolrInputDocumentReader reader = new SolrInputDocumentReader(
-        doc,
-        20000,
-        10000
-    );
+    SolrInputDocumentReader reader = new SolrInputDocumentReader(doc, 20000, 
10000);
     assertTrue(reader.ready());
     char[] chars = new char[1000];
     int len = reader.read(chars, 0, 1000);
     assertEquals(45, len);
     assertArrEqu("a b c multi valued field 12345678901234567890", chars, len);
   }
-  
+
   @Test
   public void testGetStringFields() throws Exception {
     String[] expected = new String[] {"f1", "f2", "f4"};
@@ -104,5 +88,4 @@ public class SolrInputDocumentReaderTest {
     String str = new String(Arrays.copyOf(chars, len));
     assertEquals(expected, str);
   }
-
-}
\ No newline at end of file
+}
diff --git 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
index 172b892..b110b39 100644
--- 
a/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
+++ 
b/solr/modules/langid/src/test/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessorFactoryTest.java
@@ -20,25 +20,30 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.junit.Test;
 
-public class TikaLanguageIdentifierUpdateProcessorFactoryTest extends 
LanguageIdentifierUpdateProcessorFactoryTestCase {
+public class TikaLanguageIdentifierUpdateProcessorFactoryTest
+    extends LanguageIdentifierUpdateProcessorFactoryTestCase {
   @Override
-  protected LanguageIdentifierUpdateProcessor 
createLangIdProcessor(ModifiableSolrParams parameters) throws Exception {
-    return new 
TikaLanguageIdentifierUpdateProcessor(_parser.buildRequestFrom(h.getCore(), 
parameters, null), resp, null);
+  protected LanguageIdentifierUpdateProcessor 
createLangIdProcessor(ModifiableSolrParams parameters)
+      throws Exception {
+    return new TikaLanguageIdentifierUpdateProcessor(
+        _parser.buildRequestFrom(h.getCore(), parameters, null), resp, null);
   }
 
-
   @Test
   public void testMaxFieldValueChars() throws Exception {
     SolrInputDocument doc = new SolrInputDocument();
-    String valueF1 = "Apache Lucene is a free/open source information 
retrieval software library, originally created in Java by Doug Cutting. It is 
supported by the Apache Software Foundation and is released under the Apache 
Software License.";
-    String valueF2 = "An open-source search server based on the Lucene Java 
search library. News, documentation, resources, and download.";
+    String valueF1 =
+        "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License.";
+    String valueF2 =
+        "An open-source search server based on the Lucene Java search library. 
News, documentation, resources, and download.";
     doc.addField("foo_s", valueF1);
 
     ModifiableSolrParams parameters = new ModifiableSolrParams();
     parameters.add("langid.fl", "foo_s");
     parameters.add("langid.langField", "language");
     parameters.add("langid.enforceSchema", "false");
-    TikaLanguageIdentifierUpdateProcessor p = 
(TikaLanguageIdentifierUpdateProcessor) createLangIdProcessor(parameters);
+    TikaLanguageIdentifierUpdateProcessor p =
+        (TikaLanguageIdentifierUpdateProcessor) 
createLangIdProcessor(parameters);
     assertEquals(valueF1, p.concatFields(doc).trim());
 
     parameters = new ModifiableSolrParams();
@@ -73,21 +78,23 @@ public class 
TikaLanguageIdentifierUpdateProcessorFactoryTest extends LanguageId
     parameters.add("langid.maxFieldValueChars", "100000");
     p = (TikaLanguageIdentifierUpdateProcessor) 
createLangIdProcessor(parameters);
     assertEquals(valueF1 + " " + valueF2, p.concatFields(doc).trim());
-
-}
+  }
 
   @Test
   public void testMaxTotalChars() throws Exception {
     SolrInputDocument doc = new SolrInputDocument();
-    String valueF1 = "Apache Lucene is a free/open source information 
retrieval software library, originally created in Java by Doug Cutting. It is 
supported by the Apache Software Foundation and is released under the Apache 
Software License.";
-    String valueF2 = "An open-source search server based on the Lucene Java 
search library. News, documentation, resources, and download.";
+    String valueF1 =
+        "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License.";
+    String valueF2 =
+        "An open-source search server based on the Lucene Java search library. 
News, documentation, resources, and download.";
     doc.addField("foo_s", valueF1);
 
     ModifiableSolrParams parameters = new ModifiableSolrParams();
     parameters.add("langid.fl", "foo_s");
     parameters.add("langid.langField", "language");
     parameters.add("langid.enforceSchema", "false");
-    TikaLanguageIdentifierUpdateProcessor p = 
(TikaLanguageIdentifierUpdateProcessor) createLangIdProcessor(parameters);
+    TikaLanguageIdentifierUpdateProcessor p =
+        (TikaLanguageIdentifierUpdateProcessor) 
createLangIdProcessor(parameters);
     assertEquals(valueF1, p.concatFields(doc).trim());
 
     parameters = new ModifiableSolrParams();
@@ -122,22 +129,23 @@ public class 
TikaLanguageIdentifierUpdateProcessorFactoryTest extends LanguageId
     parameters.add("langid.maxTotalChars", "100000");
     p = (TikaLanguageIdentifierUpdateProcessor) 
createLangIdProcessor(parameters);
     assertEquals(valueF1 + " " + valueF2, p.concatFields(doc).trim());
-
   }
 
-
   @Test
   public void testMaxFieldValueCharsAndMaxTotalChars() throws Exception {
     SolrInputDocument doc = new SolrInputDocument();
-    String valueF1 = "Apache Lucene is a free/open source information 
retrieval software library, originally created in Java by Doug Cutting. It is 
supported by the Apache Software Foundation and is released under the Apache 
Software License.";
-    String valueF2 = "An open-source search server based on the Lucene Java 
search library. News, documentation, resources, and download.";
+    String valueF1 =
+        "Apache Lucene is a free/open source information retrieval software 
library, originally created in Java by Doug Cutting. It is supported by the 
Apache Software Foundation and is released under the Apache Software License.";
+    String valueF2 =
+        "An open-source search server based on the Lucene Java search library. 
News, documentation, resources, and download.";
     doc.addField("foo_s", valueF1);
 
     ModifiableSolrParams parameters = new ModifiableSolrParams();
     parameters.add("langid.fl", "foo_s");
     parameters.add("langid.langField", "language");
     parameters.add("langid.enforceSchema", "false");
-    TikaLanguageIdentifierUpdateProcessor p = 
(TikaLanguageIdentifierUpdateProcessor) createLangIdProcessor(parameters);
+    TikaLanguageIdentifierUpdateProcessor p =
+        (TikaLanguageIdentifierUpdateProcessor) 
createLangIdProcessor(parameters);
     assertEquals(valueF1, p.concatFields(doc).trim());
 
     parameters = new ModifiableSolrParams();
@@ -175,7 +183,5 @@ public class 
TikaLanguageIdentifierUpdateProcessorFactoryTest extends LanguageId
     parameters.add("langid.maxTotalChars", "100000");
     p = (TikaLanguageIdentifierUpdateProcessor) 
createLangIdProcessor(parameters);
     assertEquals(valueF1 + " " + valueF2, p.concatFields(doc).trim());
-
   }
-
 }

[solr] branch main updated: SOLR-16028: Enable spotless on langid module

Reply via email to