This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push: new 6d8dbcdd3 Update Grobid parsers (#1280) 6d8dbcdd3 is described below commit 6d8dbcdd35f448623a0080ec0ecdc62f93dc1359 Author: Luca Foppiano <lfoppi...@users.noreply.github.com> AuthorDate: Wed Aug 16 00:23:48 2023 +0900 Update Grobid parsers (#1280) * Update default values * improve check mechanism, correct config file --- .../tika/parser/journal/GrobidRESTParser.java | 7 ++--- .../tika/parser/ner/grobid/GrobidNERecogniser.java | 31 +++++++++++++++------- .../tika/parser/journal/GrobidExtractor.properties | 2 +- .../tika/parser/ner/grobid/GrobidServer.properties | 4 +-- 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java index cc2841880..d7aedd660 100644 --- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java +++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java @@ -41,7 +41,7 @@ public class GrobidRESTParser { private static final Logger LOG = LoggerFactory.getLogger(GrobidRESTParser.class); - private static final String GROBID_REST_HOST = "http://localhost:8080"; + private static final String GROBID_REST_HOST = "http://localhost:8070"; private static final String GROBID_ISALIVE_PATH = "/api/isalive"; private static final String GROBID_PROCESSHEADER_PATH = "/api/processHeaderDocument"; private static final String GROBID_LEGACY_ISALIVE_PATH = "/grobid"; @@ -96,8 +96,9 @@ public class GrobidRESTParser { try { checkMode(); Response response = WebClient.create(restHostUrlStr + - (legacyMode ? GROBID_LEGACY_PROCESSHEADER_PATH : GROBID_PROCESSHEADER_PATH)) - .accept(MediaType.APPLICATION_XML).type(MediaType.MULTIPART_FORM_DATA) + (legacyMode ? GROBID_LEGACY_PROCESSHEADER_PATH : GROBID_PROCESSHEADER_PATH)) + .accept(MediaType.APPLICATION_XML) + .type(MediaType.MULTIPART_FORM_DATA) .post(body); diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java index 1f173e381..1812b4b82 100644 --- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java +++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/grobid/GrobidNERecogniser.java @@ -47,10 +47,11 @@ public class GrobidNERecogniser implements NERecogniser { add("NORMALIZED_MEASUREMENTS"); add("MEASUREMENT_TYPES"); } - }; + }; private static final Logger LOG = LoggerFactory.getLogger(GrobidNERecogniser.class); - private static final String GROBID_REST_HOST = "http://localhost:8080"; - private static boolean available = false; + private static final String GROBID_REST_HOST = "http://localhost:8060"; + private static final String ISALIVE_URL = "/service/isalive"; + private boolean available = false; private String restHostUrlStr; @@ -62,7 +63,6 @@ public class GrobidNERecogniser implements NERecogniser { restHostUrlStr = readRestUrl(); } catch (IOException e) { LOG.warn("couldn't read rest url", e); - } if (restHostUrlStr == null || restHostUrlStr.equals("")) { @@ -71,18 +71,30 @@ public class GrobidNERecogniser implements NERecogniser { this.restHostUrlStr = restHostUrlStr; } + this.available = isServerAlive(restHostUrlStr); + + } catch (Exception e) { + LOG.info(e.getMessage(), e); + } + } + + private static boolean isServerAlive(String restHostUrlStr) { + boolean available = false; + try { Response response = - WebClient.create(restHostUrlStr).accept(MediaType.APPLICATION_JSON).get(); + WebClient.create(restHostUrlStr + ISALIVE_URL) + .get(); int responseCode = response.getStatus(); if (responseCode == 200) { available = true; } else { - LOG.info("Grobid REST Server is not running"); + LOG.info("Grobid Quantities REST Server is not running"); } - } catch (Exception e) { - LOG.info(e.getMessage(), e); + LOG.info("Grobid Quantities REST Server is not running", e); } + + return available; } /** @@ -173,7 +185,8 @@ public class GrobidNERecogniser implements NERecogniser { try { String url = restHostUrlStr + readRestEndpoint(); Response response = - WebClient.create(url).accept(MediaType.APPLICATION_JSON).post("text=" + text); + WebClient.create(url).accept(MediaType.APPLICATION_JSON) + .post("text=" + text); int responseCode = response.getStatus(); if (responseCode == 200) { diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/journal/GrobidExtractor.properties b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/journal/GrobidExtractor.properties index 488f0c593..002b0430c 100644 --- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/journal/GrobidExtractor.properties +++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/journal/GrobidExtractor.properties @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -grobid.server.url=http://localhost:8080 +grobid.server.url=http://localhost:8070 diff --git a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/ner/grobid/GrobidServer.properties b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/ner/grobid/GrobidServer.properties index 3fc609af4..0803e38fd 100644 --- a/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/ner/grobid/GrobidServer.properties +++ b/tika-parsers/tika-parsers-ml/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/ner/grobid/GrobidServer.properties @@ -13,5 +13,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -grobid.server.url=http://localhost:8080 -grobid.endpoint.text=/processQuantityText +grobid.server.url=http://localhost:8060 +grobid.endpoint.text=/service/processQuantityText