This is an automated email from the ASF dual-hosted git repository.
krickert pushed a change to branch OPENNLP-1833-grpc-expansion
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
from 5d01e208 Add remote embedding backends (TEI, OpenVINO) and live integration tests
add b6605eb4 OPENNLP-1833 - Add POS, lemma and language detect steps; split analyzer into focused helpers
No new revisions were added by this update.
Summary of changes:
opennlp-grpc/README.md | 37 +-
.../opennlp/grpc/it/OpenNlpGrpcServerLiveIT.java | 76 ++-
opennlp-grpc/opennlp-grpc-service/pom.xml | 34 ++
.../grpc/embedding/EmbeddingProviderFactory.java | 1 +
.../{ => cuda}/CudaEmbeddingBackendFactory.java | 5 +-
.../{ => cuda}/CudaEmbeddingProvider.java | 4 +-
.../{ => onnx}/AbstractOnnxEmbeddingProvider.java | 7 +-
.../grpc/embedding/{ => onnx}/BertTokenizer.java | 2 +-
.../{ => onnx}/OnnxEmbeddingBackendFactory.java | 5 +-
.../{ => onnx}/OnnxRuntimeEmbeddingProvider.java | 2 +-
.../embedding/{ => onnx}/OnnxSentenceEmbedder.java | 2 +-
.../opennlp/grpc/model/ModelBundleCache.java | 226 ++++++---
.../grpc/processor/BasicDocumentAnalyzer.java | 523 ---------------------
.../opennlp/grpc/processor/PipelineStepPolicy.java | 3 +
.../processor/basic/AnalysisRequestValidator.java | 149 ++++++
.../processor/basic/BasicDocumentAnalyzer.java | 256 ++++++++++
.../grpc/processor/basic/ClassicStepRunner.java | 199 ++++++++
.../processor/basic/DocumentOffsetEncoder.java | 85 ++++
.../grpc/processor/basic/EmbedChunkStepRunner.java | 132 ++++++
.../grpc/processor/{ => basic}/OffsetMapper.java | 2 +-
.../grpc/processor/basic/StepDiagnostics.java | 45 ++
.../opennlp/grpc/server/OpenNlpGrpcServer.java | 2 +-
....opennlp.grpc.embedding.EmbeddingBackendFactory | 4 +-
.../embedding/EmbeddingProviderFactoryTest.java | 4 +
.../embedding/{ => onnx}/BertTokenizerTest.java | 2 +-
.../BasicDocumentAnalyzerChunkEmbedTest.java | 2 +-
.../BasicDocumentAnalyzerEmbeddingTest.java | 3 +-
.../BasicDocumentAnalyzerLanguageDetectTest.java | 88 ++++
.../BasicDocumentAnalyzerPolicyTest.java | 3 +-
.../basic/BasicDocumentAnalyzerPosLemmaTest.java | 155 ++++++
.../BasicDocumentAnalyzerSemanticChunkTest.java | 2 +-
.../{ => basic}/BasicDocumentAnalyzerTest.java | 2 +-
.../processor/{ => basic}/OffsetMapperTest.java | 2 +-
33 files changed, 1444 insertions(+), 620 deletions(-)
rename opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/embedding/{ => cuda}/CudaEmbeddingBackendFactory.java (88%)
rename opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/embedding/{ => cuda}/CudaEmbeddingProvider.java (93%)
rename opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/embedding/{ => onnx}/AbstractOnnxEmbeddingProvider.java (97%)
rename opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/embedding/{ => onnx}/BertTokenizer.java (99%)
rename opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/embedding/{ => onnx}/OnnxEmbeddingBackendFactory.java (88%)
rename opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/embedding/{ => onnx}/OnnxRuntimeEmbeddingProvider.java (96%)
rename opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/embedding/{ => onnx}/OnnxSentenceEmbedder.java (99%)
delete mode 100644 opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/processor/BasicDocumentAnalyzer.java
create mode 100644 opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/processor/basic/AnalysisRequestValidator.java
create mode 100644 opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/processor/basic/BasicDocumentAnalyzer.java
create mode 100644 opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/processor/basic/ClassicStepRunner.java
create mode 100644 opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/processor/basic/DocumentOffsetEncoder.java
create mode 100644 opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/processor/basic/EmbedChunkStepRunner.java
rename opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/processor/{ => basic}/OffsetMapper.java (98%)
create mode 100644 opennlp-grpc/opennlp-grpc-service/src/main/java/org/apache/opennlp/grpc/processor/basic/StepDiagnostics.java
rename opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/embedding/{ => onnx}/BertTokenizerTest.java (99%)
rename opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/processor/{ => basic}/BasicDocumentAnalyzerChunkEmbedTest.java (98%)
rename opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/processor/{ => basic}/BasicDocumentAnalyzerEmbeddingTest.java (97%)
create mode 100644 opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/processor/basic/BasicDocumentAnalyzerLanguageDetectTest.java
rename opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/processor/{ => basic}/BasicDocumentAnalyzerPolicyTest.java (98%)
create mode 100644 opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/processor/basic/BasicDocumentAnalyzerPosLemmaTest.java
rename opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/processor/{ => basic}/BasicDocumentAnalyzerSemanticChunkTest.java (98%)
rename opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/processor/{ => basic}/BasicDocumentAnalyzerTest.java (98%)
rename opennlp-grpc/opennlp-grpc-service/src/test/java/org/apache/opennlp/grpc/processor/{ => basic}/OffsetMapperTest.java (98%)