Package: src:tika
Version: 1.22-1
Severity: normal
Tags: patch

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

Hi,

I'd like to transition json-simple 3.1.1 to unstable, but tika is a blocker 
since it builds against libjson-simple-java << 3 only.

The json-simple classes used by tika were deprecated in version 2.0.0 [1]. 
They were removed in versions 3.x [2].

[1] https://github.com/cliftonlabs/json-simple/blob/json-simple-2.0.0/README.txt
[2] https://github.com/cliftonlabs/json-simple/blob/json-simple-3.0.1/CHANGELOG

Please find attached a patch proposal to use the current json-simple classes. 
I've tested that the package builds correctly against libjson-simple-java 
version 2.3.0-1 from unstable and version 3.1.1-1~exp2 currently in 
experimental. But I don't know how to test the package afterward.

Thanks in advance for considering.

_g.

- -- System Information:
Debian Release: buster/sid
Architecture: amd64 (x86_64)
Foreign Architectures: i386

Kernel: Linux 5.6.0-1-amd64 (SMP w/4 CPU cores)
Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8), 
LANGUAGE=fr_FR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
LSM: AppArmor: enabled

-----BEGIN PGP SIGNATURE-----

iQEzBAEBCgAdFiEEoJObzArDE05WtIyR7+hsbH/+z4MFAl69ZgQACgkQ7+hsbH/+
z4On6gf+N9/6tHA9M/Rcd5LEYQFt078ti7bcR5dAH5YlXvTtjKRewmBm4p7gl+ts
R7EVnMOO1cRSjecrdzSlUGcInLVwRaDt+1cfC+jGHVq24Jw2U0/Tu9o2OQJvRGHn
nutlzh+o6YNJhfhukOHylXpe8/Jujidl8DMtcOjrAPsqsMle/wRcjNxrRKBMMXd2
Y9MRI6PS8LWFQim/A6rWd1BwhDQwjYt5E5TwjNPrdGoJOPjvDPdTKuBhho0qsN72
7GbAELAP3fbijiUiMvu9NgCrFLkbpse2VDGLjOl1DIoNdxgESVCEjFsJD/Kyr/0J
XkTAioERcub8zPyYfqZNmc0shIO3Tg==
=eJOf
-----END PGP SIGNATURE-----
diff -Nru tika-1.22/debian/changelog tika-1.22/debian/changelog
--- tika-1.22/debian/changelog  2019-08-05 11:41:25.000000000 +0200
+++ tika-1.22/debian/changelog  2020-05-14 15:14:44.000000000 +0200
@@ -1,3 +1,10 @@
+tika (1.22-1.1) UNRELEASED; urgency=medium
+
+  * Non-maintainer upload.
+  * Tentative patch to build against json-simple >= 3
+
+ -- Gilles Filippini <p...@debian.org>  Thu, 14 May 2020 15:14:44 +0200
+
 tika (1.22-1) unstable; urgency=medium
 
   * New upstream release
diff -Nru tika-1.22/debian/maven.rules tika-1.22/debian/maven.rules
--- tika-1.22/debian/maven.rules        2019-08-05 11:14:12.000000000 +0200
+++ tika-1.22/debian/maven.rules        1970-01-01 01:00:00.000000000 +0100
@@ -1,14 +0,0 @@
-
-com.fasterxml.jackson.core jackson-annotations * s/.*/2.x/ * *
-com.fasterxml.jackson.core jackson-core * s/.*/2.x/ * *
-com.fasterxml.jackson.core jackson-databind * s/.*/2.x/ * *
-s/com.github.openjson/org.json/ s/openjson/json/ * s/.*/debian/ * *
-s/org.codelibs/com.uwyn/ jhighlight * s/.*/debian/ * *
-org.bouncycastle s/bcmail-jdk15/bcmail/ * s/.*/debian/ * *
-org.bouncycastle s/bcmail-jdk15on/bcmail/ * s/.*/debian/ * *
-org.bouncycastle s/bcprov-jdk15/bcprov/ * s/.*/debian/ * *
-org.bouncycastle s/bcprov-jdk15on/bcprov/ * s/.*/debian/ * *
-s/biz.aQute/biz.aQute.bnd/ * * s/.*/debian/ * *
-org.apache.pdfbox pdfbox * s/.*/2.x/ * *
-org.apache.pdfbox pdfbox-tools * s/.*/2.x/ * *
-s/javax.annotation/org.apache.geronimo.specs/ 
s/javax.annotation-api/geronimo-annotation_1.3_spec/ * s/.*/debian/ * *
diff -Nru tika-1.22/debian/maven.rules.in tika-1.22/debian/maven.rules.in
--- tika-1.22/debian/maven.rules.in     1970-01-01 01:00:00.000000000 +0100
+++ tika-1.22/debian/maven.rules.in     2020-05-14 14:38:29.000000000 +0200
@@ -0,0 +1,15 @@
+
+com.fasterxml.jackson.core jackson-annotations * s/.*/2.x/ * *
+com.fasterxml.jackson.core jackson-core * s/.*/2.x/ * *
+com.fasterxml.jackson.core jackson-databind * s/.*/2.x/ * *
+s/com.github.openjson/org.json/ s/openjson/json/ * s/.*/debian/ * *
+s/org.codelibs/com.uwyn/ jhighlight * s/.*/debian/ * *
+org.bouncycastle s/bcmail-jdk15/bcmail/ * s/.*/debian/ * *
+org.bouncycastle s/bcmail-jdk15on/bcmail/ * s/.*/debian/ * *
+org.bouncycastle s/bcprov-jdk15/bcprov/ * s/.*/debian/ * *
+org.bouncycastle s/bcprov-jdk15on/bcprov/ * s/.*/debian/ * *
+s/biz.aQute/biz.aQute.bnd/ * * s/.*/debian/ * *
+org.apache.pdfbox pdfbox * s/.*/2.x/ * *
+org.apache.pdfbox pdfbox-tools * s/.*/2.x/ * *
+s/javax.annotation/org.apache.geronimo.specs/ 
s/javax.annotation-api/geronimo-annotation_1.3_spec/ * s/.*/debian/ * *
+s/com.googlecode.json-simple/@JSON_SIMPLE_MAVEN@/ json-simple * s/.*/debian/ * 
*
diff -Nru tika-1.22/debian/patches/14-json-simple-3.patch 
tika-1.22/debian/patches/14-json-simple-3.patch
--- tika-1.22/debian/patches/14-json-simple-3.patch     1970-01-01 
01:00:00.000000000 +0100
+++ tika-1.22/debian/patches/14-json-simple-3.patch     2020-05-14 
15:14:44.000000000 +0200
@@ -0,0 +1,48 @@
+Description: Migrate away from deprecated json-simple 1.x classes
+ See json-simple 2.0.0 changelog:
+ > * Deprecated JSONParse and JSONValue in favor of Jsoner.
+ > * Deprecated JSONStreamAware and JSONAware in favor of Jsonable.
+ > * Deprecated JSONObject in favor of JsonObject.
+ > * Deprecated JSONArray in favor of JsonArray.
+ .
+ This patch uses the new json-simple Json* classes. It is compatible with
+ both 2.x and 3.x json-simple releases, with a few adjustments regarding
+ backward incompatible changes in json-simple 3.x:
+ - The package name, changed to com.github.cliftonlabs.json_simple
+ - The maven groupId renamed as com.github.cliftonlabs
+ These two changes are handled using place-holders @JSON_SIMPLE_PACKAGE@ and
+ @JSON_SIMPLE_MAVEN@ which are substituted at build time by debian/rules.
+ .
+ With these tricks the package is compatible with json-simple 2.x and 3.x.
+Author: Gilles Filippini <p...@debian.org>
+Index: 
tika-1.22/tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java
+===================================================================
+--- 
tika-1.22.orig/tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java
++++ 
tika-1.22/tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java
+@@ -46,9 +46,9 @@ import org.apache.tika.mime.MediaType;
+ import org.apache.tika.parser.ParseContext;
+ import org.apache.tika.parser.recognition.ObjectRecogniser;
+ import org.apache.tika.parser.captioning.CaptionObject;
+-import org.json.simple.JSONArray;
+-import org.json.simple.JSONObject;
+-import org.json.simple.parser.JSONParser;
++import @JSON_SIMPLE_PACKAGE@.JsonArray;
++import @JSON_SIMPLE_PACKAGE@.JsonObject;
++import @JSON_SIMPLE_PACKAGE@.Jsoner;
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+ import org.xml.sax.ContentHandler;
+@@ -149,10 +149,10 @@ public class TensorflowRESTCaptioner imp
+             try (InputStream reply = response.getEntity().getContent()) {
+                 String replyMessage = IOUtils.toString(reply);
+                 if (response.getStatusLine().getStatusCode() == 200) {
+-                    JSONObject jReply = (JSONObject) new 
JSONParser().parse(replyMessage);
+-                    JSONArray jCaptions = (JSONArray) jReply.get("captions");
++                    JsonObject jReply = (JsonObject) 
Jsoner.deserialize(replyMessage);
++                    JsonArray jCaptions = (JsonArray) jReply.get("captions");
+                     for (int i = 0; i < jCaptions.size(); i++) {
+-                        JSONObject jCaption = (JSONObject) jCaptions.get(i);
++                        JsonObject jCaption = (JsonObject) jCaptions.get(i);
+                         String sentence = (String) jCaption.get("sentence");
+                         Double confidence = (Double) 
jCaption.get("confidence");
+                         capObjs.add(new CaptionObject(sentence, LABEL_LANG, 
confidence));
diff -Nru tika-1.22/debian/patches/series tika-1.22/debian/patches/series
--- tika-1.22/debian/patches/series     2019-08-05 11:14:12.000000000 +0200
+++ tika-1.22/debian/patches/series     2020-05-14 14:35:07.000000000 +0200
@@ -6,3 +6,4 @@
 11-java11-compatibility.patch
 12-json-compatibility.patch
 13-missing-dependencies.patch
+14-json-simple-3.patch
diff -Nru tika-1.22/debian/rules tika-1.22/debian/rules
--- tika-1.22/debian/rules      2019-08-05 11:14:12.000000000 +0200
+++ tika-1.22/debian/rules      2020-05-14 15:09:13.000000000 +0200
@@ -1,4 +1,35 @@
 #!/usr/bin/make -f
 
+# debian: bullseye - transitioning
+JSON_SIMPLE_VERSION = $(shell dpkg -l libjson-simple-java | grep '^ii' | awk 
'{print $$3}')
+JSON_SIMPLE_3 = $(shell dpkg --compare-versions '$(JSON_SIMPLE_VERSION)' '>' 
'3.1.1-1~' && echo yes || echo no)
+ifeq (yes,$(JSON_SIMPLE_3))
+JSON_SIMPLE_PACKAGE = com.github.cliftonlabs.json_simple
+JSON_SIMPLE_MAVEN = com.github.cliftonlabs
+else
+JSON_SIMPLE_PACKAGE = org.json.simple
+JSON_SIMPLE_MAVEN = com.googlecode.json-simple
+
+endif
+
 %:
        dh $@
+
+debian/maven.rules: debian/maven.rules.in
+       sed 's/@JSON_SIMPLE_MAVEN@/$(JSON_SIMPLE_MAVEN)/' $< >$@
+
+override_dh_auto_configure: debian/maven.rules
+       dh_auto_configure
+
+override_dh_auto_build:
+       sed -i.json-simple \
+         's/@JSON_SIMPLE_PACKAGE@/$(JSON_SIMPLE_PACKAGE)/' \
+         
tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java
+       dh_auto_build
+
+override_dh_auto_clean:
+       dh_auto_clean
+       rm -f debian/maven.rules
+       [ ! -f 
tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java.json-simple
 ] || \
+         mv 
tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java.json-simple
 \
+            
tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java

Reply via email to