Smalyshev has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/277674

Change subject: Add infrastructure for handling different formats.
......................................................................

Add infrastructure for handling different formats.

Bug: T130066
Change-Id: Ia572b27f9d522770fae9419263295eb7cb52f7c7
---
M common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
M gui
M tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
M tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
M tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
5 files changed, 124 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikidata/query/rdf refs/changes/74/277674/1

diff --git a/common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java b/common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
index 0b99d2b..d8a39ab 100644
--- a/common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
+++ b/common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
@@ -13,6 +13,10 @@
      */
     public static final String VERSION = NAMESPACE + "version";
     /**
+     * Wikibase uses the MediaWiki revision as the version.
+     */
+    public static final String SOFTWARE_VERSION = NAMESPACE + "softwareVersion";
+    /**
      * Wikibase adds this to EntityData with the date of the revision of the entity.
      */
     public static final String DATE_MODIFIED = NAMESPACE + "dateModified";
diff --git a/gui b/gui
index 653f08e..e3a44e2 160000
--- a/gui
+++ b/gui
-Subproject commit 653f08e654eff0bebf34ad088edddf60cb62bde2
+Subproject commit e3a44e2fa619d2ba9fa4bb5e1cff6a6448001063
diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java b/tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
index 8f284f3..8f456b9 100644
--- a/tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
+++ b/tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
@@ -313,6 +313,9 @@
                 return;
             }
             if (subject.equals(Ontology.DUMP)) {
+                if (statement.getPredicate().stringValue().equals(SchemaDotOrg.SOFTWARE_VERSION)) {
+                    munger.setFormatVersion(statement.getObject().stringValue());
+                }
                 /*
                  * Just pipe dump statements strait through.
                  */
diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
index 67d1ee5..6e2509a 100644
--- a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
+++ b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
@@ -6,10 +6,12 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
+import java.util.Map;
 import java.util.Set;
 import java.util.TimeZone;
 
@@ -77,6 +79,28 @@
      */
     private boolean keepTypes;
 
+    /**
+     * Format version we're dealing with.
+     */
+    private String dumpFormatVersion;
+
+    /**
+     * Interface to handle format transformations.
+     */
+    public interface FormatHandler {
+        /**
+         * Transform statement to current latest format.
+         * @param statement
+         * @return Transformed statement or null if it needs to be deleted.
+         */
+        Statement handle(Statement statement);
+    }
+
+    /**
+     * Map of format handlers.
+     */
+    private final Map<String, FormatHandler> formatHandlers;
+
     public Munger(WikibaseUris uris) {
         this(uris, null, null, false);
     }
@@ -87,6 +111,7 @@
         this.limitLabelLanguages = limitLabelLanguages;
         this.singleLabelModeLanguages = singleLabelModeLanguages;
         this.removeSiteLinks = removeSiteLinks;
+        this.formatHandlers = new HashMap<>();
     }
 
     /**
@@ -140,6 +165,23 @@
      */
     public Munger removeSiteLinks() {
         return new Munger(uris, limitLabelLanguages, singleLabelModeLanguages, true);
+    }
+
+    /**
+     * Set format version.
+     * @param version
+     */
+    public void setFormatVersion(String version) {
+        this.dumpFormatVersion = version;
+    }
+
+    /**
+     * Add handler for specific non-default format.
+     * @param version Version to handle.
+     * @param handler Handler.
+     */
+    public void addFormatHandler(String version, FormatHandler handler) {
+        formatHandlers.put(version, handler);
     }
 
     /**
@@ -278,6 +320,11 @@
          */
         private String predicate;
 
+        /**
+         * Format handler for current format.
+         */
+        private FormatHandler formatHandler;
+
         public MungeOperation(String entityId, Collection<Statement> statements, Collection<String> existingValues,
                 Collection<String> existingRefs) {
             this.statements = statements;
@@ -292,6 +339,15 @@
             }
             this.existingValues = existingValues;
             this.existingRefs = existingRefs;
+            setFormatVersion(dumpFormatVersion);
+        }
+
+        /**
+         * Set current version of the format.
+         * @param version
+         */
+        private void setFormatVersion(String version) {
+            this.formatHandler = formatHandlers.get(version);
         }
 
         /**
@@ -317,6 +373,25 @@
             Iterator<Statement> itr = statements.iterator();
             while (itr.hasNext()) {
                 statement = itr.next();
+                if (formatHandler != null) {
+                    Statement handled = formatHandler.handle(statement);
+                    if (handled == null) {
+                        // drop it
+                        itr.remove();
+                        continue;
+                    } else {
+                        if (!handled.equals(statement)) {
+                            // modified
+                            itr.remove();
+                            statement = handled;
+                            if (statement()) {
+                                // if we accept it in modified form, add back
+                                restoredStatements.add(statement);
+                                continue;
+                            }
+                        }
+                    }
+                }
                 if (!statement()) {
                     itr.remove();
                 }
@@ -387,6 +462,9 @@
                 break;
             case SchemaDotOrg.DATE_MODIFIED:
                 lastModified = objectAsLiteral();
+                break;
+            case SchemaDotOrg.SOFTWARE_VERSION:
+                setFormatVersion(objectAsLiteral().stringValue());
                 break;
             default:
                 // Noop - fall out is ok as we just remove them.
@@ -711,7 +789,7 @@
             try {
                 return (Literal) statement.getObject();
             } catch (ClassCastException e) {
-                throw new ContainedException("Unexpected Literal in object position of:  " + statement);
+                throw new ContainedException("Expected Literal in object position of:  " + statement);
             }
         }
 
diff --git a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
index bcdaeb5..dec4e9e 100644
--- a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
+++ b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
@@ -18,6 +18,7 @@
 import org.openrdf.model.Statement;
 import org.openrdf.model.impl.IntegerLiteralImpl;
 import org.openrdf.model.impl.LiteralImpl;
+import org.openrdf.model.impl.StatementImpl;
 import org.openrdf.model.vocabulary.XMLSchema;
 import org.wikidata.query.rdf.common.uri.Ontology;
 import org.wikidata.query.rdf.common.uri.Provenance;
@@ -302,6 +303,19 @@
                 .test();
     }
 
+    public void formatVersions() {
+        List<Statement> result = entity("Q23")
+            .format("test")
+            .retain(statement("Q23", RDFS.LABEL, new LiteralImpl("george", "en")))
+            .remove(statement("Q23", RDF.TYPE, new LiteralImpl(Ontology.ITEM)))
+            .remove(statement("Q23", uris.property(PropertyType.DIRECT) + "P1", new LiteralImpl("deleteme", "en")))
+            .remove(statement("Q23", uris.property(PropertyType.DIRECT) + "P2", new LiteralImpl("modifyme", "en")))
+            .retain(statement("Q23", uris.property(PropertyType.DIRECT) + "P3", new LiteralImpl("keepme", "en")))
+            .test();
+        Statement expected = statement("Q23", uris.property(PropertyType.DIRECT) + "P2", new LiteralImpl("test modified"));
+        assertThat(result, hasItem(expected));
+    }
+
     private Mungekin entity(String id) {
         return new Mungekin(uris, id);
     }
@@ -386,5 +400,29 @@
                 assertThat(statements, not(hasItem(x)));
             }
         }
+
+        private Mungekin format(String version) {
+            remove(statement(uris.entityData() + id, SchemaDotOrg.SOFTWARE_VERSION, new LiteralImpl(version)));
+            munger.addFormatHandler(version, new TestFormatHandler());
+            return this;
+        }
+    }
+
+    private final class TestFormatHandler implements Munger.FormatHandler {
+
+        @Override
+        public Statement handle(Statement statement) {
+            // Delete P1
+            if (statement.getPredicate().stringValue().endsWith("P1")) {
+                return null;
+            }
+            // Modify P2
+            if (statement.getPredicate().stringValue().endsWith("P2")) {
+                return new StatementImpl(statement.getSubject(), statement.getPredicate(),
+                        new LiteralImpl("test modified"));
+            }
+
+            return statement;
+        }
     }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/277674
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia572b27f9d522770fae9419263295eb7cb52f7c7
Gerrit-PatchSet: 1
Gerrit-Project: wikidata/query/rdf
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to