Smalyshev has uploaded a new change for review.
https://gerrit.wikimedia.org/r/277674
Change subject: Add infrastructure for handling different formats.
......................................................................
Add infrastructure for handling different formats.
Bug: T130066
Change-Id: Ia572b27f9d522770fae9419263295eb7cb52f7c7
---
M common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
M gui
M tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
M tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
M tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
5 files changed, 124 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/wikidata/query/rdf
refs/changes/74/277674/1
diff --git
a/common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
b/common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
index 0b99d2b..d8a39ab 100644
--- a/common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
+++ b/common/src/main/java/org/wikidata/query/rdf/common/uri/SchemaDotOrg.java
@@ -13,6 +13,10 @@
*/
public static final String VERSION = NAMESPACE + "version";
/**
+ * Wikibase uses the MediaWiki revision as the version.
+ */
+ public static final String SOFTWARE_VERSION = NAMESPACE +
"softwareVersion";
+ /**
* Wikibase adds this to EntityData with the date of the revision of the
entity.
*/
public static final String DATE_MODIFIED = NAMESPACE + "dateModified";
diff --git a/gui b/gui
index 653f08e..e3a44e2 160000
--- a/gui
+++ b/gui
-Subproject commit 653f08e654eff0bebf34ad088edddf60cb62bde2
+Subproject commit e3a44e2fa619d2ba9fa4bb5e1cff6a6448001063
diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
b/tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
index 8f284f3..8f456b9 100644
--- a/tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
+++ b/tools/src/main/java/org/wikidata/query/rdf/tool/Munge.java
@@ -313,6 +313,9 @@
return;
}
if (subject.equals(Ontology.DUMP)) {
+ if
(statement.getPredicate().stringValue().equals(SchemaDotOrg.SOFTWARE_VERSION)) {
+
munger.setFormatVersion(statement.getObject().stringValue());
+ }
/*
* Just pipe dump statements straight through.
*/
diff --git a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
index 67d1ee5..6e2509a 100644
--- a/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
+++ b/tools/src/main/java/org/wikidata/query/rdf/tool/rdf/Munger.java
@@ -6,10 +6,12 @@
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
+import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
@@ -77,6 +79,28 @@
*/
private boolean keepTypes;
+ /**
+ * Format version we're dealing with.
+ */
+ private String dumpFormatVersion;
+
+ /**
+ * Interface to handle format transformations.
+ */
+ public interface FormatHandler {
+ /**
+ * Transform statement to current latest format.
+ * @param statement Statement to transform.
+ * @return Transformed statement or null if it needs to be deleted.
+ */
+ Statement handle(Statement statement);
+ }
+
+ /**
+ * Map of format handlers.
+ */
+ private final Map<String, FormatHandler> formatHandlers;
+
public Munger(WikibaseUris uris) {
this(uris, null, null, false);
}
@@ -87,6 +111,7 @@
this.limitLabelLanguages = limitLabelLanguages;
this.singleLabelModeLanguages = singleLabelModeLanguages;
this.removeSiteLinks = removeSiteLinks;
+ this.formatHandlers = new HashMap<>();
}
/**
@@ -140,6 +165,23 @@
*/
public Munger removeSiteLinks() {
return new Munger(uris, limitLabelLanguages, singleLabelModeLanguages,
true);
+ }
+
+ /**
+ * Set format version.
+ * @param version Format version of the dump being processed.
+ */
+ public void setFormatVersion(String version) {
+ this.dumpFormatVersion = version;
+ }
+
+ /**
+ * Add handler for specific non-default format.
+ * @param version Version to handle.
+ * @param handler Handler.
+ */
+ public void addFormatHandler(String version, FormatHandler handler) {
+ formatHandlers.put(version, handler);
}
/**
@@ -278,6 +320,11 @@
*/
private String predicate;
+ /**
+ * Format handler for current format.
+ */
+ private FormatHandler formatHandler;
+
public MungeOperation(String entityId, Collection<Statement>
statements, Collection<String> existingValues,
Collection<String> existingRefs) {
this.statements = statements;
@@ -292,6 +339,15 @@
}
this.existingValues = existingValues;
this.existingRefs = existingRefs;
+ setFormatVersion(dumpFormatVersion);
+ }
+
+ /**
+ * Set current version of the format.
+ * @param version Format version whose registered handler, if any, becomes the current one.
+ */
+ private void setFormatVersion(String version) {
+ this.formatHandler = formatHandlers.get(version);
}
/**
@@ -317,6 +373,25 @@
Iterator<Statement> itr = statements.iterator();
while (itr.hasNext()) {
statement = itr.next();
+ if (formatHandler != null) {
+ Statement handled = formatHandler.handle(statement);
+ if (handled == null) {
+ // drop it
+ itr.remove();
+ continue;
+ } else {
+ if (!handled.equals(statement)) {
+ // modified
+ itr.remove();
+ statement = handled;
+ if (statement()) {
+ // if we accept it in modified form, add back
+ restoredStatements.add(statement);
+ continue;
+ }
+ }
+ }
+ }
if (!statement()) {
itr.remove();
}
@@ -387,6 +462,9 @@
break;
case SchemaDotOrg.DATE_MODIFIED:
lastModified = objectAsLiteral();
+ break;
+ case SchemaDotOrg.SOFTWARE_VERSION:
+ setFormatVersion(objectAsLiteral().stringValue());
break;
default:
// Noop - fall out is ok as we just remove them.
@@ -711,7 +789,7 @@
try {
return (Literal) statement.getObject();
} catch (ClassCastException e) {
- throw new ContainedException("Unexpected Literal in object
position of: " + statement);
+ throw new ContainedException("Expected Literal in object
position of: " + statement);
}
}
diff --git
a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
index bcdaeb5..dec4e9e 100644
--- a/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
+++ b/tools/src/test/java/org/wikidata/query/rdf/tool/rdf/MungerUnitTest.java
@@ -18,6 +18,7 @@
import org.openrdf.model.Statement;
import org.openrdf.model.impl.IntegerLiteralImpl;
import org.openrdf.model.impl.LiteralImpl;
+import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.vocabulary.XMLSchema;
import org.wikidata.query.rdf.common.uri.Ontology;
import org.wikidata.query.rdf.common.uri.Provenance;
@@ -302,6 +303,19 @@
.test();
}
+ public void formatVersions() {
+ List<Statement> result = entity("Q23")
+ .format("test")
+ .retain(statement("Q23", RDFS.LABEL, new LiteralImpl("george",
"en")))
+ .remove(statement("Q23", RDF.TYPE, new LiteralImpl(Ontology.ITEM)))
+ .remove(statement("Q23", uris.property(PropertyType.DIRECT) +
"P1", new LiteralImpl("deleteme", "en")))
+ .remove(statement("Q23", uris.property(PropertyType.DIRECT) +
"P2", new LiteralImpl("modifyme", "en")))
+ .retain(statement("Q23", uris.property(PropertyType.DIRECT) +
"P3", new LiteralImpl("keepme", "en")))
+ .test();
+ Statement expected = statement("Q23",
uris.property(PropertyType.DIRECT) + "P2", new LiteralImpl("test modified"));
+ assertThat(result, hasItem(expected));
+ }
+
private Mungekin entity(String id) {
return new Mungekin(uris, id);
}
@@ -386,5 +400,29 @@
assertThat(statements, not(hasItem(x)));
}
}
+
+ private Mungekin format(String version) {
+ remove(statement(uris.entityData() + id,
SchemaDotOrg.SOFTWARE_VERSION, new LiteralImpl(version)));
+ munger.addFormatHandler(version, new TestFormatHandler());
+ return this;
+ }
+ }
+
+ private final class TestFormatHandler implements Munger.FormatHandler {
+
+ @Override
+ public Statement handle(Statement statement) {
+ // Delete P1
+ if (statement.getPredicate().stringValue().endsWith("P1")) {
+ return null;
+ }
+ // Modify P2
+ if (statement.getPredicate().stringValue().endsWith("P2")) {
+ return new StatementImpl(statement.getSubject(),
statement.getPredicate(),
+ new LiteralImpl("test modified"));
+ }
+
+ return statement;
+ }
}
}
--
To view, visit https://gerrit.wikimedia.org/r/277674
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia572b27f9d522770fae9419263295eb7cb52f7c7
Gerrit-PatchSet: 1
Gerrit-Project: wikidata/query/rdf
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits