This is an automated email from the ASF dual-hosted git repository.
hutran pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 73cbd9f [Gobblin-799][GOBBLIN-799] Fix bug in
AvroSchemaCheckDefaultStrategy
73cbd9f is described below
commit 73cbd9f3e66713dc12b4209d802b09b2ecd07fe3
Author: Zihan Li <[email protected]>
AuthorDate: Mon Jun 17 09:57:16 2019 -0700
[Gobblin-799][GOBBLIN-799] Fix bug in AvroSchemaCheckDefaultStrategy
Closes #2666 from ZihanLi58/schemaCheckBug
---
.../AvroSchemaCheckDefaultStrategy.java | 2 +
.../util/AvroSchemaCheckDefaultStrategyTest.java | 51 +++++
.../expectedSchema.avsc | 239 +++++++++++++++++++++
.../toValidateSchema.avsc | 199 +++++++++++++++++
4 files changed, 491 insertions(+)
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/schema_check/AvroSchemaCheckDefaultStrategy.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/schema_check/AvroSchemaCheckDefaultStrategy.java
index 1c7696d..b094183 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/schema_check/AvroSchemaCheckDefaultStrategy.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/schema_check/AvroSchemaCheckDefaultStrategy.java
@@ -59,6 +59,7 @@ public class AvroSchemaCheckDefaultStrategy implements
AvroSchemaCheckStrategy {
if (toValidate.getFixedSize() != expected.getFixedSize()) {
return false;
}
+ return true;
}
case ENUM: {
// expected symbols must contain all toValidate symbols:
@@ -70,6 +71,7 @@ public class AvroSchemaCheckDefaultStrategy implements
AvroSchemaCheckStrategy {
if (!expectedSymbols.containsAll(toValidateSymbols)) {
return false;
}
+ return true;
}
case RECORD: {
diff --git
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/util/AvroSchemaCheckDefaultStrategyTest.java
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/util/AvroSchemaCheckDefaultStrategyTest.java
new file mode 100644
index 0000000..c57e188
--- /dev/null
+++
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/util/AvroSchemaCheckDefaultStrategyTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.data.management.util;
+
+import java.io.File;
+import org.apache.avro.Schema;
+import org.apache.gobblin.util.schema_check.AvroSchemaCheckDefaultStrategy;
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+
+public class AvroSchemaCheckDefaultStrategyTest {
+  /**
+   * Verifies {@link AvroSchemaCheckDefaultStrategy#compare} against small inline record schemas
+   * and against the larger schema pair stored under {@code avroSchemaCheckStrategyTest/}.
+   *
+   * <p>Expectations pinned here: an extra {@code doc} attribute on a field of the expected schema
+   * still compares as compatible, while a different field name or a different field type does not.
+   */
+  @Test
+  public void testSchemCheckStrategy() throws Exception {
+    AvroSchemaCheckDefaultStrategy strategy = new AvroSchemaCheckDefaultStrategy();
+
+    //test when it's compatible
+    Schema toValidate = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo\",\"type\":[\"null\",\"long\"],\"default\":null}]}");
+    Schema expected = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo\",\"type\":[\"null\",\"long\"],\"doc\":\"this is for test\",\"default\":null}]}");
+    Assert.assertTrue(strategy.compare(expected, toValidate));
+
+    //test when field name is different
+    expected = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo1\",\"type\":[\"null\",\"long\"],\"doc\":\"this is for test\",\"default\":null}]}");
+    Assert.assertFalse(strategy.compare(expected, toValidate));
+
+    //test when the type change
+    expected = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"doc\":\"this is for test\",\"default\":null}]}");
+    Assert.assertFalse(strategy.compare(expected, toValidate));
+    expected = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"baseRecord\",\"fields\":[{\"name\":\"foo\",\"type\":[\"null\",\"float\"],\"doc\":\"this is for test\",\"default\":null}]}");
+    Assert.assertFalse(strategy.compare(expected, toValidate));
+
+    //test complex schema: nested records, unions, arrays, and fixed and enum types
+    toValidate = parseSchemaResource("avroSchemaCheckStrategyTest/toValidateSchema.avsc");
+    expected = parseSchemaResource("avroSchemaCheckStrategyTest/expectedSchema.avsc");
+    Assert.assertTrue(strategy.compare(expected, toValidate));
+  }
+
+  /**
+   * Parses an Avro schema from a file on the classpath.
+   *
+   * <p>The resource URL is converted with {@code toURI()} rather than {@code getFile()}: the latter
+   * returns the URL-encoded path, which does not name a real file when the checkout directory
+   * contains spaces or other escaped characters.
+   *
+   * @param resourceName classpath-relative name of the {@code .avsc} file
+   * @return the parsed schema
+   * @throws Exception if the resource cannot be converted to a file or parsed
+   */
+  private static Schema parseSchemaResource(String resourceName) throws Exception {
+    java.net.URL location = AvroSchemaCheckDefaultStrategy.class.getClassLoader().getResource(resourceName);
+    // Fail with a readable message instead of a bare NullPointerException when the fixture is missing.
+    Assert.assertNotNull("Test resource not found on classpath: " + resourceName, location);
+    return new Schema.Parser().parse(new File(location.toURI()));
+  }
+}
diff --git
a/gobblin-data-management/src/test/resources/avroSchemaCheckStrategyTest/expectedSchema.avsc
b/gobblin-data-management/src/test/resources/avroSchemaCheckStrategyTest/expectedSchema.avsc
new file mode 100644
index 0000000..11d6102
--- /dev/null
+++
b/gobblin-data-management/src/test/resources/avroSchemaCheckStrategyTest/expectedSchema.avsc
@@ -0,0 +1,239 @@
+{
+ "type" : "record",
+ "name" : "OfflineComplianceRequestEvent",
+ "namespace" : "com.linkedin.events.compliance",
+ "fields" : [ {
+ "name" : "header",
+ "type" : {
+ "type" : "record",
+ "name" : "EventHeader",
+ "namespace" : "com.linkedin.events",
+ "fields" : [ {
+ "name" : "memberId",
+ "type" : "int",
+ "doc" : "The LinkedIn member ID of the user initiating the action.
LinkedIn member IDs are integers greater than zero. Guests are represented
either as zero or a negative number."
+ }, {
+ "name" : "viewerUrn",
+ "type" : [ "null", "string" ],
+ "doc" : "The LinkedIn URN of the user initiating the action. For
other applications like Slideshare, this should be filled in when the LinkedIn
member URN is actually known. The LinkedIn member URN would be known, for
example, when the user has linked their Slideshare account with their LinkedIn
account.",
+ "default" : null
+ }, {
+ "name" : "applicationViewerUrn",
+ "type" : [ "null", "string" ],
+ "doc" : "The Application URN of the user initiating the action. This
URN identifies the member within the particular application that the member is
using, which may or may not be LinkedIn. If the user is interacting with
LinkedIn then this should be the LinkedIn URN, the same as viewerUrn. If the
member is interacting with a different site, such as Slideshare, then this
should be the URN identifying the member in that site.",
+ "default" : null
+ }, {
+ "name" : "csUserUrn",
+ "type" : [ "null", "string" ],
+ "doc" : "The URN of the CS user initiating the action. A CS user is
essentially a LinkedIn member with elevated permissions and can perform Admin
actions on a page. A non-null value would indicate CS activity on the website.
This field is different from the impersonatorId. ImpersonatorId will be
populated when a CS user is logged in as (or impersonating) another member. On
the other hand, this field will be populated when a CS user logged in as
himself has elevated permissions to [...]",
+ "default" : null
+ }, {
+ "name" : "time",
+ "type" : "long",
+ "doc" : "The time of the event"
+ }, {
+ "name" : "server",
+ "type" : "string",
+ "doc" : "The name of the server"
+ }, {
+ "name" : "service",
+ "type" : "string",
+ "doc" : "The name of the service. Synonymous to the
com.linkedin.events.monitoring.EventHeader#container field."
+ }, {
+ "name" : "environment",
+ "type" : [ "string", "null" ],
+ "doc" : "The environment the service is running in",
+ "default" : ""
+ }, {
+ "name" : "guid",
+ "type" : {
+ "type" : "fixed",
+ "name" : "Guid",
+ "size" : 16
+ },
+ "doc" : "A unique identifier for the message"
+ }, {
+ "name" : "treeId",
+ "type" : [ "null", {
+ "type" : "fixed",
+ "name" : "fixed_16",
+ "size" : 16
+ } ],
+ "doc" : "Service call tree uuid",
+ "default" : null
+ }, {
+ "name" : "requestId",
+ "type" : [ "null", "int" ],
+ "doc" : "Service call request id",
+ "default" : null
+ }, {
+ "name" : "impersonatorId",
+ "type" : [ "null", "string" ],
+ "doc" : "this is the ID of the CS Agent or Application acting on the
users behalf",
+ "default" : null
+ }, {
+ "name" : "version",
+ "type" : [ "null", "string" ],
+ "doc" : "Synonymous to the
com.linkedin.events.monitoring.EventHeader#version field. The version that the
service which emitted this event was at. For services in multiproducts, this
usually comes in the form of {major}.{minor}.{micro} (eg. 0.1.2), however for
network services, the version follows a format like so: 0.0.2000-RC8.35047",
+ "default" : null
+ }, {
+ "name" : "instance",
+ "type" : [ "null", "string" ],
+ "doc" : "Synonymous to the
com.linkedin.events.monitoring.EventHeader#instance field. The instance ID of
the service (eg. i001)",
+ "default" : null
+ }, {
+ "name" : "appName",
+ "type" : [ "null", "string" ],
+ "doc" : "Synonymous to the
com.linkedin.events.monitoring.EventHeader#service field. Named 'appName' here
since this is what this field actually represents, and 'service' is already
used. This is also synonymous to 'appName' in Play and network apps, where on a
typical page there would be a <meta name=appName content=biz> tag. For network
apps, this would be the container name without the '-tomcat' suffix. So for
'profile-tomcat', it would just be 'profile'. For Play! services, i [...]",
+ "default" : null
+ }, {
+ "name" : "testId",
+ "type" : [ "null", "string" ],
+ "doc" : "A client provided ID that uniquely identifies a particular
execution of a test case. This ID is provided by clients through an
ENG_TEST_ID cookie. The Selenium test framework automatically sets this cookie
for each request. This will be null when there is no ENG_TEST_ID provided.
See
https://iwww.corp.linkedin.com/wiki/cf/display/ENGS/Selenium+Framework+Architecture+Documentation
for more details on the test framework. See
https://iwww.corp.linkedin.com/wiki/cf/disp [...]",
+ "default" : null
+ }, {
+ "name" : "testSegmentId",
+ "type" : [ "null", "string" ],
+ "doc" : "A client provided ID that uniquely identifies a section of
the testing code from a particular execution of a test case. This ID is
provided by clients through an ENG_TEST_SEGMENT_ID cookie. ",
+ "default" : null
+ }, {
+ "name" : "auditHeader",
+ "type" : [ "null", {
+ "type" : "record",
+ "name" : "KafkaAuditHeader",
+ "fields" : [ {
+ "name" : "time",
+ "type" : "long",
+ "doc" : "The time at which the event was emitted into kafka."
+ }, {
+ "name" : "server",
+ "type" : "string",
+ "doc" : "The fully qualified name of the host from which the event
is being emitted."
+ }, {
+ "name" : "instance",
+ "type" : [ "null", "string" ],
+ "doc" : "The instance on the server from which the event is being
emitted. e.g. i001"
+ }, {
+ "name" : "appName",
+ "type" : "string",
+ "doc" : "The name of the application from which the event is being
emitted. see go/appname"
+ }, {
+ "name" : "messageId",
+ "type" : {
+ "type" : "fixed",
+ "name" : "UUID",
+ "size" : 16
+ },
+ "doc" : "A unique identifier for the message"
+ }, {
+ "name" : "auditVersion",
+ "type" : [ "null", "int" ],
+ "doc" : "The version that is being used for auditing. In version
0, the audit trail buckets events into 10 minute audit windows based on the
EventHeader timestamp. In version 1, the audit trail buckets events as follows:
if the schema has an outer KafkaAuditHeader, use the outer audit header
timestamp for bucketing; else if the EventHeader has an inner KafkaAuditHeader
use that inner audit header's timestamp for bucketing",
+ "default" : null
+ }, {
+ "name" : "fabricUrn",
+ "type" : [ "null", "string" ],
+ "doc" : "The fabricUrn of the host from which the event is being
emitted. Fabric Urn in the format of urn:li:fabric:{fabric_name}. See
go/fabric.",
+ "default" : null
+ } ]
+ } ],
+ "doc" : "Header used by kafka for auditing the data in the kafka
pipeline",
+ "default" : null
+ }, {
+ "name" : "pageInstance",
+ "type" : [ "null", {
+ "type" : "record",
+ "name" : "PageInstance",
+ "namespace" : "com.linkedin.events.common",
+ "fields" : [ {
+ "name" : "pageUrn",
+ "type" : "string",
+ "doc" : "The page entity. Example: urn:li:page:<pageKey>."
+ }, {
+ "name" : "trackingId",
+ "type" : {
+ "type" : "fixed",
+ "name" : "TrackingId",
+ "size" : 16
+ },
+ "doc" : "Uniquely identifies this rendering of the page."
+ } ]
+ } ],
+ "doc" : "The instance of a page to which the request that triggered
this event is responding. For more information see go/pageinstance",
+ "default" : null
+ }, {
+ "name" : "clientApplicationInstance",
+ "type" : [ "null", {
+ "type" : "record",
+ "name" : "ApplicationInstance",
+ "namespace" : "com.linkedin.events.common",
+ "fields" : [ {
+ "name" : "applicationUrn",
+ "type" : "string",
+ "doc" : "The application. Example:
urn:li:application:<identifier>."
+ }, {
+ "name" : "version",
+ "type" : "string",
+ "doc" : "The internal version number of the running application in
standardized version format, see go/version."
+ }, {
+ "name" : "trackingId",
+ "type" : "TrackingId",
+ "doc" : "Uniquely identifies this instantiation of the
application. Created when an application is started from cold. Preserved
through application pause, suspend, loss of focus, background, etc."
+ } ]
+ } ],
+ "doc" : "The particular instance of a client application which
triggered this event. For more information see go/clientApplicationInstance",
+ "default" : null
+ }, {
+ "name" : "originSource",
+ "type" : [ "null", {
+ "type" : "enum",
+ "name" : "OriginSource",
+ "symbols" : [ "QPROD" ]
+ } ],
+ "doc" : "If present, identifies this request as having an origin in a
testing mechanism. If null, indicates a normal request from the external
internet. For more information see go/originSource",
+ "default" : null
+ } ]
+ },
+ "doc" : "The basic header for this tracking event."
+ }, {
+ "name" : "requestType",
+ "type" : {
+ "type" : "enum",
+ "name" : "ComplianceRequestType",
+ "symbols" : [ "TIME_LIMITED_FILTER", "GLOBAL_FILTER", "GLOBAL_UNFILTER",
"RETIRED_URN_FILTER" ]
+ },
+ "doc" : "The type of filter required by this event."
+ }, {
+ "name" : "conditionValueUrn",
+ "type" : "string",
+ "doc" : "Filter all records owned by this urn. Examples: member id urn,
lynda member id urn, etc."
+ }, {
+ "name" : "endTime",
+ "type" : [ "null", "long" ],
+ "doc" : "For certain request types, specifies the upper timestamp for
which the filter applies. Unit is ms since epoch."
+ }, {
+ "name" : "datasetRestrictionUrns",
+ "type" : [ "null", {
+ "type" : "array",
+ "items" : "string"
+ } ],
+ "doc" : "URN restricting the datasets to which this request should be
applied. The request will be applied to a dataset that matches ANY of the given
restrictions. To match all datasets, use NULL. An empty array will be reported
as an error.",
+ "default" : null
+ }, {
+ "name" : "useCaseRestrictionUrns",
+ "type" : [ "null", {
+ "type" : "array",
+ "items" : "string"
+ } ],
+ "doc" : "URN restricting the querying users to which this request should
be applied. The request will be applied to a use case that matches ANY of the
given restrictions. To match all use cases, use NULL. An empty array will be
reported as an error.",
+ "default" : null
+ }, {
+ "name" : "columnRestrictionUrns",
+ "type" : [ "null", {
+ "type" : "array",
+ "items" : "string"
+ } ],
+ "doc" : "URN restricting the columns to which this request should be
applied. The request will be applied to a column that matches ANY of the given
restrictions. To match all columns, use NULL. An empty array will be reported
as an error.",
+ "default" : null
+ } ]
+}
\ No newline at end of file
diff --git
a/gobblin-data-management/src/test/resources/avroSchemaCheckStrategyTest/toValidateSchema.avsc
b/gobblin-data-management/src/test/resources/avroSchemaCheckStrategyTest/toValidateSchema.avsc
new file mode 100644
index 0000000..227ddf7
--- /dev/null
+++
b/gobblin-data-management/src/test/resources/avroSchemaCheckStrategyTest/toValidateSchema.avsc
@@ -0,0 +1,199 @@
+{
+ "type" : "record",
+ "name" : "OfflineComplianceRequestEvent",
+ "namespace" : "com.linkedin.events.compliance",
+ "fields" : [ {
+ "name" : "header",
+ "type" : {
+ "type" : "record",
+ "name" : "EventHeader",
+ "namespace" : "com.linkedin.events",
+ "fields" : [ {
+ "name" : "memberId",
+ "type" : "int"
+ }, {
+ "name" : "viewerUrn",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "applicationViewerUrn",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "csUserUrn",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "time",
+ "type" : "long"
+ }, {
+ "name" : "server",
+ "type" : "string"
+ }, {
+ "name" : "service",
+ "type" : "string"
+ }, {
+ "name" : "environment",
+ "type" : [ "string", "null" ],
+ "default" : ""
+ }, {
+ "name" : "guid",
+ "type" : {
+ "type" : "fixed",
+ "name" : "Guid",
+ "size" : 16
+ }
+ }, {
+ "name" : "treeId",
+ "type" : [ "null", {
+ "type" : "fixed",
+ "name" : "fixed_16",
+ "size" : 16
+ } ],
+ "default" : null
+ }, {
+ "name" : "requestId",
+ "type" : [ "null", "int" ],
+ "default" : null
+ }, {
+ "name" : "impersonatorId",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "version",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "instance",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "appName",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "testId",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "testSegmentId",
+ "type" : [ "null", "string" ],
+ "default" : null
+ }, {
+ "name" : "auditHeader",
+ "type" : [ "null", {
+ "type" : "record",
+ "name" : "KafkaAuditHeader",
+ "fields" : [ {
+ "name" : "time",
+ "type" : "long"
+ }, {
+ "name" : "server",
+ "type" : "string"
+ }, {
+ "name" : "instance",
+ "type" : [ "null", "string" ]
+ }, {
+ "name" : "appName",
+ "type" : "string"
+ }, {
+ "name" : "messageId",
+ "type" : {
+ "type" : "fixed",
+ "name" : "UUID",
+ "size" : 16
+ }
+ }, {
+ "name" : "auditVersion",
+ "type" : [ "null", "int" ],
+ "default" : null
+ }, {
+ "name" : "fabricUrn",
+ "type" : [ "null", "string" ],
+ "default" : null
+ } ]
+ } ],
+ "default" : null
+ }, {
+ "name" : "pageInstance",
+ "type" : [ "null", {
+ "type" : "record",
+ "name" : "PageInstance",
+ "namespace" : "com.linkedin.events.common",
+ "fields" : [ {
+ "name" : "pageUrn",
+ "type" : "string"
+ }, {
+ "name" : "trackingId",
+ "type" : {
+ "type" : "fixed",
+ "name" : "TrackingId",
+ "size" : 16
+ }
+ } ]
+ } ],
+ "default" : null
+ }, {
+ "name" : "clientApplicationInstance",
+ "type" : [ "null", {
+ "type" : "record",
+ "name" : "ApplicationInstance",
+ "namespace" : "com.linkedin.events.common",
+ "fields" : [ {
+ "name" : "applicationUrn",
+ "type" : "string"
+ }, {
+ "name" : "version",
+ "type" : "string"
+ }, {
+ "name" : "trackingId",
+ "type" : "TrackingId"
+ } ]
+ } ],
+ "default" : null
+ }, {
+ "name" : "originSource",
+ "type" : [ "null", {
+ "type" : "enum",
+ "name" : "OriginSource",
+ "symbols" : [ "QPROD" ]
+ } ],
+ "default" : null
+ } ]
+ }
+ }, {
+ "name" : "requestType",
+ "type" : {
+ "type" : "enum",
+ "name" : "ComplianceRequestType",
+ "symbols" : [ "TIME_LIMITED_FILTER", "GLOBAL_FILTER", "GLOBAL_UNFILTER",
"RETIRED_URN_FILTER" ]
+ }
+ }, {
+ "name" : "conditionValueUrn",
+ "type" : "string"
+ }, {
+ "name" : "endTime",
+ "type" : [ "null", "long" ]
+ }, {
+ "name" : "datasetRestrictionUrns",
+ "type" : [ "null", {
+ "type" : "array",
+ "items" : "string"
+ } ],
+ "default" : null
+ }, {
+ "name" : "useCaseRestrictionUrns",
+ "type" : [ "null", {
+ "type" : "array",
+ "items" : "string"
+ } ],
+ "default" : null
+ }, {
+ "name" : "columnRestrictionUrns",
+ "type" : [ "null", {
+ "type" : "array",
+ "items" : "string"
+ } ],
+ "default" : null
+ } ]
+}