Repository: sqoop Updated Branches: refs/heads/sqoop2 5de04df61 -> 9541d4f32
SQOOP-2153: Sqoop2: Ensure creation of valid Avro schema names (Jarek Jarcec Cecho via Gwen Shapira) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/9541d4f3 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/9541d4f3 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/9541d4f3 Branch: refs/heads/sqoop2 Commit: 9541d4f32f9f9c4bff3eae2929f7bd4601c89b29 Parents: 5de04df Author: Gwen Shapira <[email protected]> Authored: Sun Mar 1 18:10:38 2015 -0800 Committer: Gwen Shapira <[email protected]> Committed: Sun Mar 1 18:10:38 2015 -0800 ---------------------------------------------------------------------- .../connector/kite/util/KiteDataTypeUtil.java | 34 +++++++++++++++- .../connector/kite/util/TestDataTypeUtil.java | 42 ++++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/9541d4f3/connector/connector-kite/src/main/java/org/apache/sqoop/connector/kite/util/KiteDataTypeUtil.java ---------------------------------------------------------------------- diff --git a/connector/connector-kite/src/main/java/org/apache/sqoop/connector/kite/util/KiteDataTypeUtil.java b/connector/connector-kite/src/main/java/org/apache/sqoop/connector/kite/util/KiteDataTypeUtil.java index 2851459..1074d37 100644 --- a/connector/connector-kite/src/main/java/org/apache/sqoop/connector/kite/util/KiteDataTypeUtil.java +++ b/connector/connector-kite/src/main/java/org/apache/sqoop/connector/kite/util/KiteDataTypeUtil.java @@ -57,11 +57,11 @@ public class KiteDataTypeUtil { String name = sqoopSchema.getName(); String doc = sqoopSchema.getNote(); String namespace = DEFAULT_SQOOP_SCHEMA_NAMESPACE; - Schema schema = Schema.createRecord(name, doc, namespace, false); + Schema schema = Schema.createRecord(toAvroName(name), doc, namespace, false); List<Schema.Field> fields = new 
ArrayList<Schema.Field>(); for (Column column : sqoopSchema.getColumnsArray()) { - Schema.Field field = new Schema.Field(column.getName(), + Schema.Field field = new Schema.Field(toAvroName(column.getName()), createAvroFieldSchema(column), null, null); field.addProp(SQOOP_TYPE, column.getType().toString()); fields.add(field); @@ -82,6 +82,36 @@ public class KiteDataTypeUtil { } } + /** + * Converts arbitrary string to valid Avro name. + * + * This method does not guarantee that different two strings + * ends up as different two strings after conversion. It's + * up to the caller to ensure uniqueness if/when needed. + * + * Valid Avro names: + * * Starts with [A-Za-z_] + * * Subsequently only [A-Za-z0-9_] + * + * http://avro.apache.org/docs/1.7.7/spec.html#Names * + * + * @param name + * @return + */ + static String toAvroName(String name) { + if(name == null || name.isEmpty()) { + return name; + } + + // If we're not starting with [A-Za-z_], prepend '_' + if(name.charAt(0) != '_' && !Character.isLetter(name.charAt(0)) ) { + name = "_" + name; + } + + // Otherwise replace all invalid characters with '_' + return name.replaceAll("[^0-9A-Za-z_]", "_"); + } + private static Schema.Type toAvroType(Column column) throws IllegalArgumentException { switch (column.getType()) { case ARRAY: http://git-wip-us.apache.org/repos/asf/sqoop/blob/9541d4f3/connector/connector-kite/src/test/java/org/apache/sqoop/connector/kite/util/TestDataTypeUtil.java ---------------------------------------------------------------------- diff --git a/connector/connector-kite/src/test/java/org/apache/sqoop/connector/kite/util/TestDataTypeUtil.java b/connector/connector-kite/src/test/java/org/apache/sqoop/connector/kite/util/TestDataTypeUtil.java new file mode 100644 index 0000000..df99f74 --- /dev/null +++ b/connector/connector-kite/src/test/java/org/apache/sqoop/connector/kite/util/TestDataTypeUtil.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.connector.kite.util; + +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + +/** + */ +public class TestDataTypeUtil { + @Test + public void testToAvroName() { + // All valid cases + assertEquals("name", KiteDataTypeUtil.toAvroName("name")); + assertEquals("nN9", KiteDataTypeUtil.toAvroName("nN9")); + assertEquals("_nN9", KiteDataTypeUtil.toAvroName("_nN9")); + assertEquals("_", KiteDataTypeUtil.toAvroName("_")); + + // Ensuring that first character is valid + assertEquals("_9", KiteDataTypeUtil.toAvroName("9")); + assertEquals("__", KiteDataTypeUtil.toAvroName("%")); + + // Rest of the string + assertEquals("_____________", KiteDataTypeUtil.toAvroName("!@#$%^&*()_+")); + } +}
