This is an automated email from the ASF dual-hosted git repository.
sarath pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/atlas.git
The following commit(s) were added to refs/heads/master by this push:
new 784b606 ATLAS-342: Import Glossary Terms from csv/excel file into
Glossary
784b606 is described below
commit 784b606ddfa2149535b437483051a68859689a9d
Author: mayanknj <[email protected]>
AuthorDate: Fri Mar 20 23:40:51 2020 -0700
ATLAS-342: Import Glossary Terms from csv/excel file into Glossary
Signed-off-by: Sarath Subramanian <[email protected]>
---
.../org/apache/atlas/repository/Constants.java | 5 +
.../main/java/org/apache/atlas/AtlasErrorCode.java | 6 +-
.../glossary/relations/AtlasGlossaryHeader.java | 7 +
pom.xml | 5 +
repository/pom.xml | 18 ++
.../org/apache/atlas/glossary/GlossaryService.java | 44 ++++-
.../apache/atlas/glossary/GlossaryTermUtils.java | 208 +++++++++++++++++++++
.../org/apache/atlas/glossary/GlossaryUtils.java | 5 +-
.../main/java/org/apache/atlas/util/FileUtils.java | 126 +++++++++++++
.../apache/atlas/glossary/GlossaryServiceTest.java | 96 +++++++++-
repository/src/test/resources/csvFiles/empty.csv | 0
.../src/test/resources/csvFiles/incorrectFile.csv | 2 +
.../src/test/resources/csvFiles/template_1.csv | 2 +
.../src/test/resources/excelFiles/template_1.xlsx | Bin 0 -> 5417 bytes
.../org/apache/atlas/web/rest/GlossaryREST.java | 57 +++++-
15 files changed, 567 insertions(+), 14 deletions(-)
diff --git a/common/src/main/java/org/apache/atlas/repository/Constants.java
b/common/src/main/java/org/apache/atlas/repository/Constants.java
index 7c0fd56..42600f2 100644
--- a/common/src/main/java/org/apache/atlas/repository/Constants.java
+++ b/common/src/main/java/org/apache/atlas/repository/Constants.java
@@ -201,6 +201,11 @@ public final class Constants {
public static final String ATTR_NAME_REPLICATED_FROM = "replicatedFrom";
public static final Integer INCOMPLETE_ENTITY_VALUE = Integer.valueOf(1);
+ /*
+ * All supported file-format extensions for AtlasGlossaryTerms file upload.
+ * The constant names are matched case-insensitively against the extension of
+ * the uploaded file name (see FileUtils.readFileData).
+ */
+ public enum GlossaryImportSupportedFileExtensions { XLSX, XLS, CSV }
+
private Constants() {
}
diff --git a/intg/src/main/java/org/apache/atlas/AtlasErrorCode.java
b/intg/src/main/java/org/apache/atlas/AtlasErrorCode.java
index 1bd2fd0..ff56402 100644
--- a/intg/src/main/java/org/apache/atlas/AtlasErrorCode.java
+++ b/intg/src/main/java/org/apache/atlas/AtlasErrorCode.java
@@ -167,6 +167,7 @@ public enum AtlasErrorCode {
INVALID_BUSINESS_METADATA_NAME_FOR_ENTITY_TYPE(400, "ATLAS-400-00-095",
"Invalid business-metadata: {0} specified for entity, applicable
business-metadata: {1}"),
BUSINESS_METADATA_ATTRIBUTE_DOES_NOT_EXIST(400, "ATLAS-400-00-096",
"Business-metadata attribute does not exist in entity: {0}"),
BUSINESS_METADATA_ATTRIBUTE_ALREADY_EXISTS(400, "ATLAS-400-00-097",
"Business-metadata attribute already exists in entity: {0}"),
+ INVALID_FILE_TYPE(400, "ATLAS-400-00-098", "The provided file type {0} is not supported."),
UNAUTHORIZED_ACCESS(403, "ATLAS-403-00-001", "{0} is not authorized to
perform {1}"),
@@ -190,6 +191,7 @@ public enum AtlasErrorCode {
INVALID_LINEAGE_ENTITY_TYPE(404, "ATLAS-404-00-011", "Given instance guid
{0} with type {1} is not a valid lineage entity type."),
INSTANCE_GUID_DELETED(404, "ATLAS-404-00-012", "Given instance guid {0}
has been deleted"),
NO_PROPAGATED_CLASSIFICATIONS_FOUND_FOR_ENTITY(404, "ATLAS-404-00-013",
"No propagated classifications associated with entity: {0}"),
+ NO_DATA_FOUND(404, "ATLAS-404-00-014", "No data found in the uploaded
file"),
// All data conflict errors go here
TYPE_ALREADY_EXISTS(409, "ATLAS-409-00-001", "Given type {0} already
exists"),
@@ -221,7 +223,9 @@ public enum AtlasErrorCode {
HIVE_HOOK(500, "ATLAS-500-00-010", "HiveHook: {0}"),
HIVE_HOOK_METASTORE_BRIDGE(500, "ATLAS-500-00-011",
"HiveHookMetaStoreBridge: {0}"),
DATA_ACCESS_LOAD_FAILED(500, "ATLAS-500-00-013", "Load failed: {0}"),
- ENTITY_NOTIFICATION_FAILED(500, "ATLAS-500-00-014", "Notification failed
for operation: {0} : {1}");
+ ENTITY_NOTIFICATION_FAILED(500, "ATLAS-500-00-014", "Notification failed
for operation: {0} : {1}"),
+ FAILED_TO_UPLOAD(500, "ATLAS-500-00-015", "Error occurred while uploading
the file: {0}"),
+ FAILED_TO_CREATE_GLOSSARY_TERM(500, "ATLAS-500-00-016", "Error occurred
while creating glossary term: {0}");
private String errorCode;
private String errorMessage;
diff --git
a/intg/src/main/java/org/apache/atlas/model/glossary/relations/AtlasGlossaryHeader.java
b/intg/src/main/java/org/apache/atlas/model/glossary/relations/AtlasGlossaryHeader.java
index 660514b..da1a3f1 100644
---
a/intg/src/main/java/org/apache/atlas/model/glossary/relations/AtlasGlossaryHeader.java
+++
b/intg/src/main/java/org/apache/atlas/model/glossary/relations/AtlasGlossaryHeader.java
@@ -27,6 +27,13 @@ public class AtlasGlossaryHeader {
private String relationGuid;
private String displayText;
+    /**
+     * Creates a header without assigning any field.
+     */
+    public AtlasGlossaryHeader() {
+    }
+
+    /**
+     * Creates a header that refers to a glossary by GUID.
+     *
+     * @param glossaryGuid value stored in this header's {@code glossaryGuid} field
+     */
+    public AtlasGlossaryHeader(String glossaryGuid) {
+        this.glossaryGuid = glossaryGuid;
+    }
+
public String getDisplayText() {
return displayText;
}
diff --git a/pom.xml b/pom.xml
index a6da9b8..7e9c7e0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -706,6 +706,11 @@
<commons-io.version>2.6</commons-io.version>
<!-- Apache commons -->
+ <!--Apache poi and Open csv-->
+ <opencsv.version>4.6</opencsv.version>
+ <poi.version>3.17</poi.version>
+ <poi-ooxml.version>3.17</poi-ooxml.version>
+
<javax-inject.version>1</javax-inject.version>
<jettison.version>1.3.7</jettison.version>
<paranamer.version>2.7</paranamer.version>
diff --git a/repository/pom.xml b/repository/pom.xml
index 802d587..8618962 100755
--- a/repository/pom.xml
+++ b/repository/pom.xml
@@ -224,6 +224,24 @@
<version>${hppc.version}</version>
</dependency>
+ <dependency>
+ <groupId>com.opencsv</groupId>
+ <artifactId>opencsv</artifactId>
+ <version>${opencsv.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi</artifactId>
+ <version>${poi.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <version>${poi-ooxml.version}</version>
+ </dependency>
+
</dependencies>
<profiles>
diff --git
a/repository/src/main/java/org/apache/atlas/glossary/GlossaryService.java
b/repository/src/main/java/org/apache/atlas/glossary/GlossaryService.java
index 9229d2d..d630f66 100644
--- a/repository/src/main/java/org/apache/atlas/glossary/GlossaryService.java
+++ b/repository/src/main/java/org/apache/atlas/glossary/GlossaryService.java
@@ -34,6 +34,7 @@ import
org.apache.atlas.repository.store.graph.AtlasRelationshipStore;
import org.apache.atlas.repository.store.graph.v2.AtlasEntityChangeNotifier;
import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
import org.apache.atlas.type.AtlasTypeRegistry;
+import org.apache.atlas.util.FileUtils;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
@@ -41,6 +42,8 @@ import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import javax.inject.Inject;
+import java.io.IOException;
+import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -51,9 +54,7 @@ import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
-import static
org.apache.atlas.glossary.GlossaryUtils.getAtlasGlossaryCategorySkeleton;
-import static
org.apache.atlas.glossary.GlossaryUtils.getAtlasGlossaryTermSkeleton;
-import static org.apache.atlas.glossary.GlossaryUtils.getGlossarySkeleton;
+import static org.apache.atlas.glossary.GlossaryUtils.*;
@Service
public class GlossaryService {
@@ -67,7 +68,7 @@ public class GlossaryService {
private final AtlasTypeRegistry atlasTypeRegistry;
private final AtlasEntityChangeNotifier entityChangeNotifier;
- private final char[] invalidNameChars = {'@', '.'};
+ private static final char[] invalidNameChars = { '@', '.' };
@Inject
public GlossaryService(DataAccess dataAccess, final AtlasRelationshipStore
relationshipStore,
@@ -1029,7 +1030,7 @@ public class GlossaryService {
termHeaders.forEach(t ->
t.setDisplayText(getDisplayText(termMap.get(t.getTermGuid()))));
}
- private boolean isNameInvalid(String name) {
+ public static boolean isNameInvalid(String name) {
return StringUtils.containsAny(name, invalidNameChars);
}
@@ -1080,4 +1081,37 @@ public class GlossaryService {
}
}
+    /**
+     * Reads glossary terms from an uploaded file and creates them.
+     *
+     * @param inputStream content of the uploaded file
+     * @param fileName    name of the uploaded file; its extension selects the parser
+     * @return the terms returned by {@code createTerm} for every row of the file
+     * @throws AtlasBaseException with INVALID_FILE_TYPE when {@code fileName} is blank, with
+     *                            FAILED_TO_UPLOAD when reading the stream fails, or any error raised
+     *                            while parsing the rows or creating the terms
+     */
+    public List<AtlasGlossaryTerm> importGlossaryData(InputStream inputStream, String fileName) throws AtlasBaseException {
+        if (StringUtils.isBlank(fileName)) {
+            throw new AtlasBaseException(AtlasErrorCode.INVALID_FILE_TYPE, fileName);
+        }
+
+        try {
+            List<String[]>          rows            = FileUtils.readFileData(fileName, inputStream);
+            List<String>            failureMessages = new ArrayList<>();
+            List<AtlasGlossaryTerm> parsedTerms     = glossaryTermUtils.getGlossaryTermDataList(rows, failureMessages);
+
+            return createGlossaryTerms(parsedTerms);
+        } catch (IOException e) {
+            throw new AtlasBaseException(AtlasErrorCode.FAILED_TO_UPLOAD, e);
+        }
+    }
+
+    /**
+     * Creates the given terms one by one, in order, stopping at the first failure.
+     *
+     * @param glossaryTerms terms to create
+     * @return the created terms, in the same order
+     * @throws AtlasBaseException FAILED_TO_CREATE_GLOSSARY_TERM wrapping the first creation error
+     */
+    private List<AtlasGlossaryTerm> createGlossaryTerms(List<AtlasGlossaryTerm> glossaryTerms) throws AtlasBaseException {
+        List<AtlasGlossaryTerm> created = new ArrayList<>();
+
+        try {
+            for (AtlasGlossaryTerm term : glossaryTerms) {
+                created.add(createTerm(term));
+            }
+        } catch (AtlasBaseException e) {
+            throw new AtlasBaseException(AtlasErrorCode.FAILED_TO_CREATE_GLOSSARY_TERM, e);
+        }
+
+        return created;
+    }
}
diff --git
a/repository/src/main/java/org/apache/atlas/glossary/GlossaryTermUtils.java
b/repository/src/main/java/org/apache/atlas/glossary/GlossaryTermUtils.java
index cdc3f07..2c84ec7 100644
--- a/repository/src/main/java/org/apache/atlas/glossary/GlossaryTermUtils.java
+++ b/repository/src/main/java/org/apache/atlas/glossary/GlossaryTermUtils.java
@@ -28,19 +28,26 @@ import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasRelatedObjectId;
import org.apache.atlas.model.instance.AtlasRelationship;
import org.apache.atlas.model.instance.AtlasStruct;
+import org.apache.atlas.repository.graphdb.AtlasVertex;
import org.apache.atlas.repository.ogm.DataAccess;
import org.apache.atlas.repository.store.graph.AtlasRelationshipStore;
+import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
import org.apache.atlas.type.AtlasRelationshipType;
import org.apache.atlas.type.AtlasTypeRegistry;
+import org.apache.atlas.util.FileUtils;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
@@ -522,4 +529,205 @@ public class GlossaryTermUtils extends GlossaryUtils {
}
}
+    /**
+     * Converts the rows read from an uploaded file into glossary terms.
+     * <p>
+     * Column 0 of every row is the glossary name. The glossary is looked up by qualified name and,
+     * when it does not exist and the name contains no invalid character, it is created. GUIDs
+     * resolved this way are cached for the remaining rows. Problems are collected in
+     * {@code failedTermMsgs}; if any were recorded once all rows are processed they are reported
+     * together in a single exception.
+     *
+     * @param fileData       data rows of the uploaded file (header row already removed)
+     * @param failedTermMsgs collector that receives one message per problem found
+     * @return the terms built from {@code fileData}
+     * @throws AtlasBaseException if at least one problem was recorded in {@code failedTermMsgs}
+     */
+    protected List<AtlasGlossaryTerm> getGlossaryTermDataList(List<String[]> fileData, List<String> failedTermMsgs) throws AtlasBaseException {
+        List<AtlasGlossaryTerm> glossaryTerms     = new ArrayList<>();
+        Map<String, String>     glossaryNameCache = new HashMap<>();
+
+        for (String[] record : fileData) {
+            if ((record.length < 1) || StringUtils.isBlank(record[0])) {
+                LOG.error("The GlossaryName is blank for the record : {}", Arrays.toString(record));
+                failedTermMsgs.add("The GlossaryName is blank for the record : " + Arrays.toString(record));
+
+                // Without a glossary name nothing else can be resolved for this row; reading
+                // record[0] of an empty row would throw ArrayIndexOutOfBoundsException.
+                continue;
+            }
+
+            String glossaryName = record[0];
+            String glossaryGuid = glossaryNameCache.get(glossaryName);
+
+            if (glossaryGuid == null) {
+                AtlasVertex vertex = AtlasGraphUtilsV2.findByTypeAndUniquePropertyName(GlossaryUtils.ATLAS_GLOSSARY_TYPENAME,
+                        GlossaryUtils.ATLAS_GLOSSARY_TYPENAME + "." + QUALIFIED_NAME_ATTR, glossaryName);
+
+                glossaryGuid = (vertex != null) ? AtlasGraphUtilsV2.getIdFromVertex(vertex) : null;
+            }
+
+            if (glossaryGuid == null) {
+                if (GlossaryService.isNameInvalid(glossaryName)) {
+                    LOG.error("The provided Glossary Name is invalid : {}", glossaryName);
+                    failedTermMsgs.add("The provided Glossary Name is invalid : " + glossaryName);
+                } else {
+                    // Unknown glossary with a valid name: create it on the fly.
+                    AtlasGlossary glossary = new AtlasGlossary();
+                    glossary.setQualifiedName(glossaryName);
+                    glossary.setName(glossaryName);
+
+                    glossary     = dataAccess.save(glossary);
+                    glossaryGuid = glossary.getGuid();
+                }
+            }
+
+            if (glossaryGuid != null) {
+                glossaryNameCache.put(glossaryName, glossaryGuid);
+                glossaryTerms.add(populateGlossaryTermObject(failedTermMsgs, record, glossaryGuid));
+            }
+        }
+
+        if (!failedTermMsgs.isEmpty()) {
+            throw new AtlasBaseException("The uploaded file has not been processed due to the following errors : " + "\n" + failedTermMsgs.toString());
+        }
+
+        return glossaryTerms;
+    }
+
+    /**
+     * Builds the header line of the glossary-term import template.
+     *
+     * @return the column names, in file order, separated by {@code ", "}
+     */
+    public static String getGlossaryTermHeaders() {
+        return String.join(", ",
+                "GlossaryName",
+                "TermName",
+                "ShortDescription",
+                "LongDescription",
+                "Examples",
+                "Abbreviation",
+                "Usage",
+                "AdditionalAttributes",
+                "TranslationTerms",
+                "ValidValuesFor",
+                "Synonyms",
+                "ReplacedBy",
+                "ValidValues",
+                "ReplacementTerms",
+                "SeeAlso",
+                "TranslatedTerms",
+                "IsA",
+                "Antonyms",
+                "Classifies",
+                "PreferredToTerms",
+                "PreferredTerms");
+    }
+
+    /**
+     * Parses a cell of the form {@code key1:value1|key2:value2} into a map.
+     *
+     * @param csvRecord      raw cell content
+     * @param failedTermMsgs collector that receives a message for every entry that cannot be parsed
+     * @return the parsed entries, or {@code null} when {@code csvRecord} is blank
+     */
+    protected Map getMapValue(String csvRecord, List<String> failedTermMsgs) {
+        if (StringUtils.isBlank(csvRecord)) {
+            return null;
+        }
+
+        Map<String, String> ret = new HashMap<>();
+
+        for (String entry : csvRecord.split(FileUtils.ESCAPE_CHARACTER + FileUtils.PIPE_CHARACTER)) {
+            String[] keyValue = entry.split(FileUtils.COLON_CHARACTER);
+
+            // An entry consisting only of colons splits into an empty array; the lower bound keeps it
+            // from passing the even-length check and failing on keyValue[0].
+            if ((keyValue.length >= 2) && ((keyValue.length % 2) == 0)) {
+                ret.put(keyValue[0], keyValue[1]);
+            } else {
+                failedTermMsgs.add("\n" + "The Data in the uploaded file is incorrectly specified : " + csvRecord);
+            }
+        }
+
+        return ret;
+    }
+
+    /**
+     * Splits a pipe-separated cell into its values.
+     *
+     * @param csvRecord raw cell content
+     * @return a fixed-size list of the values, or {@code null} when {@code csvRecord} is blank
+     */
+    protected List getListValue(String csvRecord) {
+        if (StringUtils.isBlank(csvRecord)) {
+            return null;
+        }
+
+        String pipeRegex = FileUtils.ESCAPE_CHARACTER + FileUtils.PIPE_CHARACTER;
+
+        return Arrays.asList(csvRecord.split(pipeRegex));
+    }
+
+    /**
+     * Resolves a cell of related-term references into term headers.
+     * <p>
+     * The cell holds pipe-separated references, each made of two colon-separated parts. A reference
+     * is looked up as the term whose qualified name is {@code <second part>@<first part>}.
+     *
+     * @param csvRecord      raw cell content
+     * @param termName       name of the term the row describes (used in failure messages)
+     * @param glossaryName   glossary name of the row (used in failure messages)
+     * @param failedTermMsgs collector that receives a message for every malformed or unknown reference
+     * @return headers of the referenced terms that were found, or {@code null} when {@code csvRecord} is blank
+     */
+    protected Set getAtlasRelatedTermHeaderSet(String csvRecord, String termName, String glossaryName, List<String> failedTermMsgs) {
+        if (StringUtils.isBlank(csvRecord)) {
+            return null;
+        }
+
+        Set<AtlasRelatedTermHeader> ret = new HashSet<>();
+
+        for (String data : csvRecord.split(FileUtils.ESCAPE_CHARACTER + FileUtils.PIPE_CHARACTER)) {
+            String[] dataArray = data.split(FileUtils.ESCAPE_CHARACTER + FileUtils.COLON_CHARACTER);
+
+            if ((dataArray.length < 2) || ((dataArray.length % 2) != 0)) {
+                failedTermMsgs.add("\n" + "Either incorrect data specified for Term or Term does not exist : " +termName);
+
+                // Malformed reference: dataArray[0]/dataArray[1] may not exist, so neither the lookup
+                // nor the "does not exist" message below can be built for it.
+                continue;
+            }
+
+            // invalidNameChars[1] is '.' (type-name/attribute separator) and invalidNameChars[0] is '@'
+            // (separator inside the qualified name).
+            AtlasVertex vertex = AtlasGraphUtilsV2.findByTypeAndUniquePropertyName(GlossaryUtils.ATLAS_GLOSSARY_TERM_TYPENAME,
+                    GlossaryUtils.ATLAS_GLOSSARY_TERM_TYPENAME + invalidNameChars[1] + QUALIFIED_NAME_ATTR, dataArray[1] + invalidNameChars[0] + dataArray[0]);
+
+            if (vertex != null) {
+                AtlasRelatedTermHeader relatedTermHeader = new AtlasRelatedTermHeader();
+
+                relatedTermHeader.setTermGuid(AtlasGraphUtilsV2.getIdFromVertex(vertex));
+                ret.add(relatedTermHeader);
+            } else {
+                failedTermMsgs.add("\n" + "The provided Reference Glossary and TermName does not exist in the system " +
+                        dataArray[1] + FileUtils.COLON_CHARACTER + dataArray[0] + " for record with TermName : " + termName + " and GlossaryName : " + glossaryName);
+            }
+        }
+
+        return ret;
+    }
+
+    /**
+     * Builds a glossary term from one row of the uploaded file.
+     * <p>
+     * Columns are read by position, in the order produced by {@link #getGlossaryTermHeaders()}:
+     * column 0 is the glossary name, column 1 the term name, and so on. Columns missing from a
+     * short row yield {@code null} attributes. When the term name is blank a failure message is
+     * recorded and no other attribute is populated.
+     *
+     * @param failedTermMsgList collector that receives a message for every problem found
+     * @param record            cells of the row
+     * @param glossaryGuid      GUID of the glossary the term is anchored to
+     * @return the populated term (only the name is set when the name is blank)
+     */
+    protected AtlasGlossaryTerm populateGlossaryTermObject(List<String> failedTermMsgList, String[] record, String glossaryGuid) {
+        AtlasGlossaryTerm term = new AtlasGlossaryTerm();
+
+        term.setName(cellAt(record, 1));
+
+        if (StringUtils.isBlank(term.getName())) {
+            failedTermMsgList.add("\n" + "The TermName is blank for provided record: " + Arrays.toString(record));
+
+            return term;
+        }
+
+        term.setShortDescription(cellAt(record, 2));
+        term.setLongDescription(cellAt(record, 3));
+        term.setExamples((record.length > 4) ? (List<String>) getListValue(record[4]) : null);
+        term.setAbbreviation(cellAt(record, 5));
+        term.setUsage(cellAt(record, 6));
+        term.setAdditionalAttributes((record.length > 7) ? (Map<String, Object>) getMapValue(record[7], failedTermMsgList) : null);
+
+        // Related-term columns are resolved in column order so failure messages keep their order.
+        term.setTranslationTerms(relatedTermsAt(record, 8, term.getName(), failedTermMsgList));
+        term.setValidValuesFor(relatedTermsAt(record, 9, term.getName(), failedTermMsgList));
+        term.setSynonyms(relatedTermsAt(record, 10, term.getName(), failedTermMsgList));
+        term.setReplacedBy(relatedTermsAt(record, 11, term.getName(), failedTermMsgList));
+        term.setValidValues(relatedTermsAt(record, 12, term.getName(), failedTermMsgList));
+        term.setReplacementTerms(relatedTermsAt(record, 13, term.getName(), failedTermMsgList));
+        term.setSeeAlso(relatedTermsAt(record, 14, term.getName(), failedTermMsgList));
+        term.setTranslatedTerms(relatedTermsAt(record, 15, term.getName(), failedTermMsgList));
+        term.setIsA(relatedTermsAt(record, 16, term.getName(), failedTermMsgList));
+
+        term.setAnchor(new AtlasGlossaryHeader(glossaryGuid));
+
+        term.setAntonyms(relatedTermsAt(record, 17, term.getName(), failedTermMsgList));
+        term.setClassifies(relatedTermsAt(record, 18, term.getName(), failedTermMsgList));
+        term.setPreferredToTerms(relatedTermsAt(record, 19, term.getName(), failedTermMsgList));
+        term.setPreferredTerms(relatedTermsAt(record, 20, term.getName(), failedTermMsgList));
+
+        return term;
+    }
+
+    // Returns the cell at the given index, or null when the row is too short to contain it.
+    private static String cellAt(String[] record, int index) {
+        return (record.length > index) ? record[index] : null;
+    }
+
+    // Resolves the related-term references held in the given column, or returns null when the row is too short.
+    @SuppressWarnings("unchecked")
+    private Set<AtlasRelatedTermHeader> relatedTermsAt(String[] record, int index, String termName, List<String> failedTermMsgList) {
+        return (record.length > index) ? (Set<AtlasRelatedTermHeader>) getAtlasRelatedTermHeaderSet(record[index], termName, record[0], failedTermMsgList) : null;
+    }
}
diff --git
a/repository/src/main/java/org/apache/atlas/glossary/GlossaryUtils.java
b/repository/src/main/java/org/apache/atlas/glossary/GlossaryUtils.java
index 9625f94..2a2cebb 100644
--- a/repository/src/main/java/org/apache/atlas/glossary/GlossaryUtils.java
+++ b/repository/src/main/java/org/apache/atlas/glossary/GlossaryUtils.java
@@ -17,7 +17,6 @@
*/
package org.apache.atlas.glossary;
-import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.glossary.AtlasGlossary;
import org.apache.atlas.model.glossary.AtlasGlossaryCategory;
@@ -44,6 +43,10 @@ public abstract class GlossaryUtils {
static final String ATLAS_GLOSSARY_TERM_TYPENAME = "AtlasGlossaryTerm";
static final String ATLAS_GLOSSARY_CATEGORY_TYPENAME =
"AtlasGlossaryCategory";
+ // Attribute name constant
+ public static final String NAME = "name";
+ // Attribute name appended to a type name to form the unique-property key used in glossary lookups
+ public static final String QUALIFIED_NAME_ATTR = "qualifiedName";
+ // Characters rejected in glossary names; GlossaryTermUtils also uses index 0 ('@') and index 1 ('.') as separators
+ public static final char[] invalidNameChars = {'@', '.'};
+
// Relation name constants
protected static final String ATLAS_GLOSSARY_PREFIX =
ATLAS_GLOSSARY_TYPENAME;
protected static final String TERM_ANCHOR =
ATLAS_GLOSSARY_PREFIX + "TermAnchor";
diff --git a/repository/src/main/java/org/apache/atlas/util/FileUtils.java
b/repository/src/main/java/org/apache/atlas/util/FileUtils.java
new file mode 100644
index 0000000..7b992ce
--- /dev/null
+++ b/repository/src/main/java/org/apache/atlas/util/FileUtils.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.util;
+
+import com.opencsv.CSVReader;
+import org.apache.atlas.AtlasErrorCode;
+import org.apache.atlas.exception.AtlasBaseException;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import static org.apache.atlas.repository.Constants.GlossaryImportSupportedFileExtensions.*;
+
+/**
+ * Helpers for reading the rows of an uploaded glossary import file (CSV, XLS or XLSX).
+ */
+public class FileUtils {
+    public static final String PIPE_CHARACTER   = "|";
+    public static final String COLON_CHARACTER  = ":";
+    public static final String ESCAPE_CHARACTER = "\\";
+
+    /**
+     * Reads the data rows of an uploaded file, choosing the parser from the file extension.
+     *
+     * @param fileName    name of the uploaded file; only its extension is used
+     * @param inputStream content of the uploaded file
+     * @return the data rows (header row excluded); never empty
+     * @throws IOException        if the stream cannot be read
+     * @throws AtlasBaseException INVALID_FILE_TYPE for an unsupported or missing extension,
+     *                            NO_DATA_FOUND when the file has no data rows
+     */
+    public static List<String[]> readFileData(String fileName, InputStream inputStream) throws IOException, AtlasBaseException {
+        List<String[]> ret;
+        String         extension = FilenameUtils.getExtension(fileName);
+
+        // Constant-first comparisons stay null-safe: getExtension() returns null for a null name.
+        if (CSV.name().equalsIgnoreCase(extension)) {
+            ret = readCSV(inputStream);
+        } else if (XLS.name().equalsIgnoreCase(extension) || XLSX.name().equalsIgnoreCase(extension)) {
+            ret = readExcel(inputStream, extension);
+        } else {
+            // Supply the argument for the {0} placeholder of the INVALID_FILE_TYPE message.
+            throw new AtlasBaseException(AtlasErrorCode.INVALID_FILE_TYPE, fileName);
+        }
+
+        if (CollectionUtils.isEmpty(ret)) {
+            throw new AtlasBaseException(AtlasErrorCode.NO_DATA_FOUND);
+        }
+
+        return ret;
+    }
+
+    /**
+     * Reads a CSV stream as UTF-8. The first record is treated as the header and skipped;
+     * records with fewer than two columns are ignored. The stream is closed on return.
+     *
+     * @param inputStream CSV content
+     * @return the data records, empty when the stream has no header
+     * @throws IOException if the stream cannot be read
+     */
+    public static List<String[]> readCSV(InputStream inputStream) throws IOException {
+        List<String[]> ret = new ArrayList<>();
+
+        try (CSVReader csvReader = new CSVReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
+            String[] header = csvReader.readNext();
+
+            if (header == null || header.length == 0) {
+                return ret;
+            }
+
+            String[] data;
+
+            while ((data = csvReader.readNext()) != null) {
+                if (data.length > 1) {
+                    ret.add(data);
+                }
+            }
+        }
+
+        return ret;
+    }
+
+    /**
+     * Reads the first sheet of an Excel workbook. The first row is treated as the header and
+     * skipped; empty rows are ignored. Cell values are returned as trimmed text, whatever the
+     * cell type; missing cells are returned as {@code null}.
+     *
+     * @param inputStream workbook content
+     * @param extension   {@code xls} selects the HSSF reader, anything else the XSSF reader
+     * @return the data rows, empty when the sheet has no rows or an empty header row
+     * @throws IOException if the workbook cannot be read
+     */
+    public static List<String[]> readExcel(InputStream inputStream, String extension) throws IOException {
+        List<String[]> ret = new ArrayList<>();
+
+        // The workbook holds resources that must be released once the rows are copied out.
+        try (Workbook excelBook = XLS.name().equalsIgnoreCase(extension) ? new HSSFWorkbook(inputStream) : new XSSFWorkbook(inputStream)) {
+            Sheet         excelSheet = excelBook.getSheetAt(0);
+            Iterator<Row> itr        = excelSheet.rowIterator();
+
+            // A sheet without any row has no header either; calling next() would throw.
+            if (!itr.hasNext() || isRowEmpty(itr.next())) {
+                return ret;
+            }
+
+            // Formats any cell type as text; getStringCellValue() throws on numeric/boolean cells.
+            DataFormatter formatter = new DataFormatter();
+
+            while (itr.hasNext()) {
+                Row row = itr.next();
+
+                if (isRowEmpty(row)) {
+                    continue;
+                }
+
+                int      cellCount = row.getLastCellNum();
+                String[] data      = new String[cellCount];
+
+                for (int i = 0; i < cellCount; i++) {
+                    Cell cell = row.getCell(i);
+
+                    data[i] = (cell != null) ? formatter.formatCellValue(cell).trim() : null;
+                }
+
+                ret.add(data);
+            }
+        }
+
+        return ret;
+    }
+
+    // A row is empty when it is null or every cell in it is missing or of blank type.
+    private static boolean isRowEmpty(Row row) {
+        if (row == null) {
+            return true;
+        }
+
+        for (int c = row.getFirstCellNum(); c < row.getLastCellNum(); c++) {
+            Cell cell = row.getCell(c);
+
+            if (cell != null && cell.getCellType() != Cell.CELL_TYPE_BLANK) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+}
\ No newline at end of file
diff --git
a/repository/src/test/java/org/apache/atlas/glossary/GlossaryServiceTest.java
b/repository/src/test/java/org/apache/atlas/glossary/GlossaryServiceTest.java
index 759dcdf..86738bb 100644
---
a/repository/src/test/java/org/apache/atlas/glossary/GlossaryServiceTest.java
+++
b/repository/src/test/java/org/apache/atlas/glossary/GlossaryServiceTest.java
@@ -36,13 +36,14 @@ import org.apache.atlas.model.instance.AtlasRelatedObjectId;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.model.typedef.AtlasClassificationDef;
import org.apache.atlas.model.typedef.AtlasTypesDef;
-import org.apache.atlas.utils.TestLoadModelUtils;
import org.apache.atlas.repository.store.graph.AtlasEntityStore;
import org.apache.atlas.repository.store.graph.v2.AtlasEntityStream;
import org.apache.atlas.store.AtlasTypeDefStore;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.atlas.utils.AtlasJson;
+import org.apache.atlas.utils.TestLoadModelUtils;
import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.SkipException;
@@ -52,7 +53,11 @@ import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import javax.inject.Inject;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -80,13 +85,16 @@ public class GlossaryServiceTest {
private AtlasRelatedObjectId relatedObjectId;
+ public static final String CSV_FILES = "/csvFiles/";
+ public static final String EXCEL_FILES = "/excelFiles/";
+
@DataProvider
public static Object[][] getGlossaryTermsProvider() {
return new Object[][]{
// offset, limit, expected
- {0, -1, 4},
+ {0, -1, 6},
{0, 2, 2},
- {2, 5, 2},
+ {2, 5, 4},
};
}
@@ -845,7 +853,6 @@ public class GlossaryServiceTest {
} catch (AtlasBaseException e) {
fail("RelatedTerm association should've succeeded", e);
}
-
}
@Test(dataProvider = "getGlossaryTermsProvider" , groups =
"Glossary.GET.postUpdate", dependsOnGroups = "Glossary.UPDATE")
@@ -897,7 +904,6 @@ public class GlossaryServiceTest {
};
}
-
@Test(dataProvider = "getCategoryTermsProvider", dependsOnGroups =
"Glossary.CREATE")
public void testGetCategoryTerms(int offset, int limit, int expected) {
for (AtlasGlossaryCategory c : Arrays.asList(accountCategory,
mortgageCategory)) {
@@ -910,4 +916,84 @@ public class GlossaryServiceTest {
}
}
}
+
+    // The import template must list every supported column, in file order.
+    @Test
+    public void testGetTemplate(){
+        final String expectedHeaders = "GlossaryName, TermName, ShortDescription, LongDescription, Examples, Abbreviation, Usage, "
+                + "AdditionalAttributes, TranslationTerms, ValidValuesFor, Synonyms, ReplacedBy, "
+                + "ValidValues, ReplacementTerms, SeeAlso, TranslatedTerms, IsA, Antonyms, "
+                + "Classifies, PreferredToTerms, PreferredTerms";
+
+        try {
+            String actualHeaders = GlossaryTermUtils.getGlossaryTermHeaders();
+
+            assertNotNull(actualHeaders);
+            assertEquals(actualHeaders, expectedHeaders);
+        } catch (Exception e) {
+            fail("The Template for Glossary Term should've been a success",e);
+        }
+    }
+
+    // Each template file (CSV, then XLSX) must import exactly one term.
+    @Test( dependsOnGroups = "Glossary.CREATE" )
+    public void testImportGlossaryData(){
+        String[][] uploads = {
+                { CSV_FILES,   "template_1.csv"  },
+                { EXCEL_FILES, "template_1.xlsx" },
+        };
+
+        try {
+            for (String[] upload : uploads) {
+                InputStream             uploadStream  = getFile(upload[0], upload[1]);
+                List<AtlasGlossaryTerm> importedTerms = glossaryService.importGlossaryData(uploadStream, upload[1]);
+
+                assertNotNull(importedTerms);
+                assertEquals(importedTerms.size(), 1);
+            }
+        } catch (AtlasBaseException e){
+            fail("The GlossaryTerm should have been created "+e);
+        }
+    }
+
+    // Importing a file without any content must be rejected with the NO_DATA_FOUND message.
+    @Test
+    public void testEmptyFileException() {
+        InputStream inputStream = getFile(CSV_FILES, "empty.csv");
+
+        try {
+            glossaryService.importGlossaryData(inputStream, "empty.csv");
+            fail("Error occurred : Failed to recognize the empty file.");
+        } catch (AtlasBaseException e) {
+            // Must match the text declared for AtlasErrorCode.NO_DATA_FOUND.
+            assertEquals(e.getMessage(),"No data found in the uploaded file");
+        }
+    }
+
+    // A row that references an unknown related term must fail the whole import with a descriptive message.
+    @Test
+    public void testIncorrectFileException() {
+        final String uploadName      = "incorrectFile.csv";
+        final String expectedMessage = "The uploaded file has not been processed due to the following errors : \n"
+                + "[\n"
+                + "The provided Reference Glossary and TermName does not exist in the system GentsFootwear: for record with TermName : BankBranch1 and GlossaryName : testBankingGlossary]";
+
+        InputStream uploadStream = getFile(CSV_FILES, uploadName);
+
+        try {
+            glossaryService.importGlossaryData(uploadStream, uploadName);
+            fail("Error occurred : Failed to recognize the incorrect file.");
+        } catch (AtlasBaseException e) {
+            assertEquals(e.getMessage(), expectedMessage);
+        }
+    }
+
+    // Opens a test resource relative to the working directory; logs and returns null when the file is missing.
+    private static InputStream getFile(String subDir, String fileName){
+        String filePath = getTestFilePath(System.getProperty("user.dir"), subDir, fileName);
+
+        try {
+            return new FileInputStream(new File(filePath));
+        } catch (FileNotFoundException e) {
+            LOG.error("File could not be found at: {}", filePath, e);
+
+            return null;
+        }
+    }
+
+    // Builds the path of a file under src/test/resources, optionally inside a sub-directory.
+    private static String getTestFilePath(String startPath, String subDir, String fileName) {
+        String resourcesDir = startPath + "/src/test/resources/";
+
+        return StringUtils.isNotEmpty(subDir) ? resourcesDir + subDir + "/" + fileName
+                                              : resourcesDir + fileName;
+    }
}
\ No newline at end of file
diff --git a/repository/src/test/resources/csvFiles/empty.csv
b/repository/src/test/resources/csvFiles/empty.csv
new file mode 100644
index 0000000..e69de29
diff --git a/repository/src/test/resources/csvFiles/incorrectFile.csv
b/repository/src/test/resources/csvFiles/incorrectFile.csv
new file mode 100644
index 0000000..3a9dd4a
--- /dev/null
+++ b/repository/src/test/resources/csvFiles/incorrectFile.csv
@@ -0,0 +1,2 @@
+GlossaryName, TermName, ShortDescription, LongDescription, Examples,
Abbreviation, Usage, AdditionalAttributes, TranslationTerms, ValidValuesFor,
Synonyms, ReplacedBy, ValidValues, ReplacementTerms, SeeAlso, TranslatedTerms,
IsA, Antonyms, Classifies, PreferredToTerms, PreferredTerms
+testBankingGlossary,BankBranch1,SD4,LD4,"EXAMPLE","ABBREVIATION","USAGE",,,,,,,,,,,,,":GentsFootwear",
diff --git a/repository/src/test/resources/csvFiles/template_1.csv
b/repository/src/test/resources/csvFiles/template_1.csv
new file mode 100644
index 0000000..c535e07
--- /dev/null
+++ b/repository/src/test/resources/csvFiles/template_1.csv
@@ -0,0 +1,2 @@
+GlossaryName, TermName, ShortDescription, LongDescription, Examples,
Abbreviation, Usage, AdditionalAttributes, TranslationTerms, ValidValuesFor,
Synonyms, ReplacedBy, ValidValues, ReplacementTerms, SeeAlso, TranslatedTerms,
IsA, Antonyms, Classifies, PreferredToTerms, PreferredTerms
+testBankingGlossary,BankBranch,SD4,LD4,"EXAMPLE","ABBREVIATION","USAGE",,,,,,,,,,,,,,
diff --git a/repository/src/test/resources/excelFiles/template_1.xlsx
b/repository/src/test/resources/excelFiles/template_1.xlsx
new file mode 100644
index 0000000..619fcac
Binary files /dev/null and
b/repository/src/test/resources/excelFiles/template_1.xlsx differ
diff --git a/webapp/src/main/java/org/apache/atlas/web/rest/GlossaryREST.java
b/webapp/src/main/java/org/apache/atlas/web/rest/GlossaryREST.java
index 151aa6b..78d52ad 100644
--- a/webapp/src/main/java/org/apache/atlas/web/rest/GlossaryREST.java
+++ b/webapp/src/main/java/org/apache/atlas/web/rest/GlossaryREST.java
@@ -17,10 +17,13 @@
*/
package org.apache.atlas.web.rest;
+import com.sun.jersey.core.header.FormDataContentDisposition;
+import com.sun.jersey.multipart.FormDataParam;
import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.SortOrder;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.glossary.GlossaryService;
+import org.apache.atlas.glossary.GlossaryTermUtils;
import org.apache.atlas.model.glossary.AtlasGlossary;
import org.apache.atlas.model.glossary.AtlasGlossaryCategory;
import org.apache.atlas.model.glossary.AtlasGlossaryTerm;
@@ -35,8 +38,22 @@ import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import javax.inject.Inject;
-import javax.ws.rs.*;
+import javax.ws.rs.Consumes;
+import javax.ws.rs.DELETE;
+import javax.ws.rs.DefaultValue;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.StreamingOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -934,4 +951,40 @@ public class GlossaryREST {
return ret;
}
-}
+    /**
+     * Get sample template for uploading/creating bulk AtlasGlossaryTerm
+     *
+     * @return a stream that writes the comma-separated column headers of the import file
+     */
+    @GET
+    @Path("/import/template")
+    @Produces(MediaType.APPLICATION_OCTET_STREAM)
+    public StreamingOutput produceTemplate() {
+        // The header line is computed when the response body is written.
+        return outputStream -> outputStream.write(GlossaryTermUtils.getGlossaryTermHeaders().getBytes());
+    }
+
+    /**
+     * Upload glossary file for creating AtlasGlossaryTerms in bulk
+     *
+     * @param inputStream InputStream of file
+     * @param fileDetail  FormDataContentDisposition metadata of file
+     * @return the glossary terms created from the uploaded file
+     * @throws AtlasBaseException if the file name is missing, the file type is not supported,
+     *                            the file cannot be read, or a term cannot be created
+     * @HTTP 200 If glossary term creation was successful
+     * @HTTP 400 If Glossary term definition has invalid or missing information
+     * @HTTP 409 If Glossary term already exists (duplicate qualifiedName)
+     */
+    @POST
+    @Path("/import")
+    @Consumes(MediaType.MULTIPART_FORM_DATA)
+    public List<AtlasGlossaryTerm> importGlossaryData(@FormDataParam("file") InputStream inputStream,
+                                                      @FormDataParam("file") FormDataContentDisposition fileDetail) throws AtlasBaseException {
+        // A request without a "file" part leaves fileDetail null; pass a null name so the service
+        // rejects it with INVALID_FILE_TYPE instead of failing here with a NullPointerException.
+        String fileName = (fileDetail != null) ? fileDetail.getFileName() : null;
+
+        return glossaryService.importGlossaryData(inputStream, fileName);
+    }
+}
\ No newline at end of file