Github user ejwhite922 commented on a diff in the pull request: https://github.com/apache/incubator-rya/pull/153#discussion_r133825074 --- Diff: extras/indexing/src/main/java/org/apache/rya/indexing/entity/storage/mongo/MongoEntityStorage.java --- @@ -288,39 +290,79 @@ private boolean detectDuplicates(final Entity entity) throws EntityStorageExcept if (mongoTypeStorage == null) { mongoTypeStorage = new MongoTypeStorage(mongo, ryaInstanceName); } - final Builder builder = new Builder(); - builder.setSubject(entity.getSubject()); - boolean abort = false; - for (final RyaURI typeRyaUri : entity.getExplicitTypeIds()) { - Optional<Type> type; + + // Grab all entities that have all the same explicit types as our + // original Entity. + final List<Entity> comparisonEntities = searchHasAllExplicitTypes(entity.getExplicitTypeIds()); + + // Now that we have our set of potential duplicates, compare them. + // We can stop when we find one duplicate. + for (final Entity compareEntity : comparisonEntities) { try { - type = mongoTypeStorage.get(typeRyaUri); - } catch (final TypeStorageException e) { - throw new EntityStorageException("Unable to get entity type: " + typeRyaUri, e); + hasDuplicate = duplicateDataDetector.compareEntities(entity, compareEntity); + } catch (final SmartUriException e) { + throw new EntityStorageException("Encountered an error while comparing entities.", e); } - if (type.isPresent()) { - final ConvertingCursor<TypedEntity> cursor = search(Optional.empty(), type.get(), Collections.emptySet()); - while (cursor.hasNext()) { - final TypedEntity typedEntity = cursor.next(); - builder.setExplicitType(typeRyaUri); - for (final Property property : typedEntity.getProperties()) { - builder.setProperty(typeRyaUri, property); - } - } - } else { - abort = true; + if (hasDuplicate) { break; } } - if (!abort) { - final Entity entity2 = builder.build(); - try { - hasDuplicate = duplicateDataDetector.compareEntities(entity, entity2); - } catch (final SmartUriException 
e) { - throw new EntityStorageException("Encountered an error while comparing entities.", e); + } + return hasDuplicate; + } + + /** + * Searches the Entity storage for all Entities that contain all the + * specified explicit type IDs. + * @param explicitTypeIds the {@link ImmutableList} of {@link RyaURI}s that + * are being searched for. + * @return the {@link List} of {@link Entity}s that have all the specified + * explicit type IDs. If nothing was found an empty {@link List} is + * returned. + * @throws EntityStorageException + */ + private List<Entity> searchHasAllExplicitTypes(final ImmutableList<RyaURI> explicitTypeIds) throws EntityStorageException { + // Grab the first type from the explicit type IDs. + RyaURI firstType = null; + if (!explicitTypeIds.isEmpty()) { + firstType = explicitTypeIds.get(0); --- End diff -- The remaining typeIds are compared against the Entity query results further down. However, when I hastily pulled all this working logic into a separate function, I ended up comparing them to themselves. I'll fix that.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it to be, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---