[ https://issues.apache.org/jira/browse/METRON-1061?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16112802#comment-16112802 ]
ASF GitHub Bot commented on METRON-1061: ---------------------------------------- Github user ottobackwards commented on a diff in the pull request: https://github.com/apache/metron/pull/667#discussion_r131155065 --- Diff: metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/TextFunctions.java --- @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with the License. You may obtain + * a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.metron.stellar.dsl.functions; + +import java.util.List; +import java.util.Locale; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.text.similarity.FuzzyScore; +import org.apache.metron.stellar.dsl.BaseStellarFunction; +import org.apache.metron.stellar.dsl.Stellar; + +public class TextFunctions { + + @Stellar(name = "FUZZY_SCORE", + description = + "Returns the Fuzzy Score which indicates the similarity score between two Strings " + + + "One point is given for every matched character. Subsequent matches yield two bonus " + + + "points. A higher score indicates a higher similarity", + params = { + "string - The full term that should be matched against", + "string - The query that will be matched against a term", + "string - The IETF BCP 47 language code to use" + }, + returns = "integer representing the score") + /** + * FuzzyScoreFunction exposes the Apache Commons Text Similarity FuzzyScore through + * Stellar. + */ + public static class FuzzyScoreFunction extends BaseStellarFunction { + + @Override + public Object apply(List<Object> list) { + if (list.size() < 3) { + throw new IllegalStateException("FUZZY_SCORE expects three args: [string, string, string]"); + } + String term = (String) list.get(0); --- End diff -- done > Add Fuzzy String Scoring to Stellar > ----------------------------------- > > Key: METRON-1061 > URL: https://issues.apache.org/jira/browse/METRON-1061 > Project: Metron > Issue Type: Improvement > Reporter: Otto Fowler > Assignee: Otto Fowler > > Fuzzy string matching or scoring generates a score based on the similarity of > a string vs. a query. > Functionality for this exists in Apache commons and should be exposed through > stellar. > "A matching algorithm that is similar to the searching algorithms implemented > in editors such as Sublime Text, TextMate, Atom and others. > One point is given for every matched character. Subsequent matches yield two > bonus points. A higher score indicates a higher similarity." > https://commons.apache.org/sandbox/commons-text/jacoco/org.apache.commons.text.similarity/FuzzyScore.java.html > https://commons.apache.org/proper/commons-text/index.html -- This message was sent by Atlassian JIRA (v6.4.14#64029)