Modified: websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/JaroWrinklerDistance.html ============================================================================== --- websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/JaroWrinklerDistance.html (original) +++ websites/production/commons/content/sandbox/commons-text/xref/org/apache/commons/text/similarity/JaroWrinklerDistance.html Sun Apr 26 10:18:25 2015 @@ -42,341 +42,339 @@ <a class="jxr_linenumber" name="L34" href="#L34">34</a> <em class="jxr_javadoccomment"> * <p></em> <a class="jxr_linenumber" name="L35" href="#L35">35</a> <em class="jxr_javadoccomment"> * This code has been adapted from Apache Commons Lang 3.3.</em> <a class="jxr_linenumber" name="L36" href="#L36">36</a> <em class="jxr_javadoccomment"> * </p></em> -<a class="jxr_linenumber" name="L37" href="#L37">37</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L38" href="#L38">38</a> <em class="jxr_javadoccomment"> * @since 1.0</em> -<a class="jxr_linenumber" name="L39" href="#L39">39</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L40" href="#L40">40</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/commons/text/similarity/JaroWrinklerDistance.html">JaroWrinklerDistance</a> <strong class="jxr_keyword">implements</strong> EditDistance<Double> { -<a class="jxr_linenumber" name="L41" href="#L41">41</a> -<a class="jxr_linenumber" name="L42" href="#L42">42</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L43" href="#L43">43</a> <em class="jxr_javadoccomment"> * The default prefix length limit set to four.</em> -<a class="jxr_linenumber" name="L44" href="#L44">44</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L45" href="#L45">45</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> PREFIX_LENGTH_LIMIT = 4; -<a class="jxr_linenumber" name="L46" href="#L46">46</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L47" href="#L47">47</a> <em class="jxr_javadoccomment"> * Represents a failed index search.</em> -<a class="jxr_linenumber" name="L48" href="#L48">48</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L49" href="#L49">49</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> INDEX_NOT_FOUND = -1; -<a class="jxr_linenumber" name="L50" href="#L50">50</a> -<a class="jxr_linenumber" name="L51" href="#L51">51</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L52" href="#L52">52</a> <em class="jxr_javadoccomment"> * Find the Jaro Winkler Distance which indicates the similarity score</em> -<a class="jxr_linenumber" name="L53" href="#L53">53</a> <em class="jxr_javadoccomment"> * between two CharSequences.</em> -<a class="jxr_linenumber" name="L54" href="#L54">54</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L55" href="#L55">55</a> <em class="jxr_javadoccomment"> * <pre></em> -<a class="jxr_linenumber" name="L56" href="#L56">56</a> <em class="jxr_javadoccomment"> * distance.apply(null, null) = IllegalArgumentException</em> -<a class="jxr_linenumber" name="L57" href="#L57">57</a> <em class="jxr_javadoccomment"> * distance.apply("","") = 0.0</em> -<a class="jxr_linenumber" name="L58" href="#L58">58</a> <em class="jxr_javadoccomment"> * distance.apply("","a") = 0.0</em> -<a class="jxr_linenumber" name="L59" href="#L59">59</a> <em class="jxr_javadoccomment"> * distance.apply("aaapppp", "") = 0.0</em> -<a class="jxr_linenumber" name="L60" href="#L60">60</a> <em class="jxr_javadoccomment"> * distance.apply("frog", "fog") = 0.93</em> -<a class="jxr_linenumber" name="L61" href="#L61">61</a> <em class="jxr_javadoccomment"> * distance.apply("fly", "ant") = 0.0</em> -<a class="jxr_linenumber" name="L62" href="#L62">62</a> <em class="jxr_javadoccomment"> * distance.apply("elephant", "hippo") = 0.44</em> -<a class="jxr_linenumber" name="L63" href="#L63">63</a> <em class="jxr_javadoccomment"> * distance.apply("hippo", "elephant") = 0.44</em> -<a class="jxr_linenumber" name="L64" href="#L64">64</a> <em class="jxr_javadoccomment"> * distance.apply("hippo", "zzzzzzzz") = 0.0</em> -<a class="jxr_linenumber" name="L65" href="#L65">65</a> <em class="jxr_javadoccomment"> * distance.apply("hello", "hallo") = 0.88</em> -<a class="jxr_linenumber" name="L66" href="#L66">66</a> <em class="jxr_javadoccomment"> * distance.apply("ABC Corporation", "ABC Corp") = 0.91</em> -<a class="jxr_linenumber" name="L67" href="#L67">67</a> <em class="jxr_javadoccomment"> * distance.apply("D N H Enterprises Inc", "D &amp; H Enterprises, Inc.") = 0.93</em> -<a class="jxr_linenumber" name="L68" href="#L68">68</a> <em class="jxr_javadoccomment"> * distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.94</em> -<a class="jxr_linenumber" name="L69" href="#L69">69</a> <em class="jxr_javadoccomment"> * distance.apply("PENNSYLVANIA", "PENNCISYLVNIA") = 0.9</em> -<a class="jxr_linenumber" name="L70" href="#L70">70</a> <em class="jxr_javadoccomment"> * </pre></em> -<a class="jxr_linenumber" name="L71" href="#L71">71</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L72" href="#L72">72</a> <em class="jxr_javadoccomment"> * @param left the first String, must not be null</em> -<a class="jxr_linenumber" name="L73" href="#L73">73</a> <em class="jxr_javadoccomment"> * @param right the second String, must not be null</em> -<a class="jxr_linenumber" name="L74" href="#L74">74</a> <em class="jxr_javadoccomment"> * @return result distance</em> -<a class="jxr_linenumber" name="L75" href="#L75">75</a> <em class="jxr_javadoccomment"> * @throws IllegalArgumentException if either String input {@code null}</em> -<a class="jxr_linenumber" name="L76" href="#L76">76</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L77" href="#L77">77</a> @Override -<a class="jxr_linenumber" name="L78" href="#L78">78</a> <strong class="jxr_keyword">public</strong> Double apply(CharSequence left, CharSequence right) { -<a class="jxr_linenumber" name="L79" href="#L79">79</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> defaultScalingFactor = 0.1; -<a class="jxr_linenumber" name="L80" href="#L80">80</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> percentageRoundValue = 100.0; -<a class="jxr_linenumber" name="L81" href="#L81">81</a> -<a class="jxr_linenumber" name="L82" href="#L82">82</a> <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) { -<a class="jxr_linenumber" name="L83" href="#L83">83</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>); -<a class="jxr_linenumber" name="L84" href="#L84">84</a> } -<a class="jxr_linenumber" name="L85" href="#L85">85</a> -<a class="jxr_linenumber" name="L86" href="#L86">86</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> jaro = score(left, right); -<a class="jxr_linenumber" name="L87" href="#L87">87</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> cl = commonPrefixLength(left, right); -<a class="jxr_linenumber" name="L88" href="#L88">88</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> matchScore = Math.round((jaro + defaultScalingFactor -<a class="jxr_linenumber" name="L89" href="#L89">89</a> * cl * (1.0 - jaro)) * percentageRoundValue) / percentageRoundValue; -<a class="jxr_linenumber" name="L90" href="#L90">90</a> -<a class="jxr_linenumber" name="L91" href="#L91">91</a> <strong class="jxr_keyword">return</strong> matchScore; -<a class="jxr_linenumber" name="L92" href="#L92">92</a> } -<a class="jxr_linenumber" name="L93" href="#L93">93</a> -<a class="jxr_linenumber" name="L94" href="#L94">94</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L95" href="#L95">95</a> <em class="jxr_javadoccomment"> * Calculates the number of characters from the beginning of the strings</em> -<a class="jxr_linenumber" name="L96" href="#L96">96</a> <em class="jxr_javadoccomment"> * that match exactly one-to-one, up to a maximum of four (4) characters.</em> -<a class="jxr_linenumber" name="L97" href="#L97">97</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L98" href="#L98">98</a> <em class="jxr_javadoccomment"> * @param first The first string.</em> -<a class="jxr_linenumber" name="L99" href="#L99">99</a> <em class="jxr_javadoccomment"> * @param second The second string.</em> -<a class="jxr_linenumber" name="L100" href="#L100">100</a> <em class="jxr_javadoccomment"> * @return A number between 0 and 4.</em> -<a class="jxr_linenumber" name="L101" href="#L101">101</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L102" href="#L102">102</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> commonPrefixLength(<strong class="jxr_keyword">final</strong> CharSequence first, -<a class="jxr_linenumber" name="L103" href="#L103">103</a> <strong class="jxr_keyword">final</strong> CharSequence second) { -<a class="jxr_linenumber" name="L104" href="#L104">104</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> result = getCommonPrefix(first.toString(), second.toString()) -<a class="jxr_linenumber" name="L105" href="#L105">105</a> .length(); -<a class="jxr_linenumber" name="L106" href="#L106">106</a> -<a class="jxr_linenumber" name="L107" href="#L107">107</a> <em class="jxr_comment">// Limit the result to 4.</em> -<a class="jxr_linenumber" name="L108" href="#L108">108</a> <strong class="jxr_keyword">return</strong> result > PREFIX_LENGTH_LIMIT ? PREFIX_LENGTH_LIMIT : result; -<a class="jxr_linenumber" name="L109" href="#L109">109</a> } -<a class="jxr_linenumber" name="L110" href="#L110">110</a> -<a class="jxr_linenumber" name="L111" href="#L111">111</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L112" href="#L112">112</a> <em class="jxr_javadoccomment"> * Compares all Strings in an array and returns the initial sequence of</em> -<a class="jxr_linenumber" name="L113" href="#L113">113</a> <em class="jxr_javadoccomment"> * characters that is common to all of them.</em> -<a class="jxr_linenumber" name="L114" href="#L114">114</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L115" href="#L115">115</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="L116" href="#L116">116</a> <em class="jxr_javadoccomment"> * For example,</em> -<a class="jxr_linenumber" name="L117" href="#L117">117</a> <em class="jxr_javadoccomment"> * <code>getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) -&gt; "i am a "</code></em> -<a class="jxr_linenumber" name="L118" href="#L118">118</a> <em class="jxr_javadoccomment"> * </p></em> -<a class="jxr_linenumber" name="L119" href="#L119">119</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L120" href="#L120">120</a> <em class="jxr_javadoccomment"> * <pre></em> -<a class="jxr_linenumber" name="L121" href="#L121">121</a> <em class="jxr_javadoccomment"> * getCommonPrefix(null) = ""</em> -<a class="jxr_linenumber" name="L122" href="#L122">122</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {}) = ""</em> -<a class="jxr_linenumber" name="L123" href="#L123">123</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc"}) = "abc"</em> -<a class="jxr_linenumber" name="L124" href="#L124">124</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {null, null}) = ""</em> -<a class="jxr_linenumber" name="L125" href="#L125">125</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"", ""}) = ""</em> -<a class="jxr_linenumber" name="L126" href="#L126">126</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"", null}) = ""</em> -<a class="jxr_linenumber" name="L127" href="#L127">127</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc", null, null}) = ""</em> -<a class="jxr_linenumber" name="L128" href="#L128">128</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {null, null, "abc"}) = ""</em> -<a class="jxr_linenumber" name="L129" href="#L129">129</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"", "abc"}) = ""</em> -<a class="jxr_linenumber" name="L130" href="#L130">130</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc", ""}) = ""</em> -<a class="jxr_linenumber" name="L131" href="#L131">131</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc", "abc"}) = "abc"</em> -<a class="jxr_linenumber" name="L132" href="#L132">132</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc", "a"}) = "a"</em> -<a class="jxr_linenumber" name="L133" href="#L133">133</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab"</em> -<a class="jxr_linenumber" name="L134" href="#L134">134</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab"</em> -<a class="jxr_linenumber" name="L135" href="#L135">135</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abcde", "xyz"}) = ""</em> -<a class="jxr_linenumber" name="L136" href="#L136">136</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"xyz", "abcde"}) = ""</em> -<a class="jxr_linenumber" name="L137" href="#L137">137</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = "i am a "</em> -<a class="jxr_linenumber" name="L138" href="#L138">138</a> <em class="jxr_javadoccomment"> * </pre></em> -<a class="jxr_linenumber" name="L139" href="#L139">139</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L140" href="#L140">140</a> <em class="jxr_javadoccomment"> * @param strs array of String objects, entries may be null</em> -<a class="jxr_linenumber" name="L141" href="#L141">141</a> <em class="jxr_javadoccomment"> * @return the initial sequence of characters that are common to all Strings</em> -<a class="jxr_linenumber" name="L142" href="#L142">142</a> <em class="jxr_javadoccomment"> * in the array; empty String if the array is null, the elements are</em> -<a class="jxr_linenumber" name="L143" href="#L143">143</a> <em class="jxr_javadoccomment"> * all null or if there is no common prefix.</em> -<a class="jxr_linenumber" name="L144" href="#L144">144</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L145" href="#L145">145</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> String getCommonPrefix(<strong class="jxr_keyword">final</strong> String... strs) { -<a class="jxr_linenumber" name="L146" href="#L146">146</a> <strong class="jxr_keyword">if</strong> (strs == <strong class="jxr_keyword">null</strong> || strs.length == 0) { -<a class="jxr_linenumber" name="L147" href="#L147">147</a> <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>; -<a class="jxr_linenumber" name="L148" href="#L148">148</a> } -<a class="jxr_linenumber" name="L149" href="#L149">149</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> smallestIndexOfDiff = indexOfDifference(strs); -<a class="jxr_linenumber" name="L150" href="#L150">150</a> <strong class="jxr_keyword">if</strong> (smallestIndexOfDiff == INDEX_NOT_FOUND) { -<a class="jxr_linenumber" name="L151" href="#L151">151</a> <em class="jxr_comment">// all strings were identical</em> -<a class="jxr_linenumber" name="L152" href="#L152">152</a> <strong class="jxr_keyword">if</strong> (strs[0] == <strong class="jxr_keyword">null</strong>) { -<a class="jxr_linenumber" name="L153" href="#L153">153</a> <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>; -<a class="jxr_linenumber" name="L154" href="#L154">154</a> } -<a class="jxr_linenumber" name="L155" href="#L155">155</a> <strong class="jxr_keyword">return</strong> strs[0]; -<a class="jxr_linenumber" name="L156" href="#L156">156</a> } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (smallestIndexOfDiff == 0) { -<a class="jxr_linenumber" name="L157" href="#L157">157</a> <em class="jxr_comment">// there were no common initial characters</em> -<a class="jxr_linenumber" name="L158" href="#L158">158</a> <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>; -<a class="jxr_linenumber" name="L159" href="#L159">159</a> } <strong class="jxr_keyword">else</strong> { -<a class="jxr_linenumber" name="L160" href="#L160">160</a> <em class="jxr_comment">// we found a common initial character sequence</em> -<a class="jxr_linenumber" name="L161" href="#L161">161</a> <strong class="jxr_keyword">return</strong> strs[0].substring(0, smallestIndexOfDiff); -<a class="jxr_linenumber" name="L162" href="#L162">162</a> } -<a class="jxr_linenumber" name="L163" href="#L163">163</a> } -<a class="jxr_linenumber" name="L164" href="#L164">164</a> -<a class="jxr_linenumber" name="L165" href="#L165">165</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L166" href="#L166">166</a> <em class="jxr_javadoccomment"> * This method returns the Jaro-Winkler score for string matching.</em> -<a class="jxr_linenumber" name="L167" href="#L167">167</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L168" href="#L168">168</a> <em class="jxr_javadoccomment"> * @param first the first string to be matched</em> -<a class="jxr_linenumber" name="L169" href="#L169">169</a> <em class="jxr_javadoccomment"> * @param second the second string to be machted</em> -<a class="jxr_linenumber" name="L170" href="#L170">170</a> <em class="jxr_javadoccomment"> * @return matching score without scaling factor impact</em> -<a class="jxr_linenumber" name="L171" href="#L171">171</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L172" href="#L172">172</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">double</strong> score(<strong class="jxr_keyword">final</strong> CharSequence first, -<a class="jxr_linenumber" name="L173" href="#L173">173</a> <strong class="jxr_keyword">final</strong> CharSequence second) { -<a class="jxr_linenumber" name="L174" href="#L174">174</a> String shorter; -<a class="jxr_linenumber" name="L175" href="#L175">175</a> String longer; -<a class="jxr_linenumber" name="L176" href="#L176">176</a> -<a class="jxr_linenumber" name="L177" href="#L177">177</a> <em class="jxr_comment">// Determine which String is longer.</em> -<a class="jxr_linenumber" name="L178" href="#L178">178</a> <strong class="jxr_keyword">if</strong> (first.length() > second.length()) { -<a class="jxr_linenumber" name="L179" href="#L179">179</a> longer = first.toString().toLowerCase(); -<a class="jxr_linenumber" name="L180" href="#L180">180</a> shorter = second.toString().toLowerCase(); -<a class="jxr_linenumber" name="L181" href="#L181">181</a> } <strong class="jxr_keyword">else</strong> { -<a class="jxr_linenumber" name="L182" href="#L182">182</a> longer = second.toString().toLowerCase(); -<a class="jxr_linenumber" name="L183" href="#L183">183</a> shorter = first.toString().toLowerCase(); -<a class="jxr_linenumber" name="L184" href="#L184">184</a> } -<a class="jxr_linenumber" name="L185" href="#L185">185</a> -<a class="jxr_linenumber" name="L186" href="#L186">186</a> <em class="jxr_comment">// Calculate the half length() distance of the shorter String.</em> -<a class="jxr_linenumber" name="L187" href="#L187">187</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> halflength = shorter.length() / 2 + 1; -<a class="jxr_linenumber" name="L188" href="#L188">188</a> -<a class="jxr_linenumber" name="L189" href="#L189">189</a> <em class="jxr_comment">// Find the set of matching characters between the shorter and longer</em> -<a class="jxr_linenumber" name="L190" href="#L190">190</a> <em class="jxr_comment">// strings. Note that</em> -<a class="jxr_linenumber" name="L191" href="#L191">191</a> <em class="jxr_comment">// the set of matching characters may be different depending on the</em> -<a class="jxr_linenumber" name="L192" href="#L192">192</a> <em class="jxr_comment">// order of the strings.</em> -<a class="jxr_linenumber" name="L193" href="#L193">193</a> <strong class="jxr_keyword">final</strong> String m1 = getSetOfMatchingCharacterWithin(shorter, longer, +<a class="jxr_linenumber" name="L37" href="#L37">37</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L38" href="#L38">38</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/commons/text/similarity/JaroWrinklerDistance.html">JaroWrinklerDistance</a> <strong class="jxr_keyword">implements</strong> EditDistance<Double> { +<a class="jxr_linenumber" name="L39" href="#L39">39</a> +<a class="jxr_linenumber" name="L40" href="#L40">40</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L41" href="#L41">41</a> <em class="jxr_javadoccomment"> * The default prefix length limit set to four.</em> +<a class="jxr_linenumber" name="L42" href="#L42">42</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L43" href="#L43">43</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> PREFIX_LENGTH_LIMIT = 4; +<a class="jxr_linenumber" name="L44" href="#L44">44</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L45" href="#L45">45</a> <em class="jxr_javadoccomment"> * Represents a failed index search.</em> +<a class="jxr_linenumber" name="L46" href="#L46">46</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L47" href="#L47">47</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> INDEX_NOT_FOUND = -1; +<a class="jxr_linenumber" name="L48" href="#L48">48</a> +<a class="jxr_linenumber" name="L49" href="#L49">49</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L50" href="#L50">50</a> <em class="jxr_javadoccomment"> * Find the Jaro Winkler Distance which indicates the similarity score</em> +<a class="jxr_linenumber" name="L51" href="#L51">51</a> <em class="jxr_javadoccomment"> * between two CharSequences.</em> +<a class="jxr_linenumber" name="L52" href="#L52">52</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L53" href="#L53">53</a> <em class="jxr_javadoccomment"> * <pre></em> +<a class="jxr_linenumber" name="L54" href="#L54">54</a> <em class="jxr_javadoccomment"> * distance.apply(null, null) = IllegalArgumentException</em> +<a class="jxr_linenumber" name="L55" href="#L55">55</a> <em class="jxr_javadoccomment"> * distance.apply("","") = 0.0</em> +<a class="jxr_linenumber" name="L56" href="#L56">56</a> <em class="jxr_javadoccomment"> * distance.apply("","a") = 0.0</em> +<a class="jxr_linenumber" name="L57" href="#L57">57</a> <em class="jxr_javadoccomment"> * distance.apply("aaapppp", "") = 0.0</em> +<a class="jxr_linenumber" name="L58" href="#L58">58</a> <em class="jxr_javadoccomment"> * distance.apply("frog", "fog") = 0.93</em> +<a class="jxr_linenumber" name="L59" href="#L59">59</a> <em class="jxr_javadoccomment"> * distance.apply("fly", "ant") = 0.0</em> +<a class="jxr_linenumber" name="L60" href="#L60">60</a> <em class="jxr_javadoccomment"> * distance.apply("elephant", "hippo") = 0.44</em> +<a class="jxr_linenumber" name="L61" href="#L61">61</a> <em class="jxr_javadoccomment"> * distance.apply("hippo", "elephant") = 0.44</em> +<a class="jxr_linenumber" name="L62" href="#L62">62</a> <em class="jxr_javadoccomment"> * distance.apply("hippo", "zzzzzzzz") = 0.0</em> +<a class="jxr_linenumber" name="L63" href="#L63">63</a> <em class="jxr_javadoccomment"> * distance.apply("hello", "hallo") = 0.88</em> +<a class="jxr_linenumber" name="L64" href="#L64">64</a> <em class="jxr_javadoccomment"> * distance.apply("ABC Corporation", "ABC Corp") = 0.91</em> +<a class="jxr_linenumber" name="L65" href="#L65">65</a> <em class="jxr_javadoccomment"> * distance.apply("D N H Enterprises Inc", "D &amp; H Enterprises, Inc.") = 0.93</em> +<a class="jxr_linenumber" name="L66" href="#L66">66</a> <em class="jxr_javadoccomment"> * distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.94</em> +<a class="jxr_linenumber" name="L67" href="#L67">67</a> <em class="jxr_javadoccomment"> * distance.apply("PENNSYLVANIA", "PENNCISYLVNIA") = 0.9</em> +<a class="jxr_linenumber" name="L68" href="#L68">68</a> <em class="jxr_javadoccomment"> * </pre></em> +<a class="jxr_linenumber" name="L69" href="#L69">69</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L70" href="#L70">70</a> <em class="jxr_javadoccomment"> * @param left the first String, must not be null</em> +<a class="jxr_linenumber" name="L71" href="#L71">71</a> <em class="jxr_javadoccomment"> * @param right the second String, must not be null</em> +<a class="jxr_linenumber" name="L72" href="#L72">72</a> <em class="jxr_javadoccomment"> * @return result distance</em> +<a class="jxr_linenumber" name="L73" href="#L73">73</a> <em class="jxr_javadoccomment"> * @throws IllegalArgumentException if either String input {@code null}</em> +<a class="jxr_linenumber" name="L74" href="#L74">74</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L75" href="#L75">75</a> @Override +<a class="jxr_linenumber" name="L76" href="#L76">76</a> <strong class="jxr_keyword">public</strong> Double apply(CharSequence left, CharSequence right) { +<a class="jxr_linenumber" name="L77" href="#L77">77</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> defaultScalingFactor = 0.1; +<a class="jxr_linenumber" name="L78" href="#L78">78</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> percentageRoundValue = 100.0; +<a class="jxr_linenumber" name="L79" href="#L79">79</a> +<a class="jxr_linenumber" name="L80" href="#L80">80</a> <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) { +<a class="jxr_linenumber" name="L81" href="#L81">81</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>); +<a class="jxr_linenumber" name="L82" href="#L82">82</a> } +<a class="jxr_linenumber" name="L83" href="#L83">83</a> +<a class="jxr_linenumber" name="L84" href="#L84">84</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> jaro = score(left, right); +<a class="jxr_linenumber" name="L85" href="#L85">85</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> cl = commonPrefixLength(left, right); +<a class="jxr_linenumber" name="L86" href="#L86">86</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> matchScore = Math.round((jaro + defaultScalingFactor +<a class="jxr_linenumber" name="L87" href="#L87">87</a> * cl * (1.0 - jaro)) * percentageRoundValue) / percentageRoundValue; +<a class="jxr_linenumber" name="L88" href="#L88">88</a> +<a class="jxr_linenumber" name="L89" href="#L89">89</a> <strong class="jxr_keyword">return</strong> matchScore; +<a class="jxr_linenumber" name="L90" href="#L90">90</a> } +<a class="jxr_linenumber" name="L91" href="#L91">91</a> +<a class="jxr_linenumber" name="L92" href="#L92">92</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L93" href="#L93">93</a> <em class="jxr_javadoccomment"> * Calculates the number of characters from the beginning of the strings</em> +<a class="jxr_linenumber" name="L94" href="#L94">94</a> <em class="jxr_javadoccomment"> * that match exactly one-to-one, up to a maximum of four (4) characters.</em> +<a class="jxr_linenumber" name="L95" href="#L95">95</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L96" href="#L96">96</a> <em class="jxr_javadoccomment"> * @param first The first string.</em> +<a class="jxr_linenumber" name="L97" href="#L97">97</a> <em class="jxr_javadoccomment"> * @param second The second string.</em> +<a class="jxr_linenumber" name="L98" href="#L98">98</a> <em class="jxr_javadoccomment"> * @return A number between 0 and 4.</em> +<a class="jxr_linenumber" name="L99" href="#L99">99</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L100" href="#L100">100</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> commonPrefixLength(<strong class="jxr_keyword">final</strong> CharSequence first, +<a class="jxr_linenumber" name="L101" href="#L101">101</a> <strong class="jxr_keyword">final</strong> CharSequence second) { +<a class="jxr_linenumber" name="L102" href="#L102">102</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> result = getCommonPrefix(first.toString(), second.toString()) +<a class="jxr_linenumber" name="L103" href="#L103">103</a> .length(); +<a class="jxr_linenumber" name="L104" href="#L104">104</a> +<a class="jxr_linenumber" name="L105" href="#L105">105</a> <em class="jxr_comment">// Limit the result to 4.</em> +<a class="jxr_linenumber" name="L106" href="#L106">106</a> <strong class="jxr_keyword">return</strong> result > PREFIX_LENGTH_LIMIT ? PREFIX_LENGTH_LIMIT : result; +<a class="jxr_linenumber" name="L107" href="#L107">107</a> } +<a class="jxr_linenumber" name="L108" href="#L108">108</a> +<a class="jxr_linenumber" name="L109" href="#L109">109</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L110" href="#L110">110</a> <em class="jxr_javadoccomment"> * Compares all Strings in an array and returns the initial sequence of</em> +<a class="jxr_linenumber" name="L111" href="#L111">111</a> <em class="jxr_javadoccomment"> * characters that is common to all of them.</em> +<a class="jxr_linenumber" name="L112" href="#L112">112</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L113" href="#L113">113</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="L114" href="#L114">114</a> <em class="jxr_javadoccomment"> * For example,</em> +<a class="jxr_linenumber" name="L115" href="#L115">115</a> <em class="jxr_javadoccomment"> * <code>getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) -&gt; "i am a "</code></em> +<a class="jxr_linenumber" name="L116" href="#L116">116</a> <em class="jxr_javadoccomment"> * </p></em> +<a class="jxr_linenumber" name="L117" href="#L117">117</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L118" href="#L118">118</a> <em class="jxr_javadoccomment"> * <pre></em> +<a class="jxr_linenumber" name="L119" href="#L119">119</a> <em class="jxr_javadoccomment"> * getCommonPrefix(null) = ""</em> +<a class="jxr_linenumber" name="L120" href="#L120">120</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {}) = ""</em> +<a class="jxr_linenumber" name="L121" href="#L121">121</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc"}) = "abc"</em> +<a class="jxr_linenumber" name="L122" href="#L122">122</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {null, null}) = ""</em> +<a class="jxr_linenumber" name="L123" href="#L123">123</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"", ""}) = ""</em> +<a class="jxr_linenumber" name="L124" href="#L124">124</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"", null}) = ""</em> +<a class="jxr_linenumber" name="L125" href="#L125">125</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc", null, null}) = ""</em> +<a class="jxr_linenumber" name="L126" href="#L126">126</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {null, null, "abc"}) = ""</em> +<a class="jxr_linenumber" name="L127" href="#L127">127</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"", "abc"}) = ""</em> +<a class="jxr_linenumber" name="L128" href="#L128">128</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc", ""}) = ""</em> +<a class="jxr_linenumber" name="L129" href="#L129">129</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc", "abc"}) = "abc"</em> +<a class="jxr_linenumber" name="L130" href="#L130">130</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abc", "a"}) = "a"</em> +<a class="jxr_linenumber" name="L131" href="#L131">131</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab"</em> +<a class="jxr_linenumber" name="L132" href="#L132">132</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab"</em> +<a class="jxr_linenumber" name="L133" href="#L133">133</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"abcde", "xyz"}) = ""</em> +<a class="jxr_linenumber" name="L134" href="#L134">134</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"xyz", "abcde"}) = ""</em> +<a class="jxr_linenumber" name="L135" href="#L135">135</a> <em class="jxr_javadoccomment"> * getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = "i am a "</em> +<a class="jxr_linenumber" name="L136" href="#L136">136</a> <em class="jxr_javadoccomment"> * </pre></em> +<a class="jxr_linenumber" name="L137" href="#L137">137</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L138" href="#L138">138</a> <em class="jxr_javadoccomment"> * @param strs array of String objects, entries may be null</em> +<a class="jxr_linenumber" name="L139" href="#L139">139</a> <em class="jxr_javadoccomment"> * @return the initial sequence of characters that are common to all Strings</em> +<a class="jxr_linenumber" name="L140" href="#L140">140</a> <em class="jxr_javadoccomment"> * in the array; empty String if the array is null, the elements are</em> +<a class="jxr_linenumber" name="L141" href="#L141">141</a> <em class="jxr_javadoccomment"> * all null or if there is no common prefix.</em> +<a class="jxr_linenumber" name="L142" href="#L142">142</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L143" href="#L143">143</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> String getCommonPrefix(<strong class="jxr_keyword">final</strong> String... strs) { +<a class="jxr_linenumber" name="L144" href="#L144">144</a> <strong class="jxr_keyword">if</strong> (strs == <strong class="jxr_keyword">null</strong> || strs.length == 0) { +<a class="jxr_linenumber" name="L145" href="#L145">145</a> <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>; +<a class="jxr_linenumber" name="L146" href="#L146">146</a> } +<a class="jxr_linenumber" name="L147" href="#L147">147</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> smallestIndexOfDiff = indexOfDifference(strs); +<a class="jxr_linenumber" name="L148" href="#L148">148</a> <strong class="jxr_keyword">if</strong> (smallestIndexOfDiff == INDEX_NOT_FOUND) { +<a class="jxr_linenumber" name="L149" href="#L149">149</a> <em class="jxr_comment">// all strings were identical</em> +<a class="jxr_linenumber" name="L150" href="#L150">150</a> <strong class="jxr_keyword">if</strong> (strs[0] == <strong class="jxr_keyword">null</strong>) { +<a class="jxr_linenumber" name="L151" href="#L151">151</a> <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>; +<a class="jxr_linenumber" name="L152" href="#L152">152</a> } +<a class="jxr_linenumber" name="L153" href="#L153">153</a> <strong class="jxr_keyword">return</strong> strs[0]; +<a class="jxr_linenumber" name="L154" href="#L154">154</a> } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (smallestIndexOfDiff == 0) { +<a class="jxr_linenumber" name="L155" href="#L155">155</a> <em class="jxr_comment">// there were no common initial characters</em> +<a class="jxr_linenumber" name="L156" href="#L156">156</a> <strong class="jxr_keyword">return</strong> <span class="jxr_string">""</span>; +<a class="jxr_linenumber" name="L157" href="#L157">157</a> } <strong class="jxr_keyword">else</strong> { +<a class="jxr_linenumber" name="L158" href="#L158">158</a> <em class="jxr_comment">// we found a common initial character sequence</em> +<a class="jxr_linenumber" name="L159" href="#L159">159</a> <strong class="jxr_keyword">return</strong> strs[0].substring(0, smallestIndexOfDiff); +<a class="jxr_linenumber" name="L160" href="#L160">160</a> } +<a class="jxr_linenumber" name="L161" href="#L161">161</a> } +<a class="jxr_linenumber" name="L162" href="#L162">162</a> +<a class="jxr_linenumber" name="L163" href="#L163">163</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L164" href="#L164">164</a> <em class="jxr_javadoccomment"> * This method returns the Jaro-Winkler score for string matching.</em> +<a class="jxr_linenumber" name="L165" href="#L165">165</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L166" href="#L166">166</a> <em class="jxr_javadoccomment"> * @param first the first string to be matched</em> +<a class="jxr_linenumber" name="L167" href="#L167">167</a> <em class="jxr_javadoccomment"> * @param second the second string to be machted</em> +<a class="jxr_linenumber" name="L168" href="#L168">168</a> <em class="jxr_javadoccomment"> * @return matching score without scaling factor impact</em> +<a class="jxr_linenumber" name="L169" href="#L169">169</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L170" href="#L170">170</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">double</strong> score(<strong class="jxr_keyword">final</strong> CharSequence first, +<a class="jxr_linenumber" name="L171" href="#L171">171</a> <strong class="jxr_keyword">final</strong> CharSequence second) { +<a class="jxr_linenumber" name="L172" href="#L172">172</a> String shorter; +<a class="jxr_linenumber" name="L173" href="#L173">173</a> String longer; +<a class="jxr_linenumber" name="L174" href="#L174">174</a> +<a class="jxr_linenumber" name="L175" href="#L175">175</a> <em class="jxr_comment">// Determine which String is longer.</em> +<a class="jxr_linenumber" name="L176" href="#L176">176</a> <strong class="jxr_keyword">if</strong> (first.length() > second.length()) { +<a class="jxr_linenumber" name="L177" href="#L177">177</a> longer = first.toString().toLowerCase(); +<a class="jxr_linenumber" name="L178" href="#L178">178</a> shorter = second.toString().toLowerCase(); +<a class="jxr_linenumber" name="L179" href="#L179">179</a> } <strong class="jxr_keyword">else</strong> { +<a class="jxr_linenumber" name="L180" href="#L180">180</a> longer = second.toString().toLowerCase(); +<a class="jxr_linenumber" name="L181" href="#L181">181</a> shorter = first.toString().toLowerCase(); +<a class="jxr_linenumber" name="L182" href="#L182">182</a> } +<a class="jxr_linenumber" name="L183" href="#L183">183</a> +<a class="jxr_linenumber" name="L184" href="#L184">184</a> <em class="jxr_comment">// Calculate the half length() distance of the shorter String.</em> +<a class="jxr_linenumber" name="L185" href="#L185">185</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> halflength = shorter.length() / 2 + 1; +<a class="jxr_linenumber" name="L186" href="#L186">186</a> +<a class="jxr_linenumber" name="L187" href="#L187">187</a> <em class="jxr_comment">// Find the set of matching characters between the shorter and longer</em> +<a class="jxr_linenumber" name="L188" href="#L188">188</a> <em class="jxr_comment">// strings. Note that</em> +<a class="jxr_linenumber" name="L189" href="#L189">189</a> <em class="jxr_comment">// the set of matching characters may be different depending on the</em> +<a class="jxr_linenumber" name="L190" href="#L190">190</a> <em class="jxr_comment">// order of the strings.</em> +<a class="jxr_linenumber" name="L191" href="#L191">191</a> <strong class="jxr_keyword">final</strong> String m1 = getSetOfMatchingCharacterWithin(shorter, longer, +<a class="jxr_linenumber" name="L192" href="#L192">192</a> halflength); +<a class="jxr_linenumber" name="L193" href="#L193">193</a> <strong class="jxr_keyword">final</strong> String m2 = getSetOfMatchingCharacterWithin(longer, shorter, <a class="jxr_linenumber" name="L194" href="#L194">194</a> halflength); -<a class="jxr_linenumber" name="L195" href="#L195">195</a> <strong class="jxr_keyword">final</strong> String m2 = getSetOfMatchingCharacterWithin(longer, shorter, -<a class="jxr_linenumber" name="L196" href="#L196">196</a> halflength); -<a class="jxr_linenumber" name="L197" href="#L197">197</a> -<a class="jxr_linenumber" name="L198" href="#L198">198</a> <em class="jxr_comment">// If one or both of the sets of common characters is empty, then</em> -<a class="jxr_linenumber" name="L199" href="#L199">199</a> <em class="jxr_comment">// there is no similarity between the two strings.</em> -<a class="jxr_linenumber" name="L200" href="#L200">200</a> <strong class="jxr_keyword">if</strong> (m1.length() == 0 || m2.length() == 0) { -<a class="jxr_linenumber" name="L201" href="#L201">201</a> <strong class="jxr_keyword">return</strong> 0.0; -<a class="jxr_linenumber" name="L202" href="#L202">202</a> } -<a class="jxr_linenumber" name="L203" href="#L203">203</a> -<a class="jxr_linenumber" name="L204" href="#L204">204</a> <em class="jxr_comment">// If the set of common characters is not the same size, then</em> -<a class="jxr_linenumber" name="L205" href="#L205">205</a> <em class="jxr_comment">// there is no similarity between the two strings, either.</em> -<a class="jxr_linenumber" name="L206" href="#L206">206</a> <strong class="jxr_keyword">if</strong> (m1.length() != m2.length()) { -<a class="jxr_linenumber" name="L207" href="#L207">207</a> <strong class="jxr_keyword">return</strong> 0.0; -<a class="jxr_linenumber" name="L208" href="#L208">208</a> } -<a class="jxr_linenumber" name="L209" href="#L209">209</a> -<a class="jxr_linenumber" name="L210" href="#L210">210</a> <em class="jxr_comment">// Calculate the number of transposition between the two sets</em> -<a class="jxr_linenumber" name="L211" href="#L211">211</a> <em class="jxr_comment">// of common characters.</em> -<a class="jxr_linenumber" name="L212" href="#L212">212</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> transpositions = transpositions(m1, m2); +<a class="jxr_linenumber" name="L195" href="#L195">195</a> +<a class="jxr_linenumber" name="L196" href="#L196">196</a> <em class="jxr_comment">// If one or both of the sets of common characters is empty, then</em> +<a class="jxr_linenumber" name="L197" href="#L197">197</a> <em class="jxr_comment">// there is no similarity between the two strings.</em> +<a class="jxr_linenumber" name="L198" href="#L198">198</a> <strong class="jxr_keyword">if</strong> (m1.length() == 0 || m2.length() == 0) { +<a class="jxr_linenumber" name="L199" href="#L199">199</a> <strong class="jxr_keyword">return</strong> 0.0; +<a class="jxr_linenumber" name="L200" href="#L200">200</a> } +<a class="jxr_linenumber" name="L201" href="#L201">201</a> +<a class="jxr_linenumber" name="L202" href="#L202">202</a> <em class="jxr_comment">// If the set of common characters is not the same size, then</em> +<a class="jxr_linenumber" name="L203" href="#L203">203</a> <em class="jxr_comment">// there is no similarity between the two strings, either.</em> +<a class="jxr_linenumber" name="L204" href="#L204">204</a> <strong class="jxr_keyword">if</strong> (m1.length() != m2.length()) { +<a class="jxr_linenumber" name="L205" href="#L205">205</a> <strong class="jxr_keyword">return</strong> 0.0; +<a class="jxr_linenumber" name="L206" href="#L206">206</a> } +<a class="jxr_linenumber" name="L207" href="#L207">207</a> +<a class="jxr_linenumber" name="L208" href="#L208">208</a> <em class="jxr_comment">// Calculate the number of transposition between the two sets</em> +<a class="jxr_linenumber" name="L209" href="#L209">209</a> <em class="jxr_comment">// of common characters.</em> +<a class="jxr_linenumber" name="L210" href="#L210">210</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> transpositions = transpositions(m1, m2); +<a class="jxr_linenumber" name="L211" href="#L211">211</a> +<a class="jxr_linenumber" name="L212" href="#L212">212</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> defaultDenominator = 3.0; <a class="jxr_linenumber" name="L213" href="#L213">213</a> -<a class="jxr_linenumber" name="L214" href="#L214">214</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> defaultDenominator = 3.0; -<a class="jxr_linenumber" name="L215" href="#L215">215</a> -<a class="jxr_linenumber" name="L216" href="#L216">216</a> <em class="jxr_comment">// Calculate the distance.</em> -<a class="jxr_linenumber" name="L217" href="#L217">217</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> dist = (m1.length() / ((<strong class="jxr_keyword">double</strong>) shorter.length()) -<a class="jxr_linenumber" name="L218" href="#L218">218</a> + m2.length() / ((<strong class="jxr_keyword">double</strong>) longer.length()) + (m1.length() - transpositions) -<a class="jxr_linenumber" name="L219" href="#L219">219</a> / ((<strong class="jxr_keyword">double</strong>) m1.length())) / defaultDenominator; -<a class="jxr_linenumber" name="L220" href="#L220">220</a> <strong class="jxr_keyword">return</strong> dist; -<a class="jxr_linenumber" name="L221" href="#L221">221</a> } -<a class="jxr_linenumber" name="L222" href="#L222">222</a> -<a class="jxr_linenumber" name="L223" href="#L223">223</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L224" href="#L224">224</a> <em class="jxr_javadoccomment"> * Calculates the number of transposition between two strings.</em> -<a class="jxr_linenumber" name="L225" href="#L225">225</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L226" href="#L226">226</a> <em class="jxr_javadoccomment"> * @param first The first string.</em> -<a class="jxr_linenumber" name="L227" href="#L227">227</a> <em class="jxr_javadoccomment"> * @param second The second string.</em> -<a class="jxr_linenumber" name="L228" href="#L228">228</a> <em class="jxr_javadoccomment"> * @return The number of transposition between the two strings.</em> -<a class="jxr_linenumber" name="L229" href="#L229">229</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L230" href="#L230">230</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> transpositions(<strong class="jxr_keyword">final</strong> CharSequence first, -<a class="jxr_linenumber" name="L231" href="#L231">231</a> <strong class="jxr_keyword">final</strong> CharSequence second) { -<a class="jxr_linenumber" name="L232" href="#L232">232</a> <strong class="jxr_keyword">int</strong> transpositions = 0; -<a class="jxr_linenumber" name="L233" href="#L233">233</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i < first.length(); i++) { -<a class="jxr_linenumber" name="L234" href="#L234">234</a> <strong class="jxr_keyword">if</strong> (first.charAt(i) != second.charAt(i)) { -<a class="jxr_linenumber" name="L235" href="#L235">235</a> transpositions++; -<a class="jxr_linenumber" name="L236" href="#L236">236</a> } -<a class="jxr_linenumber" name="L237" href="#L237">237</a> } -<a class="jxr_linenumber" name="L238" href="#L238">238</a> <strong class="jxr_keyword">return</strong> transpositions / 2; -<a class="jxr_linenumber" name="L239" href="#L239">239</a> } -<a class="jxr_linenumber" name="L240" href="#L240">240</a> -<a class="jxr_linenumber" name="L241" href="#L241">241</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L242" href="#L242">242</a> <em class="jxr_javadoccomment"> * Compares all CharSequences in an array and returns the index at which the</em> -<a class="jxr_linenumber" name="L243" href="#L243">243</a> <em class="jxr_javadoccomment"> * CharSequences begin to differ.</em> -<a class="jxr_linenumber" name="L244" href="#L244">244</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L245" href="#L245">245</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="L246" href="#L246">246</a> <em class="jxr_javadoccomment"> * For example,</em> -<a class="jxr_linenumber" name="L247" href="#L247">247</a> <em class="jxr_javadoccomment"> * <code>indexOfDifference(new String[] {"i am a machine", "i am a robot"}) -&gt; 7</code></em> -<a class="jxr_linenumber" name="L248" href="#L248">248</a> <em class="jxr_javadoccomment"> * </p></em> -<a class="jxr_linenumber" name="L249" href="#L249">249</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L250" href="#L250">250</a> <em class="jxr_javadoccomment"> * <pre></em> -<a class="jxr_linenumber" name="L251" href="#L251">251</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(null) = -1</em> -<a class="jxr_linenumber" name="L252" href="#L252">252</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {}) = -1</em> -<a class="jxr_linenumber" name="L253" href="#L253">253</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc"}) = -1</em> -<a class="jxr_linenumber" name="L254" href="#L254">254</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {null, null}) = -1</em> -<a class="jxr_linenumber" name="L255" href="#L255">255</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"", ""}) = -1</em> -<a class="jxr_linenumber" name="L256" href="#L256">256</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"", null}) = 0</em> -<a class="jxr_linenumber" name="L257" href="#L257">257</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc", null, null}) = 0</em> -<a class="jxr_linenumber" name="L258" href="#L258">258</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {null, null, "abc"}) = 0</em> -<a class="jxr_linenumber" name="L259" href="#L259">259</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"", "abc"}) = 0</em> -<a class="jxr_linenumber" name="L260" href="#L260">260</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc", ""}) = 0</em> -<a class="jxr_linenumber" name="L261" href="#L261">261</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc", "abc"}) = -1</em> -<a class="jxr_linenumber" name="L262" href="#L262">262</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc", "a"}) = 1</em> -<a class="jxr_linenumber" name="L263" href="#L263">263</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"ab", "abxyz"}) = 2</em> -<a class="jxr_linenumber" name="L264" href="#L264">264</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2</em> -<a class="jxr_linenumber" name="L265" href="#L265">265</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abcde", "xyz"}) = 0</em> -<a class="jxr_linenumber" name="L266" href="#L266">266</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"xyz", "abcde"}) = 0</em> -<a class="jxr_linenumber" name="L267" href="#L267">267</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"i am a machine", "i am a robot"}) = 7</em> -<a class="jxr_linenumber" name="L268" href="#L268">268</a> <em class="jxr_javadoccomment"> * </pre></em> -<a class="jxr_linenumber" name="L269" href="#L269">269</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L270" href="#L270">270</a> <em class="jxr_javadoccomment"> * @param css array of CharSequences, entries may be null</em> -<a class="jxr_linenumber" name="L271" href="#L271">271</a> <em class="jxr_javadoccomment"> * @return the index where the strings begin to differ; -1 if they are all</em> -<a class="jxr_linenumber" name="L272" href="#L272">272</a> <em class="jxr_javadoccomment"> * equal</em> -<a class="jxr_linenumber" name="L273" href="#L273">273</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L274" href="#L274">274</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> indexOfDifference(<strong class="jxr_keyword">final</strong> CharSequence... css) { -<a class="jxr_linenumber" name="L275" href="#L275">275</a> <strong class="jxr_keyword">if</strong> (css == <strong class="jxr_keyword">null</strong> || css.length <= 1) { -<a class="jxr_linenumber" name="L276" href="#L276">276</a> <strong class="jxr_keyword">return</strong> INDEX_NOT_FOUND; -<a class="jxr_linenumber" name="L277" href="#L277">277</a> } -<a class="jxr_linenumber" name="L278" href="#L278">278</a> <strong class="jxr_keyword">boolean</strong> anyStringNull = false; -<a class="jxr_linenumber" name="L279" href="#L279">279</a> <strong class="jxr_keyword">boolean</strong> allStringsNull = <strong class="jxr_keyword">true</strong>; -<a class="jxr_linenumber" name="L280" href="#L280">280</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> arrayLen = css.length; -<a class="jxr_linenumber" name="L281" href="#L281">281</a> <strong class="jxr_keyword">int</strong> shortestStrLen = Integer.MAX_VALUE; -<a class="jxr_linenumber" name="L282" href="#L282">282</a> <strong class="jxr_keyword">int</strong> longestStrLen = 0; -<a class="jxr_linenumber" name="L283" href="#L283">283</a> -<a class="jxr_linenumber" name="L284" href="#L284">284</a> <em class="jxr_comment">// find the min and max string lengths; this avoids checking to make</em> -<a class="jxr_linenumber" name="L285" href="#L285">285</a> <em class="jxr_comment">// sure we are not exceeding the length of the string each time through</em> -<a class="jxr_linenumber" name="L286" href="#L286">286</a> <em class="jxr_comment">// the bottom loop.</em> -<a class="jxr_linenumber" name="L287" href="#L287">287</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i < arrayLen; i++) { -<a class="jxr_linenumber" name="L288" href="#L288">288</a> <strong class="jxr_keyword">if</strong> (css[i] == <strong class="jxr_keyword">null</strong>) { -<a class="jxr_linenumber" name="L289" href="#L289">289</a> anyStringNull = <strong class="jxr_keyword">true</strong>; -<a class="jxr_linenumber" name="L290" href="#L290">290</a> shortestStrLen = 0; -<a class="jxr_linenumber" name="L291" href="#L291">291</a> } <strong class="jxr_keyword">else</strong> { -<a class="jxr_linenumber" name="L292" href="#L292">292</a> allStringsNull = false; -<a class="jxr_linenumber" name="L293" href="#L293">293</a> shortestStrLen = Math.min(css[i].length(), shortestStrLen); -<a class="jxr_linenumber" name="L294" href="#L294">294</a> longestStrLen = Math.max(css[i].length(), longestStrLen); -<a class="jxr_linenumber" name="L295" href="#L295">295</a> } -<a class="jxr_linenumber" name="L296" href="#L296">296</a> } -<a class="jxr_linenumber" name="L297" href="#L297">297</a> -<a class="jxr_linenumber" name="L298" href="#L298">298</a> <em class="jxr_comment">// handle lists containing all nulls or all empty strings</em> -<a class="jxr_linenumber" name="L299" href="#L299">299</a> <strong class="jxr_keyword">if</strong> (allStringsNull || longestStrLen == 0 && !anyStringNull) { -<a class="jxr_linenumber" name="L300" href="#L300">300</a> <strong class="jxr_keyword">return</strong> INDEX_NOT_FOUND; -<a class="jxr_linenumber" name="L301" href="#L301">301</a> } -<a class="jxr_linenumber" name="L302" href="#L302">302</a> -<a class="jxr_linenumber" name="L303" href="#L303">303</a> <em class="jxr_comment">// handle lists containing some nulls or some empty strings</em> -<a class="jxr_linenumber" name="L304" href="#L304">304</a> <strong class="jxr_keyword">if</strong> (shortestStrLen == 0) { -<a class="jxr_linenumber" name="L305" href="#L305">305</a> <strong class="jxr_keyword">return</strong> 0; -<a class="jxr_linenumber" name="L306" href="#L306">306</a> } -<a class="jxr_linenumber" name="L307" href="#L307">307</a> -<a class="jxr_linenumber" name="L308" href="#L308">308</a> <em class="jxr_comment">// find the position with the first difference across all strings</em> -<a class="jxr_linenumber" name="L309" href="#L309">309</a> <strong class="jxr_keyword">int</strong> firstDiff = -1; -<a class="jxr_linenumber" name="L310" href="#L310">310</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> stringPos = 0; stringPos < shortestStrLen; stringPos++) { -<a class="jxr_linenumber" name="L311" href="#L311">311</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> comparisonChar = css[0].charAt(stringPos); -<a class="jxr_linenumber" name="L312" href="#L312">312</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> arrayPos = 1; arrayPos < arrayLen; arrayPos++) { -<a class="jxr_linenumber" name="L313" href="#L313">313</a> <strong class="jxr_keyword">if</strong> (css[arrayPos].charAt(stringPos) != comparisonChar) { -<a class="jxr_linenumber" name="L314" href="#L314">314</a> firstDiff = stringPos; -<a class="jxr_linenumber" name="L315" href="#L315">315</a> <strong class="jxr_keyword">break</strong>; -<a class="jxr_linenumber" name="L316" href="#L316">316</a> } -<a class="jxr_linenumber" name="L317" href="#L317">317</a> } -<a class="jxr_linenumber" name="L318" href="#L318">318</a> <strong class="jxr_keyword">if</strong> (firstDiff != -1) { -<a class="jxr_linenumber" name="L319" href="#L319">319</a> <strong class="jxr_keyword">break</strong>; -<a class="jxr_linenumber" name="L320" href="#L320">320</a> } -<a class="jxr_linenumber" name="L321" href="#L321">321</a> } -<a class="jxr_linenumber" name="L322" href="#L322">322</a> -<a class="jxr_linenumber" name="L323" href="#L323">323</a> <strong class="jxr_keyword">if</strong> (firstDiff == -1 && shortestStrLen != longestStrLen) { -<a class="jxr_linenumber" name="L324" href="#L324">324</a> <em class="jxr_comment">// we compared all of the characters up to the length of the</em> -<a class="jxr_linenumber" name="L325" href="#L325">325</a> <em class="jxr_comment">// shortest string and didn't find a match, but the string lengths</em> -<a class="jxr_linenumber" name="L326" href="#L326">326</a> <em class="jxr_comment">// vary, so return the length of the shortest string.</em> -<a class="jxr_linenumber" name="L327" href="#L327">327</a> <strong class="jxr_keyword">return</strong> shortestStrLen; -<a class="jxr_linenumber" name="L328" href="#L328">328</a> } -<a class="jxr_linenumber" name="L329" href="#L329">329</a> <strong class="jxr_keyword">return</strong> firstDiff; -<a class="jxr_linenumber" name="L330" href="#L330">330</a> } -<a class="jxr_linenumber" name="L331" href="#L331">331</a> -<a class="jxr_linenumber" name="L332" href="#L332">332</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L333" href="#L333">333</a> <em class="jxr_javadoccomment"> * Gets a set of matching characters between two strings.</em> -<a class="jxr_linenumber" name="L334" href="#L334">334</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L335" href="#L335">335</a> <em class="jxr_javadoccomment"> * <p></em> -<a class="jxr_linenumber" name="L336" href="#L336">336</a> <em class="jxr_javadoccomment"> * Two characters from the first string and the second string are</em> -<a class="jxr_linenumber" name="L337" href="#L337">337</a> <em class="jxr_javadoccomment"> * considered matching if the character's respective positions are no</em> -<a class="jxr_linenumber" name="L338" href="#L338">338</a> <em class="jxr_javadoccomment"> * farther than the limit value.</em> -<a class="jxr_linenumber" name="L339" href="#L339">339</a> <em class="jxr_javadoccomment"> * </p></em> -<a class="jxr_linenumber" name="L340" href="#L340">340</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L341" href="#L341">341</a> <em class="jxr_javadoccomment"> * @param first The first string.</em> -<a class="jxr_linenumber" name="L342" href="#L342">342</a> <em class="jxr_javadoccomment"> * @param second The second string.</em> -<a class="jxr_linenumber" name="L343" href="#L343">343</a> <em class="jxr_javadoccomment"> * @param limit The maximum distance to consider.</em> -<a class="jxr_linenumber" name="L344" href="#L344">344</a> <em class="jxr_javadoccomment"> * @return A string contain the set of common characters.</em> -<a class="jxr_linenumber" name="L345" href="#L345">345</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L346" href="#L346">346</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> String getSetOfMatchingCharacterWithin( -<a class="jxr_linenumber" name="L347" href="#L347">347</a> <strong class="jxr_keyword">final</strong> CharSequence first, <strong class="jxr_keyword">final</strong> CharSequence second, <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> limit) { -<a class="jxr_linenumber" name="L348" href="#L348">348</a> <strong class="jxr_keyword">final</strong> StringBuilder common = <strong class="jxr_keyword">new</strong> StringBuilder(); -<a class="jxr_linenumber" name="L349" href="#L349">349</a> <strong class="jxr_keyword">final</strong> StringBuilder copy = <strong class="jxr_keyword">new</strong> StringBuilder(second); -<a class="jxr_linenumber" name="L350" href="#L350">350</a> -<a class="jxr_linenumber" name="L351" href="#L351">351</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i < first.length(); i++) { -<a class="jxr_linenumber" name="L352" href="#L352">352</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> ch = first.charAt(i); -<a class="jxr_linenumber" name="L353" href="#L353">353</a> <strong class="jxr_keyword">boolean</strong> found = false; -<a class="jxr_linenumber" name="L354" href="#L354">354</a> -<a class="jxr_linenumber" name="L355" href="#L355">355</a> <em class="jxr_comment">// See if the character is within the limit positions away from the</em> -<a class="jxr_linenumber" name="L356" href="#L356">356</a> <em class="jxr_comment">// original position of that character.</em> -<a class="jxr_linenumber" name="L357" href="#L357">357</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> j = Math.max(0, i - limit); !found -<a class="jxr_linenumber" name="L358" href="#L358">358</a> && j < Math.min(i + limit, second.length()); j++) { -<a class="jxr_linenumber" name="L359" href="#L359">359</a> <strong class="jxr_keyword">if</strong> (copy.charAt(j) == ch) { -<a class="jxr_linenumber" name="L360" href="#L360">360</a> found = <strong class="jxr_keyword">true</strong>; -<a class="jxr_linenumber" name="L361" href="#L361">361</a> common.append(ch); -<a class="jxr_linenumber" name="L362" href="#L362">362</a> copy.setCharAt(j, '*'); -<a class="jxr_linenumber" name="L363" href="#L363">363</a> } -<a class="jxr_linenumber" name="L364" href="#L364">364</a> } -<a class="jxr_linenumber" name="L365" href="#L365">365</a> } -<a class="jxr_linenumber" name="L366" href="#L366">366</a> <strong class="jxr_keyword">return</strong> common.toString(); -<a class="jxr_linenumber" name="L367" href="#L367">367</a> } -<a class="jxr_linenumber" name="L368" href="#L368">368</a> -<a class="jxr_linenumber" name="L369" href="#L369">369</a> } +<a class="jxr_linenumber" name="L214" href="#L214">214</a> <em class="jxr_comment">// Calculate the distance.</em> +<a class="jxr_linenumber" name="L215" href="#L215">215</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> dist = (m1.length() / ((<strong class="jxr_keyword">double</strong>) shorter.length()) +<a class="jxr_linenumber" name="L216" href="#L216">216</a> + m2.length() / ((<strong class="jxr_keyword">double</strong>) longer.length()) + (m1.length() - transpositions) +<a class="jxr_linenumber" name="L217" href="#L217">217</a> / ((<strong class="jxr_keyword">double</strong>) m1.length())) / defaultDenominator; +<a class="jxr_linenumber" name="L218" href="#L218">218</a> <strong class="jxr_keyword">return</strong> dist; +<a class="jxr_linenumber" name="L219" href="#L219">219</a> } +<a class="jxr_linenumber" name="L220" href="#L220">220</a> +<a class="jxr_linenumber" name="L221" href="#L221">221</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L222" href="#L222">222</a> <em class="jxr_javadoccomment"> * Calculates the number of transposition between two strings.</em> +<a class="jxr_linenumber" name="L223" href="#L223">223</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L224" href="#L224">224</a> <em class="jxr_javadoccomment"> * @param first The first string.</em> +<a class="jxr_linenumber" name="L225" href="#L225">225</a> <em class="jxr_javadoccomment"> * @param second The second string.</em> +<a class="jxr_linenumber" name="L226" href="#L226">226</a> <em class="jxr_javadoccomment"> * @return The number of transposition between the two strings.</em> +<a class="jxr_linenumber" name="L227" href="#L227">227</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L228" href="#L228">228</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> transpositions(<strong class="jxr_keyword">final</strong> CharSequence first, +<a class="jxr_linenumber" name="L229" href="#L229">229</a> <strong class="jxr_keyword">final</strong> CharSequence second) { +<a class="jxr_linenumber" name="L230" href="#L230">230</a> <strong class="jxr_keyword">int</strong> transpositions = 0; +<a class="jxr_linenumber" name="L231" href="#L231">231</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i < first.length(); i++) { +<a class="jxr_linenumber" name="L232" href="#L232">232</a> <strong class="jxr_keyword">if</strong> (first.charAt(i) != second.charAt(i)) { +<a class="jxr_linenumber" name="L233" href="#L233">233</a> transpositions++; +<a class="jxr_linenumber" name="L234" href="#L234">234</a> } +<a class="jxr_linenumber" name="L235" href="#L235">235</a> } +<a class="jxr_linenumber" name="L236" href="#L236">236</a> <strong class="jxr_keyword">return</strong> transpositions / 2; +<a class="jxr_linenumber" name="L237" href="#L237">237</a> } +<a class="jxr_linenumber" name="L238" href="#L238">238</a> +<a class="jxr_linenumber" name="L239" href="#L239">239</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L240" href="#L240">240</a> <em class="jxr_javadoccomment"> * Compares all CharSequences in an array and returns the index at which the</em> +<a class="jxr_linenumber" name="L241" href="#L241">241</a> <em class="jxr_javadoccomment"> * CharSequences begin to differ.</em> +<a class="jxr_linenumber" name="L242" href="#L242">242</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L243" href="#L243">243</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="L244" href="#L244">244</a> <em class="jxr_javadoccomment"> * For example,</em> +<a class="jxr_linenumber" name="L245" href="#L245">245</a> <em class="jxr_javadoccomment"> * <code>indexOfDifference(new String[] {"i am a machine", "i am a robot"}) -&gt; 7</code></em> +<a class="jxr_linenumber" name="L246" href="#L246">246</a> <em class="jxr_javadoccomment"> * </p></em> +<a class="jxr_linenumber" name="L247" href="#L247">247</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L248" href="#L248">248</a> <em class="jxr_javadoccomment"> * <pre></em> +<a class="jxr_linenumber" name="L249" href="#L249">249</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(null) = -1</em> +<a class="jxr_linenumber" name="L250" href="#L250">250</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {}) = -1</em> +<a class="jxr_linenumber" name="L251" href="#L251">251</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc"}) = -1</em> +<a class="jxr_linenumber" name="L252" href="#L252">252</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {null, null}) = -1</em> +<a class="jxr_linenumber" name="L253" href="#L253">253</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"", ""}) = -1</em> +<a class="jxr_linenumber" name="L254" href="#L254">254</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"", null}) = 0</em> +<a class="jxr_linenumber" name="L255" href="#L255">255</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc", null, null}) = 0</em> +<a class="jxr_linenumber" name="L256" href="#L256">256</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {null, null, "abc"}) = 0</em> +<a class="jxr_linenumber" name="L257" href="#L257">257</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"", "abc"}) = 0</em> +<a class="jxr_linenumber" name="L258" href="#L258">258</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc", ""}) = 0</em> +<a class="jxr_linenumber" name="L259" href="#L259">259</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc", "abc"}) = -1</em> +<a class="jxr_linenumber" name="L260" href="#L260">260</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abc", "a"}) = 1</em> +<a class="jxr_linenumber" name="L261" href="#L261">261</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"ab", "abxyz"}) = 2</em> +<a class="jxr_linenumber" name="L262" href="#L262">262</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2</em> +<a class="jxr_linenumber" name="L263" href="#L263">263</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"abcde", "xyz"}) = 0</em> +<a class="jxr_linenumber" name="L264" href="#L264">264</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"xyz", "abcde"}) = 0</em> +<a class="jxr_linenumber" name="L265" href="#L265">265</a> <em class="jxr_javadoccomment"> * distance.indexOfDifference(new String[] {"i am a machine", "i am a robot"}) = 7</em> +<a class="jxr_linenumber" name="L266" href="#L266">266</a> <em class="jxr_javadoccomment"> * </pre></em> +<a class="jxr_linenumber" name="L267" href="#L267">267</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L268" href="#L268">268</a> <em class="jxr_javadoccomment"> * @param css array of CharSequences, entries may be null</em> +<a class="jxr_linenumber" name="L269" href="#L269">269</a> <em class="jxr_javadoccomment"> * @return the index where the strings begin to differ; -1 if they are all</em> +<a class="jxr_linenumber" name="L270" href="#L270">270</a> <em class="jxr_javadoccomment"> * equal</em> +<a class="jxr_linenumber" name="L271" href="#L271">271</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L272" href="#L272">272</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> indexOfDifference(<strong class="jxr_keyword">final</strong> CharSequence... css) { +<a class="jxr_linenumber" name="L273" href="#L273">273</a> <strong class="jxr_keyword">if</strong> (css == <strong class="jxr_keyword">null</strong> || css.length <= 1) { +<a class="jxr_linenumber" name="L274" href="#L274">274</a> <strong class="jxr_keyword">return</strong> INDEX_NOT_FOUND; +<a class="jxr_linenumber" name="L275" href="#L275">275</a> } +<a class="jxr_linenumber" name="L276" href="#L276">276</a> <strong class="jxr_keyword">boolean</strong> anyStringNull = false; +<a class="jxr_linenumber" name="L277" href="#L277">277</a> <strong class="jxr_keyword">boolean</strong> allStringsNull = <strong class="jxr_keyword">true</strong>; +<a class="jxr_linenumber" name="L278" href="#L278">278</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> arrayLen = css.length; +<a class="jxr_linenumber" name="L279" href="#L279">279</a> <strong class="jxr_keyword">int</strong> shortestStrLen = Integer.MAX_VALUE; +<a class="jxr_linenumber" name="L280" href="#L280">280</a> <strong class="jxr_keyword">int</strong> longestStrLen = 0; +<a class="jxr_linenumber" name="L281" href="#L281">281</a> +<a class="jxr_linenumber" name="L282" href="#L282">282</a> <em class="jxr_comment">// find the min and max string lengths; this avoids checking to make</em> +<a class="jxr_linenumber" name="L283" href="#L283">283</a> <em class="jxr_comment">// sure we are not exceeding the length of the string each time through</em> +<a class="jxr_linenumber" name="L284" href="#L284">284</a> <em class="jxr_comment">// the bottom loop.</em> +<a class="jxr_linenumber" name="L285" href="#L285">285</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i < arrayLen; i++) { +<a class="jxr_linenumber" name="L286" href="#L286">286</a> <strong class="jxr_keyword">if</strong> (css[i] == <strong class="jxr_keyword">null</strong>) { +<a class="jxr_linenumber" name="L287" href="#L287">287</a> anyStringNull = <strong class="jxr_keyword">true</strong>; +<a class="jxr_linenumber" name="L288" href="#L288">288</a> shortestStrLen = 0; +<a class="jxr_linenumber" name="L289" href="#L289">289</a> } <strong class="jxr_keyword">else</strong> { +<a class="jxr_linenumber" name="L290" href="#L290">290</a> allStringsNull = false; +<a class="jxr_linenumber" name="L291" href="#L291">291</a> shortestStrLen = Math.min(css[i].length(), shortestStrLen); +<a class="jxr_linenumber" name="L292" href="#L292">292</a> longestStrLen = Math.max(css[i].length(), longestStrLen); +<a class="jxr_linenumber" name="L293" href="#L293">293</a> } +<a class="jxr_linenumber" name="L294" href="#L294">294</a> } +<a class="jxr_linenumber" name="L295" href="#L295">295</a> +<a class="jxr_linenumber" name="L296" href="#L296">296</a> <em class="jxr_comment">// handle lists containing all nulls or all empty strings</em> +<a class="jxr_linenumber" name="L297" href="#L297">297</a> <strong class="jxr_keyword">if</strong> (allStringsNull || longestStrLen == 0 && !anyStringNull) { +<a class="jxr_linenumber" name="L298" href="#L298">298</a> <strong class="jxr_keyword">return</strong> INDEX_NOT_FOUND; +<a class="jxr_linenumber" name="L299" href="#L299">299</a> } +<a class="jxr_linenumber" name="L300" href="#L300">300</a> +<a class="jxr_linenumber" name="L301" href="#L301">301</a> <em class="jxr_comment">// handle lists containing some nulls or some empty strings</em> +<a class="jxr_linenumber" name="L302" href="#L302">302</a> <strong class="jxr_keyword">if</strong> (shortestStrLen == 0) { +<a class="jxr_linenumber" name="L303" href="#L303">303</a> <strong class="jxr_keyword">return</strong> 0; +<a class="jxr_linenumber" name="L304" href="#L304">304</a> } +<a class="jxr_linenumber" name="L305" href="#L305">305</a> +<a class="jxr_linenumber" name="L306" href="#L306">306</a> <em class="jxr_comment">// find the position with the first difference across all strings</em> +<a class="jxr_linenumber" name="L307" href="#L307">307</a> <strong class="jxr_keyword">int</strong> firstDiff = -1; +<a class="jxr_linenumber" name="L308" href="#L308">308</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> stringPos = 0; stringPos < shortestStrLen; stringPos++) { +<a class="jxr_linenumber" name="L309" href="#L309">309</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> comparisonChar = css[0].charAt(stringPos); +<a class="jxr_linenumber" name="L310" href="#L310">310</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> arrayPos = 1; arrayPos < arrayLen; arrayPos++) { +<a class="jxr_linenumber" name="L311" href="#L311">311</a> <strong class="jxr_keyword">if</strong> (css[arrayPos].charAt(stringPos) != comparisonChar) { +<a class="jxr_linenumber" name="L312" href="#L312">312</a> firstDiff = stringPos; +<a class="jxr_linenumber" name="L313" href="#L313">313</a> <strong class="jxr_keyword">break</strong>; +<a class="jxr_linenumber" name="L314" href="#L314">314</a> } +<a class="jxr_linenumber" name="L315" href="#L315">315</a> } +<a class="jxr_linenumber" name="L316" href="#L316">316</a> <strong class="jxr_keyword">if</strong> (firstDiff != -1) { +<a class="jxr_linenumber" name="L317" href="#L317">317</a> <strong class="jxr_keyword">break</strong>; +<a class="jxr_linenumber" name="L318" href="#L318">318</a> } +<a class="jxr_linenumber" name="L319" href="#L319">319</a> } +<a class="jxr_linenumber" name="L320" href="#L320">320</a> +<a class="jxr_linenumber" name="L321" href="#L321">321</a> <strong class="jxr_keyword">if</strong> (firstDiff == -1 && shortestStrLen != longestStrLen) { +<a class="jxr_linenumber" name="L322" href="#L322">322</a> <em class="jxr_comment">// we compared all of the characters up to the length of the</em> +<a class="jxr_linenumber" name="L323" href="#L323">323</a> <em class="jxr_comment">// shortest string and didn't find a match, but the string lengths</em> +<a class="jxr_linenumber" name="L324" href="#L324">324</a> <em class="jxr_comment">// vary, so return the length of the shortest string.</em> +<a class="jxr_linenumber" name="L325" href="#L325">325</a> <strong class="jxr_keyword">return</strong> shortestStrLen; +<a class="jxr_linenumber" name="L326" href="#L326">326</a> } +<a class="jxr_linenumber" name="L327" href="#L327">327</a> <strong class="jxr_keyword">return</strong> firstDiff; +<a class="jxr_linenumber" name="L328" href="#L328">328</a> } +<a class="jxr_linenumber" name="L329" href="#L329">329</a> +<a class="jxr_linenumber" name="L330" href="#L330">330</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L331" href="#L331">331</a> <em class="jxr_javadoccomment"> * Gets a set of matching characters between two strings.</em> +<a class="jxr_linenumber" name="L332" href="#L332">332</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L333" href="#L333">333</a> <em class="jxr_javadoccomment"> * <p></em> +<a class="jxr_linenumber" name="L334" href="#L334">334</a> <em class="jxr_javadoccomment"> * Two characters from the first string and the second string are</em> +<a class="jxr_linenumber" name="L335" href="#L335">335</a> <em class="jxr_javadoccomment"> * considered matching if the character's respective positions are no</em> +<a class="jxr_linenumber" name="L336" href="#L336">336</a> <em class="jxr_javadoccomment"> * farther than the limit value.</em> +<a class="jxr_linenumber" name="L337" href="#L337">337</a> <em class="jxr_javadoccomment"> * </p></em> +<a class="jxr_linenumber" name="L338" href="#L338">338</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L339" href="#L339">339</a> <em class="jxr_javadoccomment"> * @param first The first string.</em> +<a class="jxr_linenumber" name="L340" href="#L340">340</a> <em class="jxr_javadoccomment"> * @param second The second string.</em> +<a class="jxr_linenumber" name="L341" href="#L341">341</a> <em class="jxr_javadoccomment"> * @param limit The maximum distance to consider.</em> +<a class="jxr_linenumber" name="L342" href="#L342">342</a> <em class="jxr_javadoccomment"> * @return A string contain the set of common characters.</em> +<a class="jxr_linenumber" name="L343" href="#L343">343</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L344" href="#L344">344</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> String getSetOfMatchingCharacterWithin( +<a class="jxr_linenumber" name="L345" href="#L345">345</a> <strong class="jxr_keyword">final</strong> CharSequence first, <strong class="jxr_keyword">final</strong> CharSequence second, <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> limit) { +<a class="jxr_linenumber" name="L346" href="#L346">346</a> <strong class="jxr_keyword">final</strong> StringBuilder common = <strong class="jxr_keyword">new</strong> StringBuilder(); +<a class="jxr_linenumber" name="L347" href="#L347">347</a> <strong class="jxr_keyword">final</strong> StringBuilder copy = <strong class="jxr_keyword">new</strong> StringBuilder(second); +<a class="jxr_linenumber" name="L348" href="#L348">348</a> +<a class="jxr_linenumber" name="L349" href="#L349">349</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i < first.length(); i++) { +<a class="jxr_linenumber" name="L350" href="#L350">350</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">char</strong> ch = first.charAt(i); +<a class="jxr_linenumber" name="L351" href="#L351">351</a> <strong class="jxr_keyword">boolean</strong> found = false; +<a class="jxr_linenumber" name="L352" href="#L352">352</a> +<a class="jxr_linenumber" name="L353" href="#L353">353</a> <em class="jxr_comment">// See if the character is within the limit positions away from the</em> +<a class="jxr_linenumber" name="L354" href="#L354">354</a> <em class="jxr_comment">// original position of that character.</em> +<a class="jxr_linenumber" name="L355" href="#L355">355</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> j = Math.max(0, i - limit); !found +<a class="jxr_linenumber" name="L356" href="#L356">356</a> && j < Math.min(i + limit, second.length()); j++) { +<a class="jxr_linenumber" name="L357" href="#L357">357</a> <strong class="jxr_keyword">if</strong> (copy.charAt(j) == ch) { +<a class="jxr_linenumber" name="L358" href="#L358">358</a> found = <strong class="jxr_keyword">true</strong>; +<a class="jxr_linenumber" name="L359" href="#L359">359</a> common.append(ch); +<a class="jxr_linenumber" name="L360" href="#L360">360</a> copy.setCharAt(j, '*'); +<a class="jxr_linenumber" name="L361" href="#L361">361</a> } +<a class="jxr_linenumber" name="L362" href="#L362">362</a> } +<a class="jxr_linenumber" name="L363" href="#L363">363</a> } +<a class="jxr_linenumber" name="L364" href="#L364">364</a> <strong class="jxr_keyword">return</strong> common.toString(); +<a class="jxr_linenumber" name="L365" href="#L365">365</a> } +<a class="jxr_linenumber" name="L366" href="#L366">366</a> +<a class="jxr_linenumber" name="L367" href="#L367">367</a> } </pre> <hr/> <div id="footer">Copyright © 2014–2015 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</div> </body> -</html> +</html> \ No newline at end of file