Modified: websites/production/commons/content/proper/commons-text/xref/org/apache/commons/text/similarity/JaroWinklerDistance.html ============================================================================== --- websites/production/commons/content/proper/commons-text/xref/org/apache/commons/text/similarity/JaroWinklerDistance.html (original) +++ websites/production/commons/content/proper/commons-text/xref/org/apache/commons/text/similarity/JaroWinklerDistance.html Wed Apr 12 00:35:37 2017 @@ -83,86 +83,85 @@ <a class="jxr_linenumber" name="L75" href="#L75">75</a> @Override <a class="jxr_linenumber" name="L76" href="#L76">76</a> <strong class="jxr_keyword">public</strong> Double apply(<strong class="jxr_keyword">final</strong> CharSequence left, <strong class="jxr_keyword">final</strong> CharSequence right) { <a class="jxr_linenumber" name="L77" href="#L77">77</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> defaultScalingFactor = 0.1; -<a class="jxr_linenumber" name="L78" href="#L78">78</a> <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">double</strong> percentageRoundValue = 100.0; -<a class="jxr_linenumber" name="L79" href="#L79">79</a> -<a class="jxr_linenumber" name="L80" href="#L80">80</a> <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) { -<a class="jxr_linenumber" name="L81" href="#L81">81</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>); -<a class="jxr_linenumber" name="L82" href="#L82">82</a> } -<a class="jxr_linenumber" name="L83" href="#L83">83</a> -<a class="jxr_linenumber" name="L84" href="#L84">84</a> <strong class="jxr_keyword">int</strong>[] mtp = matches(left, right); -<a class="jxr_linenumber" name="L85" href="#L85">85</a> <strong class="jxr_keyword">double</strong> m = mtp[0]; -<a class="jxr_linenumber" name="L86" href="#L86">86</a> <strong class="jxr_keyword">if</strong> (m == 0) { -<a class="jxr_linenumber" name="L87" href="#L87">87</a> <strong class="jxr_keyword">return</strong> 0D; -<a class="jxr_linenumber" name="L88" href="#L88">88</a> } -<a class="jxr_linenumber" name="L89" href="#L89">89</a> <strong class="jxr_keyword">double</strong> j = ((m / left.length() + m / right.length() + (m - mtp[1]) / m)) / 3; -<a class="jxr_linenumber" name="L90" href="#L90">90</a> <strong class="jxr_keyword">double</strong> jw = j < 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D - j); -<a class="jxr_linenumber" name="L91" href="#L91">91</a> <strong class="jxr_keyword">return</strong> Math.round(jw * percentageRoundValue) / percentageRoundValue; -<a class="jxr_linenumber" name="L92" href="#L92">92</a> } -<a class="jxr_linenumber" name="L93" href="#L93">93</a> -<a class="jxr_linenumber" name="L94" href="#L94">94</a> <em class="jxr_javadoccomment">/**</em> -<a class="jxr_linenumber" name="L95" href="#L95">95</a> <em class="jxr_javadoccomment"> * This method returns the Jaro-Winkler string matches, transpositions, prefix, max array.</em> -<a class="jxr_linenumber" name="L96" href="#L96">96</a> <em class="jxr_javadoccomment"> *</em> -<a class="jxr_linenumber" name="L97" href="#L97">97</a> <em class="jxr_javadoccomment"> * @param first the first string to be matched</em> -<a class="jxr_linenumber" name="L98" href="#L98">98</a> <em class="jxr_javadoccomment"> * @param second the second string to be machted</em> -<a class="jxr_linenumber" name="L99" href="#L99">99</a> <em class="jxr_javadoccomment"> * @return mtp array containing: matches, transpositions, prefix, and max length</em> -<a class="jxr_linenumber" name="L100" href="#L100">100</a> <em class="jxr_javadoccomment"> */</em> -<a class="jxr_linenumber" name="L101" href="#L101">101</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong>[] matches(<strong class="jxr_keyword">final</strong> CharSequence first, <strong class="jxr_keyword">final</strong> CharSequence second) { -<a class="jxr_linenumber" name="L102" href="#L102">102</a> CharSequence max, min; -<a class="jxr_linenumber" name="L103" href="#L103">103</a> <strong class="jxr_keyword">if</strong> (first.length() > second.length()) { -<a class="jxr_linenumber" name="L104" href="#L104">104</a> max = first; -<a class="jxr_linenumber" name="L105" href="#L105">105</a> min = second; -<a class="jxr_linenumber" name="L106" href="#L106">106</a> } <strong class="jxr_keyword">else</strong> { -<a class="jxr_linenumber" name="L107" href="#L107">107</a> max = second; -<a class="jxr_linenumber" name="L108" href="#L108">108</a> min = first; -<a class="jxr_linenumber" name="L109" href="#L109">109</a> } -<a class="jxr_linenumber" name="L110" href="#L110">110</a> <strong class="jxr_keyword">int</strong> range = Math.max(max.length() / 2 - 1, 0); -<a class="jxr_linenumber" name="L111" href="#L111">111</a> <strong class="jxr_keyword">int</strong>[] matchIndexes = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[min.length()]; -<a class="jxr_linenumber" name="L112" href="#L112">112</a> Arrays.fill(matchIndexes, -1); -<a class="jxr_linenumber" name="L113" href="#L113">113</a> <strong class="jxr_keyword">boolean</strong>[] matchFlags = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">boolean</strong>[max.length()]; -<a class="jxr_linenumber" name="L114" href="#L114">114</a> <strong class="jxr_keyword">int</strong> matches = 0; -<a class="jxr_linenumber" name="L115" href="#L115">115</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> mi = 0; mi < min.length(); mi++) { -<a class="jxr_linenumber" name="L116" href="#L116">116</a> <strong class="jxr_keyword">char</strong> c1 = min.charAt(mi); -<a class="jxr_linenumber" name="L117" href="#L117">117</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> xi = Math.max(mi - range, 0), xn = Math.min(mi + range + 1, max.length()); xi < xn; xi++) { -<a class="jxr_linenumber" name="L118" href="#L118">118</a> <strong class="jxr_keyword">if</strong> (!matchFlags[xi] && c1 == max.charAt(xi)) { -<a class="jxr_linenumber" name="L119" href="#L119">119</a> matchIndexes[mi] = xi; -<a class="jxr_linenumber" name="L120" href="#L120">120</a> matchFlags[xi] = <strong class="jxr_keyword">true</strong>; -<a class="jxr_linenumber" name="L121" href="#L121">121</a> matches++; -<a class="jxr_linenumber" name="L122" href="#L122">122</a> <strong class="jxr_keyword">break</strong>; -<a class="jxr_linenumber" name="L123" href="#L123">123</a> } -<a class="jxr_linenumber" name="L124" href="#L124">124</a> } -<a class="jxr_linenumber" name="L125" href="#L125">125</a> } -<a class="jxr_linenumber" name="L126" href="#L126">126</a> <strong class="jxr_keyword">char</strong>[] ms1 = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">char</strong>[matches]; -<a class="jxr_linenumber" name="L127" href="#L127">127</a> <strong class="jxr_keyword">char</strong>[] ms2 = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">char</strong>[matches]; -<a class="jxr_linenumber" name="L128" href="#L128">128</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0, si = 0; i < min.length(); i++) { -<a class="jxr_linenumber" name="L129" href="#L129">129</a> <strong class="jxr_keyword">if</strong> (matchIndexes[i] != -1) { -<a class="jxr_linenumber" name="L130" href="#L130">130</a> ms1[si] = min.charAt(i); -<a class="jxr_linenumber" name="L131" href="#L131">131</a> si++; -<a class="jxr_linenumber" name="L132" href="#L132">132</a> } -<a class="jxr_linenumber" name="L133" href="#L133">133</a> } -<a class="jxr_linenumber" name="L134" href="#L134">134</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0, si = 0; i < max.length(); i++) { -<a class="jxr_linenumber" name="L135" href="#L135">135</a> <strong class="jxr_keyword">if</strong> (matchFlags[i]) { -<a class="jxr_linenumber" name="L136" href="#L136">136</a> ms2[si] = max.charAt(i); -<a class="jxr_linenumber" name="L137" href="#L137">137</a> si++; -<a class="jxr_linenumber" name="L138" href="#L138">138</a> } -<a class="jxr_linenumber" name="L139" href="#L139">139</a> } -<a class="jxr_linenumber" name="L140" href="#L140">140</a> <strong class="jxr_keyword">int</strong> transpositions = 0; -<a class="jxr_linenumber" name="L141" href="#L141">141</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> mi = 0; mi < ms1.length; mi++) { -<a class="jxr_linenumber" name="L142" href="#L142">142</a> <strong class="jxr_keyword">if</strong> (ms1[mi] != ms2[mi]) { -<a class="jxr_linenumber" name="L143" href="#L143">143</a> transpositions++; -<a class="jxr_linenumber" name="L144" href="#L144">144</a> } -<a class="jxr_linenumber" name="L145" href="#L145">145</a> } -<a class="jxr_linenumber" name="L146" href="#L146">146</a> <strong class="jxr_keyword">int</strong> prefix = 0; -<a class="jxr_linenumber" name="L147" href="#L147">147</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> mi = 0; mi < min.length(); mi++) { -<a class="jxr_linenumber" name="L148" href="#L148">148</a> <strong class="jxr_keyword">if</strong> (first.charAt(mi) == second.charAt(mi)) { -<a class="jxr_linenumber" name="L149" href="#L149">149</a> prefix++; -<a class="jxr_linenumber" name="L150" href="#L150">150</a> } <strong class="jxr_keyword">else</strong> { -<a class="jxr_linenumber" name="L151" href="#L151">151</a> <strong class="jxr_keyword">break</strong>; -<a class="jxr_linenumber" name="L152" href="#L152">152</a> } -<a class="jxr_linenumber" name="L153" href="#L153">153</a> } -<a class="jxr_linenumber" name="L154" href="#L154">154</a> <strong class="jxr_keyword">return</strong> <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[] { matches, transpositions / 2, prefix, max.length() }; -<a class="jxr_linenumber" name="L155" href="#L155">155</a> } -<a class="jxr_linenumber" name="L156" href="#L156">156</a> -<a class="jxr_linenumber" name="L157" href="#L157">157</a> } +<a class="jxr_linenumber" name="L78" href="#L78">78</a> +<a class="jxr_linenumber" name="L79" href="#L79">79</a> <strong class="jxr_keyword">if</strong> (left == <strong class="jxr_keyword">null</strong> || right == <strong class="jxr_keyword">null</strong>) { +<a class="jxr_linenumber" name="L80" href="#L80">80</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> IllegalArgumentException(<span class="jxr_string">"Strings must not be null"</span>); +<a class="jxr_linenumber" name="L81" href="#L81">81</a> } +<a class="jxr_linenumber" name="L82" href="#L82">82</a> +<a class="jxr_linenumber" name="L83" href="#L83">83</a> <strong class="jxr_keyword">int</strong>[] mtp = matches(left, right); +<a class="jxr_linenumber" name="L84" href="#L84">84</a> <strong class="jxr_keyword">double</strong> m = mtp[0]; +<a class="jxr_linenumber" name="L85" href="#L85">85</a> <strong class="jxr_keyword">if</strong> (m == 0) { +<a class="jxr_linenumber" name="L86" href="#L86">86</a> <strong class="jxr_keyword">return</strong> 0D; +<a class="jxr_linenumber" name="L87" href="#L87">87</a> } +<a class="jxr_linenumber" name="L88" href="#L88">88</a> <strong class="jxr_keyword">double</strong> j = ((m / left.length() + m / right.length() + (m - mtp[1]) / m)) / 3; +<a class="jxr_linenumber" name="L89" href="#L89">89</a> <strong class="jxr_keyword">double</strong> jw = j < 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D - j); +<a class="jxr_linenumber" name="L90" href="#L90">90</a> <strong class="jxr_keyword">return</strong> jw; +<a class="jxr_linenumber" name="L91" href="#L91">91</a> } +<a class="jxr_linenumber" name="L92" href="#L92">92</a> +<a class="jxr_linenumber" name="L93" href="#L93">93</a> <em class="jxr_javadoccomment">/**</em> +<a class="jxr_linenumber" name="L94" href="#L94">94</a> <em class="jxr_javadoccomment"> * This method returns the Jaro-Winkler string matches, transpositions, prefix, max array.</em> +<a class="jxr_linenumber" name="L95" href="#L95">95</a> <em class="jxr_javadoccomment"> *</em> +<a class="jxr_linenumber" name="L96" href="#L96">96</a> <em class="jxr_javadoccomment"> * @param first the first string to be matched</em> +<a class="jxr_linenumber" name="L97" href="#L97">97</a> <em class="jxr_javadoccomment"> * @param second the second string to be machted</em> +<a class="jxr_linenumber" name="L98" href="#L98">98</a> <em class="jxr_javadoccomment"> * @return mtp array containing: matches, transpositions, prefix, and max length</em> +<a class="jxr_linenumber" name="L99" href="#L99">99</a> <em class="jxr_javadoccomment"> */</em> +<a class="jxr_linenumber" name="L100" href="#L100">100</a> <strong class="jxr_keyword">protected</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong>[] matches(<strong class="jxr_keyword">final</strong> CharSequence first, <strong class="jxr_keyword">final</strong> CharSequence second) { +<a class="jxr_linenumber" name="L101" href="#L101">101</a> CharSequence max, min; +<a class="jxr_linenumber" name="L102" href="#L102">102</a> <strong class="jxr_keyword">if</strong> (first.length() > second.length()) { +<a class="jxr_linenumber" name="L103" href="#L103">103</a> max = first; +<a class="jxr_linenumber" name="L104" href="#L104">104</a> min = second; +<a class="jxr_linenumber" name="L105" href="#L105">105</a> } <strong class="jxr_keyword">else</strong> { +<a class="jxr_linenumber" name="L106" href="#L106">106</a> max = second; +<a class="jxr_linenumber" name="L107" href="#L107">107</a> min = first; +<a class="jxr_linenumber" name="L108" href="#L108">108</a> } +<a class="jxr_linenumber" name="L109" href="#L109">109</a> <strong class="jxr_keyword">int</strong> range = Math.max(max.length() / 2 - 1, 0); +<a class="jxr_linenumber" name="L110" href="#L110">110</a> <strong class="jxr_keyword">int</strong>[] matchIndexes = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[min.length()]; +<a class="jxr_linenumber" name="L111" href="#L111">111</a> Arrays.fill(matchIndexes, -1); +<a class="jxr_linenumber" name="L112" href="#L112">112</a> <strong class="jxr_keyword">boolean</strong>[] matchFlags = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">boolean</strong>[max.length()]; +<a class="jxr_linenumber" name="L113" href="#L113">113</a> <strong class="jxr_keyword">int</strong> matches = 0; +<a class="jxr_linenumber" name="L114" href="#L114">114</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> mi = 0; mi < min.length(); mi++) { +<a class="jxr_linenumber" name="L115" href="#L115">115</a> <strong class="jxr_keyword">char</strong> c1 = min.charAt(mi); +<a class="jxr_linenumber" name="L116" href="#L116">116</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> xi = Math.max(mi - range, 0), xn = Math.min(mi + range + 1, max.length()); xi < xn; xi++) { +<a class="jxr_linenumber" name="L117" href="#L117">117</a> <strong class="jxr_keyword">if</strong> (!matchFlags[xi] && c1 == max.charAt(xi)) { +<a class="jxr_linenumber" name="L118" href="#L118">118</a> matchIndexes[mi] = xi; +<a class="jxr_linenumber" name="L119" href="#L119">119</a> matchFlags[xi] = <strong class="jxr_keyword">true</strong>; +<a class="jxr_linenumber" name="L120" href="#L120">120</a> matches++; +<a class="jxr_linenumber" name="L121" href="#L121">121</a> <strong class="jxr_keyword">break</strong>; +<a class="jxr_linenumber" name="L122" href="#L122">122</a> } +<a class="jxr_linenumber" name="L123" href="#L123">123</a> } +<a class="jxr_linenumber" name="L124" href="#L124">124</a> } +<a class="jxr_linenumber" name="L125" href="#L125">125</a> <strong class="jxr_keyword">char</strong>[] ms1 = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">char</strong>[matches]; +<a class="jxr_linenumber" name="L126" href="#L126">126</a> <strong class="jxr_keyword">char</strong>[] ms2 = <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">char</strong>[matches]; +<a class="jxr_linenumber" name="L127" href="#L127">127</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0, si = 0; i < min.length(); i++) { +<a class="jxr_linenumber" name="L128" href="#L128">128</a> <strong class="jxr_keyword">if</strong> (matchIndexes[i] != -1) { +<a class="jxr_linenumber" name="L129" href="#L129">129</a> ms1[si] = min.charAt(i); +<a class="jxr_linenumber" name="L130" href="#L130">130</a> si++; +<a class="jxr_linenumber" name="L131" href="#L131">131</a> } +<a class="jxr_linenumber" name="L132" href="#L132">132</a> } +<a class="jxr_linenumber" name="L133" href="#L133">133</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0, si = 0; i < max.length(); i++) { +<a class="jxr_linenumber" name="L134" href="#L134">134</a> <strong class="jxr_keyword">if</strong> (matchFlags[i]) { +<a class="jxr_linenumber" name="L135" href="#L135">135</a> ms2[si] = max.charAt(i); +<a class="jxr_linenumber" name="L136" href="#L136">136</a> si++; +<a class="jxr_linenumber" name="L137" href="#L137">137</a> } +<a class="jxr_linenumber" name="L138" href="#L138">138</a> } +<a class="jxr_linenumber" name="L139" href="#L139">139</a> <strong class="jxr_keyword">int</strong> transpositions = 0; +<a class="jxr_linenumber" name="L140" href="#L140">140</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> mi = 0; mi < ms1.length; mi++) { +<a class="jxr_linenumber" name="L141" href="#L141">141</a> <strong class="jxr_keyword">if</strong> (ms1[mi] != ms2[mi]) { +<a class="jxr_linenumber" name="L142" href="#L142">142</a> transpositions++; +<a class="jxr_linenumber" name="L143" href="#L143">143</a> } +<a class="jxr_linenumber" name="L144" href="#L144">144</a> } +<a class="jxr_linenumber" name="L145" href="#L145">145</a> <strong class="jxr_keyword">int</strong> prefix = 0; +<a class="jxr_linenumber" name="L146" href="#L146">146</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> mi = 0; mi < min.length(); mi++) { +<a class="jxr_linenumber" name="L147" href="#L147">147</a> <strong class="jxr_keyword">if</strong> (first.charAt(mi) == second.charAt(mi)) { +<a class="jxr_linenumber" name="L148" href="#L148">148</a> prefix++; +<a class="jxr_linenumber" name="L149" href="#L149">149</a> } <strong class="jxr_keyword">else</strong> { +<a class="jxr_linenumber" name="L150" href="#L150">150</a> <strong class="jxr_keyword">break</strong>; +<a class="jxr_linenumber" name="L151" href="#L151">151</a> } +<a class="jxr_linenumber" name="L152" href="#L152">152</a> } +<a class="jxr_linenumber" name="L153" href="#L153">153</a> <strong class="jxr_keyword">return</strong> <strong class="jxr_keyword">new</strong> <strong class="jxr_keyword">int</strong>[] { matches, transpositions / 2, prefix, max.length() }; +<a class="jxr_linenumber" name="L154" href="#L154">154</a> } +<a class="jxr_linenumber" name="L155" href="#L155">155</a> +<a class="jxr_linenumber" name="L156" href="#L156">156</a> } </pre> <hr/> <div id="footer">Copyright © 2014–2017 <a href="https://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</div>