Repository: lucenenet
Updated Branches:
  refs/heads/api-work 54bad2c2d -> 548e768cc


Lucene.Net.Tests.Analysis.Common.Analysis.CharFilters.HTMLStripCharFilterTest: 
Added a note about version compatibility level 4.8.1 and reformatted the tests 
for easier reading.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/31ceeb20
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/31ceeb20
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/31ceeb20

Branch: refs/heads/api-work
Commit: 31ceeb20653ba84a26606f28c7f51e7baebe361b
Parents: 54bad2c
Author: Shad Storhaug <[email protected]>
Authored: Sun Mar 26 04:19:18 2017 +0700
Committer: Shad Storhaug <[email protected]>
Committed: Sun Mar 26 04:19:18 2017 +0700

----------------------------------------------------------------------
 .../Analysis/CharFilter/HTMLStripCharFilter.cs  |   1 +
 .../CharFilters/HTMLStripCharFilterTest.cs      | 228 ++++++++++++++++++-
 2 files changed, 217 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/31ceeb20/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
index 7dba4f6..7184212 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
@@ -29,6 +29,7 @@ namespace Lucene.Net.Analysis.CharFilters
     /// <summary>
     /// A <see cref="CharFilter"/> that wraps another <see cref="TextReader"/> and attempts to strip out HTML constructs.
     /// </summary>
+    // LUCENENET NOTE: Version compatibility level 4.8.1 (added fix for SOLR-5983: HTMLStripCharFilter is treating CDATA sections incorrectly)
     public sealed class HTMLStripCharFilter : BaseCharFilter
     {
         /// <summary>This character denotes the end of file</summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/31ceeb20/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs
index 2b07c0d..0db491d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs
@@ -25,6 +25,7 @@ namespace Lucene.Net.Analysis.CharFilters
         * limitations under the License.
         */
 
+    // LUCENENET NOTE: Version compatibility level 4.8.1 (added fix for SOLR-5983: HTMLStripCharFilter is treating CDATA sections incorrectly)
     public class HTMLStripCharFilterTest : BaseTokenStreamTestCase
     {
 
@@ -56,8 +57,12 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void Test()
         {
-            string html = "<div class=\"foo\">this is some text</div> here is a <a href=\"#bar\">link</a> and " + "another <a href=\"http://lucene.apache.org/\">link</a>. " + "This is an entity: &amp; plus a &lt;.  Here is an &. <!-- is a comment -->";
-            string gold = "\nthis is some text\n here is a link and " + "another link. " + "This is an entity: & plus a <.  Here is an &. ";
+            string html = "<div class=\"foo\">this is some text</div> here is a <a href=\"#bar\">link</a> and " + 
+                "another <a href=\"http://lucene.apache.org/\">link</a>. " + 
+                "This is an entity: &amp; plus a &lt;.  Here is an &. <!-- is a comment -->";
+            string gold = "\nthis is some text\n here is a link and " + 
+                "another link. " + 
+                "This is an entity: & plus a <.  Here is an &. ";
             AssertHTMLStripsTo(html, gold, null);
         }
 
@@ -95,7 +100,8 @@ namespace Lucene.Net.Analysis.CharFilters
                 builder.Append((char)ch);
             }
             // Compare trim()'d output to gold
-            assertEquals("'" + builder.ToString().Trim() + "' is not equal to '" + gold + "'", gold, builder.ToString().Trim());
+            assertEquals("'" + builder.ToString().Trim() + "' is not equal to '" + gold + "'", 
+                gold, builder.ToString().Trim());
         }
 
         [Test]
@@ -144,7 +150,169 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void TestMalformedHTML()
         {
-            string[] testGold = new string[] { "a <a hr<ef=aa<a>> 
</close</a>", "a <a hr<ef=aa> </close", "<a 
href=http://dmoz.org/cgi-bin/add.cgi?where=/arts/\"; class=lu style=\"font-size: 
9px\" target=dmoz>Submit a Site</a>", "Submit a Site", "<a 
href=javascript:ioSwitch('p8','http://www.csmonitor.com/') title=expand id=e8 
class=expanded rel=http://www.csmonitor.com/>Christian Science", "Christian 
Science", "<link rel=\"alternate\" type=\"application/rss+xml\" title=\"San 
Francisco \" 2008 RSS Feed\" href=\"http://2008.sf.wordcamp.org/feed/\"; />", 
"\n", "<a href=\" 
http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery 
symptoms.html, heat congestive heart failure <a href=\" 
http://www.symptoms1bad.happyhost.org/canine";, "<a href=\" 
http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery 
symptoms.html, heat congestive heart failure <a href=\" 
http://www.symptoms1bad.happyhost.org/canine";, "<a 
href=\"http://ucblibraries.colorado.edu/how/index.htm\"cl
 ass=\"pageNavAreaText\">", "", "<link title=\"^\\\" 21Sta's Blog\" 
rel=\"search\"  type=\"application/opensearchdescription+xml\"  
href=\"http://21sta.com/blog/inc/opensearch.php\"; />", "\n", "<a 
href=\"#postcomment\" title=\"\"Leave a comment\";\">?", "?", "<a 
href='/modern-furniture'   ' id='21txt' class='offtab'   
onMouseout=\"this.className='offtab';  return true;\" 
onMouseover=\"this.className='ontab';  return true;\">", "", "<a 
href='http://alievi.wordpress.com/category/01-todos-posts/' style='font-size: 
275%; padding: 1px; margin: 1px;' title='01 - Todos Post's (83)'>", "", "The <a 
href=<a 
href=\"http://www.advancedmd.com>medical\">http://www.advancedmd.com>medical</a>
 practice software</a>", "The <a 
href=medical\">http://www.advancedmd.com>medical practice software", "<a 
href=\"node/21426\" class=\"clipTitle2\" title=\"Levi.com/BMX 2008 Clip of the 
Week 29 \"Morgan Wade Leftover Clips\"\">Levi.com/BMX 2008 Clip of the Week 
29...", "Levi.com/BMX 2008 Clip of the Week 29...", 
 "<a href=\"printer_friendly.php?branch=&year=&submit=go&screen=\";\">Printer 
Friendly", "Printer Friendly", "<a href=#\" ondragstart=\"return false\" 
onclick=\"window.external.AddFavorite('http://www.amazingtextures.com', 
'Amazing Textures');return false\" onmouseover=\"window.status='Add to 
Favorites';return true\">Add to Favorites", "Add to Favorites", "<a 
href=\"../at_home/at_home_search.html\"../_home/at_home_search.html\">At", 
"At", "E-mail: <a href=\"\"mailto:[email protected]\"; \">[email protected] 
</a>", "E-mail: [email protected] ", "<li class=\"farsi\"><a title=\"A'13?\" 
alt=\"A'13?\" href=\"http://www.america.gov/persian\"; alt=\"\" 
name=\"A'13?\"A'13? title=\"A'13?\">A'13?</a></li>", "\nA'13?\n", "<li><a 
href=\"#28\" title=\"Hubert \"Geese\" Ausby\">Hubert \"Geese\" Ausby</a></li>", 
"\nHubert \"Geese\" Ausby\n", "<href=\"http://anbportal.com/mms/login.asp\";>", 
"\n", "<a href=\"", "<a href=\"", "<a href=\">", "", "<a rel=\"nofollow\" 
href=\"http://anissanina31.skyrock.c
 om/1895039493-Hi-tout-le-monde.html\" title=\" Hi, tout le monde !>#</a>", 
"#", "<a href=\"http://annunciharleydavidsonusate.myblog.it/\"; title=\"Annunci 
Moto e Accessori Harley Davidson\" target=\"_blank\"><img 
src=\"http://annunciharleydavidsonusate.myblog.it/images/Antipixel.gif\"; 
/></a>", "", "<a href=\"video/addvideo&v=120838887181\" onClick=\"return 
confirm('Are you sure you want  add this video to your profile? If it exists 
some video in your profile will be overlapped by this video!!')\" \" 
onmouseover=\"this.className='border2'\" onmouseout=\"this.className=''\">", 
"", "<a href=#Services & Support>", "", "<input type=\"image\" 
src=\"http://apologyindex.com/ThemeFiles/83401-72905/images/btn_search.gif\"value=\"Search\";
 name=\"Search\" alt=\"Search\" class=\"searchimage\" onclick=\"incom ='&sc=' + 
document.getElementById('sel').value ; var dt ='&dt=' + 
document.getElementById('dt').value; var searchKeyword = 
document.getElementById('q').value ; searchKeyword = searchKeyword.r
 eplace(/\\s/g,''); if (searchKeyword.length < 3){alert('Nothing to search. 
Search keyword should contain atleast 3 chars.'); return false; } var al='&al=' 
+  document.getElementById('advancedlink').style.display ;  
document.location.href='http://apologyindex.com/search.aspx?q=' + 
document.getElementById('q').value + incom + dt + al;\" />", "", "<input 
type=\"image\" src=\"images/afbe.gif\" width=\"22\" height=\"22\"  hspace=\"4\" 
title=\"Add to Favorite\" alt=\"Add to Favorite\"onClick=\" if(window.sidebar){ 
window.sidebar.addPanel(document.title,location.href,''); }else 
if(window.external){ window.external.AddFavorite(location.href,document.title); 
}else if(window.opera&&window.print) { return true; }\">", "", "<area 
shape=\"rect\" coords=\"12,153,115,305\" 
href=\"http://statenislandtalk.com/v-web/gallery/Osmundsen-family\"Art's 
Norwegian Roots in Rogaland\">", "\n", "<a rel=\"nofollow\" 
href=\"http://arth26.skyrock.com/660188240-bonzai.html\"; title=\"bonza>#", "#", 
"<a href=  >", 
 "", "<ahref=http:..", "<ahref=http:..", "<ahref=http:..>", "\n", 
"<ahref=\"http://aseigo.bddf.ca/cms/1025\";>A", "\nA", "<a 
href=\"javascript:calendar_window=window.open('/calendar.aspx?formname=frmCalendar.txtDate','calendar_window','width=154,height=188');calendar_window.focus()\">",
 "", "<a href=\"/applications/defenseaerospace/19+rackmounts\" title=\"19\" 
Rackmounts\">", "", "<a 
href=http://www.azimprimerie.fr/flash/backup/lewes-zip-code/savage-model-110-manual.html
 title=savage model 110 manual rel=dofollow>", "", "<a class=\"at\" 
name=\"Lamborghini  href=\"http://lamborghini.coolbegin.com\";>Lamborghini /a>", 
"Lamborghini /a>", "<A 
href='newslink.php?news_link=http%3A%2F%2Fwww.worldnetdaily.com%2Findex.php%3Ffa%3DPAGE.view%26pageId%3D85729&news_title=Florida
 QB makes 'John 3:16' hottest Google search Tebow inscribed Bible reference on 
eye black for championship game' TARGET=_blank>", "", "<a href=/myspace 
!style='color:#993333'>", "", "<meta name=3DProgId content=3DExcel.Sheet>"
 , "\n", "<link id=3D\"shLink\" 
href=3D\"PSABrKelly-BADMINTONCupResults08FINAL2008_09_19=_files/sheet004.htm\">",
 "\n", "<td bgcolor=3D\"#FFFFFF\" nowrap>", "\n", "<a 
href=\"http://basnect.info/usersearch/\"predicciones-mundiales-2009\".html\";>\"predicciones
 mundiales 2009\"</a>", "\"predicciones mundiales 2009\"", "<a 
class=\"comment-link\" 
href=\"https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588\"location.href=https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588;>",
 "", "<a href = \"/videos/Bishop\"/\" title = \"click to see more Bishop\" 
videos\">Bishop\"</a>", "Bishop\"", "<a 
href=\"http://bhaa.ie/calendar/event.php?eid=20081203150127531\"\";>BHAA Eircom 
2 &amp; 5 miles CC combined start</a>", "BHAA Eircom 2 & 5 miles CC combined 
start", "<a href=\"http://people.tribe.net/wolfmana\"; 
onClick='setClick(\"Application[tribe].Person[bb7df210-9dc0-478c-917f-436b896bcb79]\")'\"
 title=\"Mana\">", "", "<a  href=\"http://blog.edu-cyberpg.com/
 
ct.ashx?id=6143c528-080c-4bb2-b765-5ec56c8256d3&url=http%3a%2f%2fwww.gsa.ac.uk%2fmackintoshsketchbook%2f\"\"
 eudora=\"autourl\">", "", "<input type=\"text\" value=\"<search here>\">", 
"<input type=\"text\" value=\"\n\">", "<input type=\"text\" value=\"<search 
here\">", "<input type=\"text\" value=\"\n", "<input type=\"text\" 
value=\"search here>\">", "\">", "<input type=\"text\" value=\"&lt;search 
here&gt;\" onFocus=\"this.value='<search here>'\">", "", "<![if ! IE]>\n<link 
href=\"http://i.deviantart.com/icons/favicon.png\"; rel=\"shortcut 
icon\"/>\n<![endif]>", "\n\n\n", "<![if supportMisalignedColumns]>\n<tr 
height=0 style='display:none'>\n<td width=64 
style='width:48pt'></td>\n</tr>\n<![endif]>", "\n\n\n\n\n\n\n\n" };
+            string[] testGold = {
+                "a <a hr<ef=aa<a>> </close</a>",
+                "a <a hr<ef=aa> </close",
+
+                "<a href=http://dmoz.org/cgi-bin/add.cgi?where=/arts/\"; 
class=lu style=\"font-size: 9px\" target=dmoz>Submit a Site</a>",
+                "Submit a Site",
+
+                "<a href=javascript:ioSwitch('p8','http://www.csmonitor.com/') 
title=expand id=e8 class=expanded rel=http://www.csmonitor.com/>Christian 
Science",
+                "Christian Science",
+
+                "<link rel=\"alternate\" type=\"application/rss+xml\" 
title=\"San Francisco \" 2008 RSS Feed\" 
href=\"http://2008.sf.wordcamp.org/feed/\"; />",
+                "\n",
+
+                "<a href=\" 
http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery 
symptoms.html, heat congestive heart failure <a href=\" 
http://www.symptoms1bad.happyhost.org/canine";,
+                "<a href=\" 
http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery 
symptoms.html, heat congestive heart failure <a href=\" 
http://www.symptoms1bad.happyhost.org/canine";,
+
+                "<a 
href=\"http://ucblibraries.colorado.edu/how/index.htm\"class=\"pageNavAreaText\";>",
+                "",
+
+                "<link title=\"^\\\" 21Sta's Blog\" rel=\"search\"  
type=\"application/opensearchdescription+xml\"  
href=\"http://21sta.com/blog/inc/opensearch.php\"; />",
+                "\n",
+
+                "<a href=\"#postcomment\" title=\"\"Leave a comment\";\">?",
+                "?",
+
+                "<a href='/modern-furniture'   ' id='21txt' class='offtab'   
onMouseout=\"this.className='offtab';  return true;\" 
onMouseover=\"this.className='ontab';  return true;\">",
+                "",
+
+                "<a 
href='http://alievi.wordpress.com/category/01-todos-posts/' style='font-size: 
275%; padding: 1px; margin: 1px;' title='01 - Todos Post's (83)'>",
+                "",
+
+                "The <a href=<a 
href=\"http://www.advancedmd.com>medical\">http://www.advancedmd.com>medical</a>
 practice software</a>",
+                "The <a href=medical\">http://www.advancedmd.com>medical 
practice software",
+
+                "<a href=\"node/21426\" class=\"clipTitle2\" 
title=\"Levi.com/BMX 2008 Clip of the Week 29 \"Morgan Wade Leftover 
Clips\"\">Levi.com/BMX 2008 Clip of the Week 29...",
+                "Levi.com/BMX 2008 Clip of the Week 29...",
+
+                "<a 
href=\"printer_friendly.php?branch=&year=&submit=go&screen=\";\">Printer 
Friendly",
+                "Printer Friendly",
+
+                "<a href=#\" ondragstart=\"return false\" 
onclick=\"window.external.AddFavorite('http://www.amazingtextures.com', 
'Amazing Textures');return false\" onmouseover=\"window.status='Add to 
Favorites';return true\">Add to Favorites",
+                "Add to Favorites",
+
+                "<a 
href=\"../at_home/at_home_search.html\"../_home/at_home_search.html\">At",
+                "At",
+
+                "E-mail: <a href=\"\"mailto:[email protected]\"; 
\">[email protected] </a>",
+                "E-mail: [email protected] ",
+
+                "<li class=\"farsi\"><a title=\"A'13?\" alt=\"A'13?\" 
href=\"http://www.america.gov/persian\"; alt=\"\" name=\"A'13?\"A'13? 
title=\"A'13?\">A'13?</a></li>",
+                "\nA'13?\n",
+
+                "<li><a href=\"#28\" title=\"Hubert \"Geese\" Ausby\">Hubert 
\"Geese\" Ausby</a></li>",
+                "\nHubert \"Geese\" Ausby\n",
+
+                "<href=\"http://anbportal.com/mms/login.asp\";>",
+                "\n",
+
+                "<a href=\"",
+                "<a href=\"",
+
+                "<a href=\">",
+                "",
+
+                "<a rel=\"nofollow\" 
href=\"http://anissanina31.skyrock.com/1895039493-Hi-tout-le-monde.html\"; 
title=\" Hi, tout le monde !>#</a>",
+                "#",
+
+                "<a href=\"http://annunciharleydavidsonusate.myblog.it/\"; 
title=\"Annunci Moto e Accessori Harley Davidson\" target=\"_blank\"><img 
src=\"http://annunciharleydavidsonusate.myblog.it/images/Antipixel.gif\"; 
/></a>",
+                "",
+
+                "<a href=\"video/addvideo&v=120838887181\" onClick=\"return 
confirm('Are you sure you want  add this video to your profile? If it exists 
some video in your profile will be overlapped by this video!!')\" \" 
onmouseover=\"this.className='border2'\" onmouseout=\"this.className=''\">",
+                "",
+
+                "<a href=#Services & Support>",
+                "",
+
+                "<input type=\"image\" 
src=\"http://apologyindex.com/ThemeFiles/83401-72905/images/btn_search.gif\"value=\"Search\";
 name=\"Search\" alt=\"Search\" class=\"searchimage\" onclick=\"incom ='&sc=' + 
document.getElementById('sel').value ; var dt ='&dt=' + 
document.getElementById('dt').value; var searchKeyword = 
document.getElementById('q').value ; searchKeyword = 
searchKeyword.replace(/\\s/g,''); if (searchKeyword.length < 3){alert('Nothing 
to search. Search keyword should contain atleast 3 chars.'); return false; } 
var al='&al=' +  document.getElementById('advancedlink').style.display ;  
document.location.href='http://apologyindex.com/search.aspx?q=' + 
document.getElementById('q').value + incom + dt + al;\" />",
+                "",
+
+                "<input type=\"image\" src=\"images/afbe.gif\" width=\"22\" 
height=\"22\"  hspace=\"4\" title=\"Add to Favorite\" alt=\"Add to 
Favorite\"onClick=\" if(window.sidebar){ 
window.sidebar.addPanel(document.title,location.href,''); }else 
if(window.external){ window.external.AddFavorite(location.href,document.title); 
}else if(window.opera&&window.print) { return true; }\">",
+                "",
+
+                "<area shape=\"rect\" coords=\"12,153,115,305\" 
href=\"http://statenislandtalk.com/v-web/gallery/Osmundsen-family\"Art's 
Norwegian Roots in Rogaland\">",
+                "\n",
+
+                "<a rel=\"nofollow\" 
href=\"http://arth26.skyrock.com/660188240-bonzai.html\"; title=\"bonza>#",
+                "#",
+
+                "<a href=  >",
+                "",
+
+                "<ahref=http:..",
+                "<ahref=http:..",
+
+                "<ahref=http:..>",
+                "\n",
+
+                "<ahref=\"http://aseigo.bddf.ca/cms/1025\";>A",
+                "\nA",
+
+                "<a 
href=\"javascript:calendar_window=window.open('/calendar.aspx?formname=frmCalendar.txtDate','calendar_window','width=154,height=188');calendar_window.focus()\">",
+                "",
+
+                "<a href=\"/applications/defenseaerospace/19+rackmounts\" 
title=\"19\" Rackmounts\">",
+                "",
+
+                "<a 
href=http://www.azimprimerie.fr/flash/backup/lewes-zip-code/savage-model-110-manual.html
 title=savage model 110 manual rel=dofollow>",
+                "",
+
+                "<a class=\"at\" name=\"Lamborghini  
href=\"http://lamborghini.coolbegin.com\";>Lamborghini /a>",
+                "Lamborghini /a>",
+
+                "<A 
href='newslink.php?news_link=http%3A%2F%2Fwww.worldnetdaily.com%2Findex.php%3Ffa%3DPAGE.view%26pageId%3D85729&news_title=Florida
 QB makes 'John 3:16' hottest Google search Tebow inscribed Bible reference on 
eye black for championship game' TARGET=_blank>",
+                "",
+
+                "<a href=/myspace !style='color:#993333'>",
+                "",
+
+                "<meta name=3DProgId content=3DExcel.Sheet>",
+                "\n",
+
+                "<link id=3D\"shLink\" 
href=3D\"PSABrKelly-BADMINTONCupResults08FINAL2008_09_19=_files/sheet004.htm\">",
+                "\n",
+
+                "<td bgcolor=3D\"#FFFFFF\" nowrap>",
+                "\n",
+
+                "<a 
href=\"http://basnect.info/usersearch/\"predicciones-mundiales-2009\".html\";>\"predicciones
 mundiales 2009\"</a>",
+                "\"predicciones mundiales 2009\"",
+
+                "<a class=\"comment-link\" 
href=\"https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588\"location.href=https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588;>",
+                "",
+
+                "<a href = \"/videos/Bishop\"/\" title = \"click to see more 
Bishop\" videos\">Bishop\"</a>",
+                "Bishop\"",
+
+                "<a 
href=\"http://bhaa.ie/calendar/event.php?eid=20081203150127531\"\";>BHAA Eircom 
2 &amp; 5 miles CC combined start</a>",
+                "BHAA Eircom 2 & 5 miles CC combined start",
+
+                "<a href=\"http://people.tribe.net/wolfmana\"; 
onClick='setClick(\"Application[tribe].Person[bb7df210-9dc0-478c-917f-436b896bcb79]\")'\"
 title=\"Mana\">",
+                "",
+
+                "<a  
href=\"http://blog.edu-cyberpg.com/ct.ashx?id=6143c528-080c-4bb2-b765-5ec56c8256d3&url=http%3a%2f%2fwww.gsa.ac.uk%2fmackintoshsketchbook%2f\"\";
 eudora=\"autourl\">",
+                "",
+
+                "<input type=\"text\" value=\"<search here>\">",
+                "<input type=\"text\" value=\"\n\">",
+
+                "<input type=\"text\" value=\"<search here\">",
+                "<input type=\"text\" value=\"\n",
+
+                "<input type=\"text\" value=\"search here>\">",
+                "\">",
+
+                "<input type=\"text\" value=\"&lt;search here&gt;\" 
onFocus=\"this.value='<search here>'\">",
+                "",
+
+                "<![if ! IE]>\n<link 
href=\"http://i.deviantart.com/icons/favicon.png\"; rel=\"shortcut 
icon\"/>\n<![endif]>",
+                "\n\n\n",
+
+                "<![if supportMisalignedColumns]>\n<tr height=0 
style='display:none'>\n<td width=64 
style='width:48pt'></td>\n</tr>\n<![endif]>",
+                "\n\n\n\n\n\n\n\n"
+            };
             for (int i = 0; i < testGold.Length; i += 2)
             {
                 AssertHTMLStripsTo(testGold[i], testGold[i + 1], null);
@@ -249,7 +417,8 @@ namespace Lucene.Net.Analysis.CharFilters
             while ((ch = reader.Read()) > 0)
             {
                 int correction = reader.CorrectOffset(off);
-                assertTrue("invalid offset correction: " + off + "->" + correction + " for doc of length: " + length, correction <= length);
+                assertTrue("invalid offset correction: " + off + "->" + correction + " for doc of length: " + length, 
+                    correction <= length);
                 off++;
             }
         }
@@ -284,7 +453,9 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void TestServerSideIncludes()
         {
-            string test = "one<img src=\"image.png\"\n" + " alt =  \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}'  -->\"\n\n" + " title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
+            string test = "one<img src=\"image.png\"\n" + 
+                " alt =  \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}'  -->\"\n\n" + 
+                " title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
             string gold = "onetwo";
             AssertHTMLStripsTo(test, gold, null);
 
@@ -317,7 +488,11 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void TestStyle()
         {
-            string test = "one<style type=\"text/css\">\n" + "<!--\n" + "@import url('http://www.lasletrasdecanciones.com/css.css');\n" + "-->\n" + "</style>two";
+            string test = "one<style type=\"text/css\">\n" + 
+                "<!--\n" + 
+                "@import url('http://www.lasletrasdecanciones.com/css.css');\n" + 
+                "-->\n" + 
+                "</style>two";
             string gold = "one\ntwo";
             AssertHTMLStripsTo(test, gold, null);
         }
@@ -334,7 +509,13 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void TestBR()
         {
-            string[] testGold = new string[] { "one<BR />two<br>three", "one\ntwo\nthree", "one<BR some stuff here too>two</BR>", "one\ntwo\n" };
+            string[] testGold = {
+                "one<BR />two<br>three",
+                "one\ntwo\nthree",
+
+                "one<BR some stuff here too>two</BR>",
+                "one\ntwo\n"
+            };
             for (int i = 0; i < testGold.Length; i += 2)
             {
                 AssertHTMLStripsTo(testGold[i], testGold[i + 1], null);
@@ -361,13 +542,37 @@ namespace Lucene.Net.Analysis.CharFilters
         public virtual void TestCDATA()
         {
             int maxNumElems = 100;
-            string randomHtmlishString1 = TestUtil.RandomHtmlishString(Random(), maxNumElems).Replace(">", " ").replaceFirst("^--", "__"); // Don't create a comment (disallow "<!--") and don't include a closing ">"
+            string randomHtmlishString1 // Don't create a comment (disallow "<!--") and don't include a closing ">"
+                = TestUtil.RandomHtmlishString(Random(), maxNumElems).Replace(">", " ").replaceFirst("^--", "__");
             string closedAngleBangNonCDATA = "<!" + randomHtmlishString1 + "-[CDATA[&]]>";
 
-            string randomHtmlishString2 = TestUtil.RandomHtmlishString(Random(), maxNumElems).Replace(">", " ").replaceFirst("^--", "__"); // Don't create a comment (disallow "<!--") and don't include a closing ">"
+            string randomHtmlishString2 // Don't create a comment (disallow "<!--") and don't include a closing ">"
+                = TestUtil.RandomHtmlishString(Random(), maxNumElems).Replace(">", " ").replaceFirst("^--", "__");
             string unclosedAngleBangNonCDATA = "<!" + randomHtmlishString1 + "-[CDATA[";
 
-            string[] testGold = new string[] { "one<![CDATA[<one><two>three<four></four></two></one>]]>two", "one<one><two>three<four></four></two></one>two", "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five", "onetwo<![CDATA[three]]>fourfive", "<! [CDATA[&]]>", "", "<! [CDATA[&] ] >", "", "<! [CDATA[&]]", "<! [CDATA[&]]", "<!\u2009[CDATA[&]]>", "", "<!\u2009[CDATA[&]\u2009]\u2009>", "", "<!\u2009[CDATA[&]\u2009]\u2009", "<!\u2009[CDATA[&]\u2009]\u2009", closedAngleBangNonCDATA, "", "<![CDATA[", "", "<![CDATA[<br>", "<br>", "<![CDATA[<br>]]", "<br>]]", "<![CDATA[<br>]]>", "<br>", "<![CDATA[<br>] ] >", "<br>] ] >", "<![CDATA[<br>]\u2009]\u2009>", "<br>]\u2009]\u2009>", "<!\u2009[CDATA[", "<!\u2009[CDATA[", unclosedAngleBangNonCDATA, unclosedAngleBangNonCDATA };
+            string[] testGold = {
+                "one<![CDATA[<one><two>three<four></four></two></one>]]>two",
+                "one<one><two>three<four></four></two></one>two",
+
+                "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five",
+                "onetwo<![CDATA[three]]>fourfive",
+
+                "<! [CDATA[&]]>", "",
+                "<! [CDATA[&] ] >", "",
+                "<! [CDATA[&]]", "<! [CDATA[&]]", // unclosed angle bang - all input is output
+                "<!\u2009[CDATA[&]]>", "",
+                "<!\u2009[CDATA[&]\u2009]\u2009>", "",
+                "<!\u2009[CDATA[&]\u2009]\u2009", "<!\u2009[CDATA[&]\u2009]\u2009", // unclosed angle bang - all input is output
+                closedAngleBangNonCDATA, "",
+                "<![CDATA[", "",
+                "<![CDATA[<br>", "<br>",
+                "<![CDATA[<br>]]", "<br>]]",
+                "<![CDATA[<br>]]>", "<br>",
+                "<![CDATA[<br>] ] >", "<br>] ] >",
+                "<![CDATA[<br>]\u2009]\u2009>", "<br>]\u2009]\u2009>",
+                "<!\u2009[CDATA[", "<!\u2009[CDATA[",
+                unclosedAngleBangNonCDATA, unclosedAngleBangNonCDATA
+            };
             for (int i = 0; i < testGold.Length; i += 2)
             {
                 AssertHTMLStripsTo(testGold[i], testGold[i + 1], null);
@@ -525,5 +730,4 @@ namespace Lucene.Net.Analysis.CharFilters
             assertEquals("'" + builder.ToString() + "' is not equal to '" + gold + "'", gold, builder.ToString());
         }
     }
-
 }
\ No newline at end of file

Reply via email to