http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/En/KStemData8.cs ---------------------------------------------------------------------- diff --git a/src/contrib/Analyzers/En/KStemData8.cs b/src/contrib/Analyzers/En/KStemData8.cs deleted file mode 100644 index 99a7a82..0000000 --- a/src/contrib/Analyzers/En/KStemData8.cs +++ /dev/null @@ -1,655 +0,0 @@ -/* -Copyright © 2003, -Center for Intelligent Information Retrieval, -University of Massachusetts, Amherst. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -3. The names "Center for Intelligent Information Retrieval" and -"University of Massachusetts" must not be used to endorse or promote products -derived from this software without prior written permission. To obtain -permission, contact [email protected]. - -THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -SUCH DAMAGE. -*/ -/* This is a C# version of Bob Krovetz' KStem. - * - * Ported from the Java version by Sergio Guzman-Lara. - * CIIR-UMass Amherst http://ciir.cs.umass.edu - */ - -using System; - -namespace Lucene.Net.Analysis.En -{ - /** A list of words used by Kstem - */ - internal class KStemData8 - { - private KStemData8() - { - } - - internal static readonly String[] data = - { - "tenor", "tenpin", "tense", "tensile", "tension", - "tent", "tentacle", "tentative", "tenterhooks", "tenuity", - "tenuous", "tenure", "tepee", "tepid", "tequila", - "tercentenary", "tercentennial", "term", "termagant", "terminable", - "terminal", "terminate", "termination", "terminology", "terminus", - "termite", "terms", "tern", "terpsichorean", "terrace", - "terracotta", "terrain", "terrapin", "terrestrial", "terrible", - "terribly", "terrier", "terrific", "terrifically", "terrify", - "territorial", "territory", "terror", "terrorise", "terrorism", - "terrorize", "terrycloth", "terse", "tertian", "tertiary", - "terylene", "tessellated", "test", "testament", "testamentary", - "testate", "testator", "tester", "testicle", "testify", - "testimonial", "testimony", "testis", "testy", "tetanus", - "tetchy", "tether", "teutonic", "text", "textbook", - "textile", "textual", "texture", "thalidomide", "than", - "thane", "thank", "thankful", "thankless", "thanks", - "thanksgiving", "thankyou", "that", "thatch", "thaw", - "the", "theater", "theatergoer", "theatre", "theatregoer", - "theatrical", 
"theatricals", "thee", "theft", "thegn", - "their", "theirs", "theism", "them", "theme", - "themselves", "then", "thence", "thenceforth", "theocracy", - "theocratic", "theodolite", "theologian", "theology", "theorem", - "theoretical", "theoretically", "theorise", "theorist", "theorize", - "theory", "theosophy", "therapeutic", "therapeutics", "therapist", - "therapy", "there", "thereabouts", "thereafter", "thereby", - "therefore", "therein", "thereinafter", "thereof", "thereon", - "thereto", "thereunder", "thereupon", "therm", "thermal", - "thermionic", "thermionics", "thermodynamics", "thermometer", "thermonuclear", - "thermoplastic", "thermos", "thermosetting", "thermostat", "thesaurus", - "these", "thesis", "thespian", "thews", "they", - "thick", "thicken", "thickener", "thicket", "thickheaded", - "thickness", "thickset", "thief", "thieve", "thieving", - "thievish", "thigh", "thimble", "thimbleful", "thin", - "thine", "thing", "thingamajig", "thingamujig", "things", - "think", "thinkable", "thinking", "thinner", "third", - "thirst", "thirsty", "thirteen", "thirty", "this", - "thistle", "thistledown", "thither", "thole", "thong", - "thorax", "thorn", "thorny", "thorough", "thoroughbred", - "thoroughfare", "thoroughgoing", "those", "thou", "though", - "thought", "thoughtful", "thoughtless", "thousand", "thraldom", - "thrall", "thralldom", "thrash", "thrashing", "thread", - "threadbare", "threadlike", "threat", "threaten", "three", - "threepence", "threnody", "thresh", "thresher", "threshold", - "threw", "thrice", "thrift", "thrifty", "thrill", - "thriller", "thrive", "throat", "throaty", "throb", - "throes", "thrombosis", "throne", "throng", "throstle", - "throttle", "through", "throughout", "throughput", "throughway", - "throw", "throwaway", "throwback", "thru", "thrum", - "thrush", "thrust", "thruster", "thruway", "thud", - "thug", "thuggery", "thumb", "thumbnail", "thumbscrew", - "thumbtack", "thump", "thumping", "thunder", "thunderbolt", - "thunderclap", 
"thundercloud", "thundering", "thunderous", "thunderstorm", - "thunderstruck", "thundery", "thurible", "thursday", "thus", - "thwack", "thwart", "thy", "thyme", "thyroid", - "thyself", "tiara", "tibia", "tic", "tick", - "ticker", "tickertape", "ticket", "ticking", "tickle", - "tickler", "ticklish", "tidal", "tidbit", "tiddler", - "tiddley", "tiddleywinks", "tiddly", "tiddlywinks", "tide", - "tidemark", "tidewater", "tideway", "tidings", "tidy", - "tie", "tiebreaker", "tiepin", "tier", "tiff", - "tiffin", "tig", "tiger", "tigerish", "tight", - "tighten", "tightfisted", "tightrope", "tights", "tightwad", - "tigress", "tike", "tilde", "tile", "till", - "tillage", "tiller", "tilt", "timber", "timbered", - "timberline", "timbre", "timbrel", "time", "timekeeper", - "timeless", "timely", "timepiece", "timer", "times", - "timesaving", "timeserver", "timeserving", "timetable", "timework", - "timeworn", "timid", "timing", "timorous", "timothy", - "timpani", "timpanist", "tin", "tincture", "tinder", - "tinderbox", "tinfoil", "ting", "tingaling", "tinge", - "tingle", "tinker", "tinkle", "tinny", "tinplate", - "tinsel", "tint", "tintack", "tintinnabulation", "tiny", - "tip", "tippet", "tipple", "tipstaff", "tipster", - "tipsy", "tiptoe", "tirade", "tire", "tired", - "tireless", "tiresome", "tiro", "tissue", "tit", - "titan", "titanic", "titanium", "titbit", "titfer", - "tithe", "titillate", "titivate", "title", "titled", - "titleholder", "titmouse", "titter", "tittivate", "tittle", - "titty", "titular", "tizzy", "tnt", "toad", - "toadstool", "toady", "toast", "toaster", "toastmaster", - "tobacco", "tobacconist", "toboggan", "toccata", "tocsin", - "tod", "today", "toddle", "toddler", "toddy", - "toe", "toehold", "toenail", "toff", "toffee", - "toffy", "tog", "toga", "together", "togetherness", - "toggle", "togs", "toil", "toilet", "toiletries", - "toiletry", "toils", "tokay", "token", "told", - "tolerable", "tolerably", "tolerance", "tolerant", "tolerate", - "toleration", 
"toll", "tollgate", "tollhouse", "tomahawk", - "tomato", "tomb", "tombola", "tomboy", "tombstone", - "tomcat", "tome", "tomfoolery", "tommyrot", "tomorrow", - "tomtit", "ton", "tonal", "tonality", "tone", - "toneless", "tong", "tongs", "tongue", "tonic", - "tonight", "tonnage", "tonne", "tonsil", "tonsilitis", - "tonsillitis", "tonsorial", "tonsure", "tontine", "too", - "took", "tool", "toot", "tooth", "toothache", - "toothbrush", "toothcomb", "toothpaste", "toothpick", "toothsome", - "toothy", "tootle", "toots", "tootsie", "top", - "topaz", "topcoat", "topdressing", "topee", "topgallant", - "topi", "topiary", "topic", "topical", "topicality", - "topknot", "topless", "topmast", "topmost", "topographer", - "topographical", "topography", "topper", "topping", "topple", - "tops", "topsail", "topside", "topsoil", "topspin", - "toque", "tor", "torch", "torchlight", "tore", - "toreador", "torment", "tormentor", "torn", "tornado", - "torpedo", "torpid", "torpor", "torque", "torrent", - "torrential", "torrid", "torsion", "torso", "tort", - "tortilla", "tortoise", "tortoiseshell", "tortuous", "torture", - "tory", "toss", "tot", "total", "totalisator", - "totalitarian", "totalitarianism", "totality", "totalizator", "tote", - "totem", "totter", "tottery", "toucan", "touch", - "touchdown", "touched", "touching", "touchline", "touchstone", - "touchy", "tough", "toughen", "toupee", "tour", - "tourism", "tourist", "tournament", "tourney", "tourniquet", - "tousle", "tout", "tow", "towards", "towel", - "toweling", "towelling", "tower", "towering", "towline", - "town", "townscape", "township", "townsman", "townspeople", - "towpath", "toxaemia", "toxemia", "toxic", "toxicologist", - "toxicology", "toxin", "toy", "toyshop", "trace", - "tracer", "tracery", "trachea", "trachoma", "tracing", - "track", "trackless", "tracksuit", "tract", "tractable", - "traction", "tractor", "trad", "trade", "trademark", - "trader", "trades", "tradesman", "tradespeople", "tradition", - "traditional", 
"traditionalism", "traduce", "traffic", "trafficator", - "trafficker", "tragedian", "tragedienne", "tragedy", "tragic", - "tragicomedy", "trail", "trailer", "train", "trainbearer", - "trainee", "training", "trainman", "traipse", "trait", - "traitor", "traitorous", "trajectory", "tram", "tramline", - "trammel", "trammels", "tramp", "trample", "trampoline", - "trance", "tranny", "tranquil", "tranquiliser", "tranquillise", - "tranquillize", "tranquillizer", "transact", "transaction", "transactions", - "transalpine", "transatlantic", "transcend", "transcendence", "transcendent", - "transcendental", "transcendentalism", "transcontinental", "transcribe", "transcript", - "transcription", "transept", "transfer", "transference", "transfiguration", - "transfigure", "transfix", "transform", "transformation", "transformer", - "transfuse", "transgress", "tranship", "transience", "transient", - "transistor", "transistorise", "transistorize", "transit", "transition", - "transitive", "translate", "translator", "transliterate", "translucence", - "translucent", "transmigration", "transmission", "transmit", "transmitter", - "transmogrify", "transmute", "transoceanic", "transom", "transparency", - "transparent", "transpiration", "transpire", "transplant", "transpolar", - "transport", "transportation", "transporter", "transpose", "transship", - "transubstantiation", "transverse", "transvestism", "transvestite", "trap", - "trapdoor", "trapeze", "trapezium", "trapezoid", "trapper", - "trappings", "trappist", "trapse", "trapshooting", "trash", - "trashcan", "trashy", "trauma", "traumatic", "travail", - "travel", "traveled", "traveler", "travelled", "traveller", - "travelog", "travelogue", "travels", "travelsick", "traverse", - "travesty", "trawl", "trawler", "tray", "treacherous", - "treachery", "treacle", "treacly", "tread", "treadle", - "treadmill", "treason", "treasonable", "treasure", "treasurer", - "treasury", "treat", "treatise", "treatment", "treaty", - "treble", "tree", "trefoil", 
"trek", "trellis", - "tremble", "tremendous", "tremolo", "tremor", "tremulous", - "trench", "trenchant", "trencher", "trencherman", "trend", - "trendsetter", "trendy", "trepan", "trephine", "trepidation", - "trespass", "tresses", "trestle", "trews", "triad", - "trial", "triangle", "triangular", "tribal", "tribalism", - "tribe", "tribesman", "tribulation", "tribunal", "tribune", - "tributary", "tribute", "trice", "triceps", "trichinosis", - "trick", "trickery", "trickle", "trickster", "tricky", - "tricolor", "tricolour", "tricycle", "trident", "triennial", - "trier", "trifle", "trifler", "trifling", "trigger", - "trigonometry", "trike", "trilateral", "trilby", "trilingual", - "trill", "trillion", "trilobite", "trilogy", "trim", - "trimaran", "trimester", "trimmer", "trimming", "trinitrotoluene", - "trinity", "trinket", "trio", "trip", "tripartite", - "triple", "triplet", "triplex", "triplicate", "tripod", - "tripos", "tripper", "tripping", "triptych", "tripwire", - "trireme", "trisect", "trite", "triumph", "triumphal", - "triumphant", "triumvir", "triumvirate", "trivet", "trivia", - "trivial", "trivialise", "triviality", "trivialize", "trochaic", - "trochee", "trod", "trodden", "troglodyte", "troika", - "trojan", "troll", "trolley", "trolleybus", "trollop", - "trombone", "trombonist", "troop", "trooper", "troops", - "troopship", "trope", "trophy", "tropic", "tropical", - "tropics", "trot", "troth", "trotskyist", "trotter", - "troubadour", "trouble", "troublemaker", "troubleshooter", "troublesome", - "trough", "trounce", "troupe", "trouper", "trouser", - "trousers", "trousseau", "trout", "trove", "trowel", - "truancy", "truant", "truce", "truck", "trucking", - "truckle", "truculence", "truculent", "trudge", "true", - "trueborn", "truehearted", "truelove", "truffle", "trug", - "truism", "truly", "trump", "trumpery", "trumpet", - "trumps", "truncate", "truncheon", "trundle", "trunk", - "trunks", "truss", "trust", "trustee", "trusteeship", - "trustful", "trustworthy", 
"trusty", "truth", "truthful", - "try", "tryst", "tsar", "tsarina", "tsp", - "tub", "tuba", "tubby", "tube", "tubeless", - "tuber", "tubercular", "tuberculosis", "tubful", "tubing", - "tubular", "tuck", "tucker", "tuckerbag", "tuesday", - "tuft", "tug", "tugboat", "tuition", "tulip", - "tulle", "tumble", "tumbledown", "tumbler", "tumbleweed", - "tumbrel", "tumbril", "tumescent", "tumid", "tummy", - "tumor", "tumour", "tumult", "tumultuous", "tumulus", - "tun", "tuna", "tundra", "tune", "tuneful", - "tuneless", "tuner", "tungsten", "tunic", "tunnel", - "tunny", "tup", "tuppence", "tuppenny", "turban", - "turbid", "turbine", "turbojet", "turboprop", "turbot", - "turbulence", "turbulent", "turd", "tureen", "turf", - "turgid", "turkey", "turmeric", "turmoil", "turn", - "turnabout", "turncoat", "turncock", "turner", "turning", - "turnip", "turnkey", "turnout", "turnover", "turnpike", - "turnstile", "turntable", "turpentine", "turpitude", "turquoise", - "turret", "turtle", "turtledove", "turtleneck", "tush", - "tusk", "tusker", "tussle", "tussock", "tut", - "tutelage", "tutelary", "tutor", "tutorial", "tutu", - "tuxedo", "twaddle", "twain", "twang", "twat", - "tweak", "twee", "tweed", "tweeds", "tweedy", - "tweet", "tweeter", "tweezers", "twelfth", "twelve", - "twelvemonth", "twenty", "twerp", "twice", "twiddle", - "twig", "twilight", "twill", "twin", "twinge", - "twinkle", "twinkling", "twirl", "twirp", "twist", - "twister", "twit", "twitch", "twitter", "twixt", - "two", "twofaced", "twopence", "twopenny", "twosome", - "tycoon", "tyke", "tympanum", "type", "typecast", - "typeface", "typescript", "typesetter", "typewriter", "typewritten", - "typhoid", "typhoon", "typhus", "typical", "typically", - "typify", "typist", "typographer", "typographic", "typography", - "tyrannical", "tyrannise", "tyrannize", "tyrannosaurus", "tyranny", - "tyrant", "tyre", "tyro", "tzar", "tzarina", - "ubiquitous", "ucca", "udder", "ufo", "ugh", - "ugly", "uhf", "ukulele", "ulcer", "ulcerate", - 
"ulcerous", "ullage", "ulna", "ult", "ulterior", - "ultimate", "ultimately", "ultimatum", "ultimo", "ultramarine", - "ultrasonic", "ultraviolet", "umber", "umbrage", "umbrella", - "umlaut", "umpire", "umpteen", "unabashed", "unabated", - "unable", "unabridged", "unaccompanied", "unaccountable", "unaccustomed", - "unadopted", "unadulterated", "unadvised", "unaffected", "unalloyed", - "unanimous", "unannounced", "unanswerable", "unapproachable", "unarmed", - "unasked", "unassuming", "unattached", "unattended", "unavailing", - "unawares", "unbalance", "unbar", "unbearable", "unbearably", - "unbeknown", "unbelief", "unbelievable", "unbeliever", "unbelieving", - "unbend", "unbending", "unbidden", "unbind", "unblushing", - "unborn", "unbosom", "unbounded", "unbowed", "unbridled", - "unbuckle", "unburden", "unbuttoned", "uncanny", "unceremonious", - "uncertain", "uncertainty", "uncharitable", "uncharted", "unchecked", - "unchristian", "unclad", "uncle", "unclean", "unclouded", - "uncolored", "uncoloured", "uncomfortable", "uncommitted", "uncommonly", - "uncompromising", "unconcerned", "unconditional", "unconscionable", "unconscious", - "unconsidered", "uncork", "uncouple", "uncouth", "uncover", - "uncritical", "uncrowned", "uncrushable", "unction", "unctuous", - "uncut", "undaunted", "undeceive", "undecided", "undeclared", - "undeniable", "under", "underact", "underarm", "underbelly", - "underbrush", "undercarriage", "undercharge", "underclothes", "undercoat", - "undercover", "undercurrent", "undercut", "underdog", "underdone", - "underestimate", "underfelt", "underfloor", "underfoot", "undergarment", - "undergo", "undergraduate", "underground", "undergrowth", "underhand", - "underhanded", "underhung", "underlay", "underlie", "underline", - "underling", "underlying", "undermanned", "undermentioned", "undermine", - "underneath", "undernourish", "underpants", "underpass", "underpin", - "underplay", "underprivileged", "underproof", "underquote", "underrate", - "underscore", 
"undersecretary", "undersell", "undersexed", "undershirt", - "underside", "undersigned", "undersized", "underslung", "understaffed", - "understand", "understanding", "understate", "understatement", "understudy", - "undertake", "undertaker", "undertaking", "undertone", "undertow", - "underwater", "underwear", "underweight", "underwent", "underworld", - "underwrite", "underwriter", "undesirable", "undeveloped", "undies", - "undischarged", "undistinguished", "undivided", "undo", "undoing", - "undomesticated", "undone", "undoubted", "undress", "undressed", - "undue", "undulate", "undulation", "unduly", "undying", - "unearth", "unearthly", "unease", "uneasy", "uneconomic", - "uneducated", "unemployed", "unemployment", "unenlightened", "unenviable", - "unequal", "unequaled", "unequalled", "unequivocal", "unerring", - "unesco", "uneven", "uneventful", "unexampled", "unexceptionable", - "unfailing", "unfaithful", "unfaltering", "unfathomable", "unfathomed", - "unfavorable", "unfavourable", "unfeeling", "unfettered", "unfit", - "unflagging", "unflappable", "unflinching", "unfold", "unforeseen", - "unforgettable", "unfortunate", "unfortunately", "unfounded", "unfrequented", - "unfrock", "unfurl", "ungainly", "ungenerous", "ungodly", - "ungovernable", "ungracious", "ungrateful", "ungrudging", "unguarded", - "unguent", "unhallowed", "unhand", "unhappily", "unhappy", - "unhealthy", "unheard", "unhinge", "unholy", "unhook", - "unhorse", "unicef", "unicorn", "unidentified", "unification", - "uniform", "uniformed", "unify", "unilateral", "unimpeachable", - "uninformed", "uninhabitable", "uninhibited", "uninterested", "uninterrupted", - "union", "unionise", "unionism", "unionist", "unionize", - "unique", "unisex", "unison", "unit", "unitarian", - "unite", "united", "unity", "universal", "universally", - "universe", "university", "unkempt", "unkind", "unkindly", - "unknowing", "unknown", "unlawful", "unlearn", "unleash", - "unleavened", "unless", "unlettered", "unlike", "unlikely", 
- "unload", "unlock", "unloose", "unloosen", "unmade", - "unmannerly", "unmarried", "unmask", "unmatched", "unmeasured", - "unmentionable", "unmentionables", "unmindful", "unmistakable", "unmitigated", - "unmoved", "unnatural", "unnecessary", "unnerve", "unnumbered", - "uno", "unobtrusive", "unofficial", "unorthodox", "unpack", - "unparalleled", "unparliamentary", "unperson", "unpick", "unplaced", - "unplayable", "unpleasant", "unplumbed", "unpracticed", "unpractised", - "unprecedented", "unprejudiced", "unpretentious", "unprincipled", "unprintable", - "unprofessional", "unprompted", "unprovoked", "unqualified", "unquestionable", - "unquestioning", "unquiet", "unquote", "unravel", "unreadable", - "unreal", "unreasonable", "unreasoning", "unrelenting", "unrelieved", - "unremitting", "unrequited", "unreserved", "unrest", "unrestrained", - "unrip", "unrivaled", "unrivalled", "unroll", "unruffled", - "unruly", "unsaddle", "unsaid", "unsavory", "unsavoury", - "unsay", "unscathed", "unschooled", "unscramble", "unscrew", - "unscripted", "unscrupulous", "unseat", "unseeing", "unseemly", - "unseen", "unserviceable", "unsettle", "unsettled", "unsex", - "unsexed", "unshakable", "unshakeable", "unshod", "unsightly", - "unskilled", "unsociable", "unsocial", "unsophisticated", "unsound", - "unsparing", "unspeakable", "unspotted", "unstop", "unstrung", - "unstuck", "unstudied", "unsullied", "unsung", "unswerving", - "untangle", "untapped", "untenable", "unthinkable", "unthinking", - "untie", "until", "untimely", "untinged", "untiring", - "unto", "untold", "untouchable", "untoward", "untruth", - "untruthful", "untutored", "unused", "unusual", "unusually", - "unutterable", "unvarnished", "unveil", "unversed", "unvoiced", - "unwarranted", "unwed", "unwell", "unwieldy", "unwind", - "unwitting", "unwonted", "unzip", "upbeat", "upbraid", - "upbringing", "upcoming", "update", "upend", "upgrade", - "upheaval", "uphill", "uphold", "upholster", "upholsterer", - "upholstery", "upkeep", 
"upland", "uplift", "upon", - "upper", "uppercut", "uppermost", "uppish", "uppity", - "upright", "uprising", "uproar", "uproarious", "uproot", - "upset", "upshot", "upstage", "upstairs", "upstanding", - "upstart", "upstream", "upsurge", "upswing", "uptake", - "uptight", "uptown", "upturn", "upturned", "upward", - "upwards", "uranium", "uranus", "urban", "urbane", - "urbanise", "urbanize", "urchin", "urge", "urgent", - "uric", "urinal", "urinary", "urinate", "urine", - "urn", "usage", "use", "useful", "usefulness", - "useless", "user", "usher", "usherette", "ussr", - "usual", "usually", "usurer", "usurious", "usurp", - "usury", "utensil", "uterine", "uterus", "utilise", - "utilitarian", "utilitarianism", "utility", "utilize", "utmost", - "utopia", "utopian", "utter", "utterance", "utterly", - "uvula", "uvular", "uxorious", "vac", "vacancy", - "vacant", "vacate", "vacation", "vaccinate", "vaccination", - "vaccine", "vacillate", "vacuity", "vacuous", "vacuum", - "vagabond", "vagary", "vagina", "vaginal", "vagrancy", - "vagrant", "vague", "vain", "vainglorious", "vainglory", - "valance", "vale", "valediction", "valedictory", "valency", - "valentine", "valerian", "valet", "valetudinarian", "valiant", - "valiantly", "valid", "validate", "valise", "valley", - "valor", "valour", "valse", "valuable", "valuation", - "value", "valuer", "valve", "valvular", "vamoose", - "vamp", "vampire", "van", "vanadium", "vandal", - "vandalise", "vandalism", "vandalize", "vane", "vanguard", - "vanilla", "vanish", "vanity", "vanquish", "vantagepoint", - "vapid", "vapidity", "vapor", "vaporise", "vaporize", - "vaporous", "vapors", "vapour", "vapours", "variability", - "variable", "variance", "variant", "variation", "varicolored", - "varicoloured", "varicose", "varied", "variegated", "variegation", - "variety", "variform", "variorum", "various", "variously", - "varlet", "varmint", "varnish", "varsity", "vary", - "vascular", "vase", "vasectomy", "vaseline", "vassal", - "vassalage", "vast", 
"vastly", "vastness", "vat", - "vatican", "vaudeville", "vault", "vaulted", "vaulting", - "vaunt", "veal", "vector", "veer", "veg", - "vegan", "vegetable", "vegetarian", "vegetarianism", "vegetate", - "vegetation", "vehement", "vehicle", "vehicular", "veil", - "veiled", "vein", "veined", "veining", "velar", - "velarize", "veld", "veldt", "vellum", "velocipede", - "velocity", "velour", "velours", "velvet", "velveteen", - "velvety", "venal", "vend", "vendee", "vender", - "vendetta", "vendor", "veneer", "venerable", "venerate", - "venereal", "vengeance", "vengeful", "venial", "venison", - "venom", "venomous", "venous", "vent", "ventilate", - "ventilation", "ventilator", "ventricle", "ventriloquism", "ventriloquist", - "venture", "venturer", "venturesome", "venue", "veracious", - "veracity", "veranda", "verandah", "verb", "verbal", - "verbalise", "verbalize", "verbally", "verbatim", "verbena", - "verbiage", "verbose", "verbosity", "verdant", "verdict", - "verdigris", "verdure", "verge", "verger", "verify", - "verily", "verisimilitude", "veritable", "verity", "vermicelli", - "vermiculite", "vermiform", "vermifuge", "vermilion", "vermin", - "verminous", "vermouth", "vernacular", "vernal", "veronal", - "veronica", "verruca", "versatile", "verse", "versed", - "versification", "versify", "version", "verso", "versus", - "vertebra", "vertebrate", "vertex", "vertical", "vertiginous", - "vertigo", "verve", "very", "vesicle", "vesicular", - "vesper", "vespers", "vessel", "vest", "vestibule", - "vestige", "vestigial", "vestment", "vestry", "vestryman", - "vesture", "vet", "vetch", "veteran", "veterinary", - "veto", "vex", "vexation", "vexatious", "vhf", - "via", "viable", "viaduct", "vial", "viands", - "vibes", "vibrancy", "vibrant", "vibraphone", "vibrate", - "vibration", "vibrato", "vibrator", "vicar", "vicarage", - "vicarious", "vice", "vicelike", "viceregal", "vicereine", - "viceroy", "vicinity", "vicious", "vicissitudes", "victim", - "victimise", "victimize", "victor", 
"victorian", "victorious", - "victory", "victual", "victualer", "victualler", "victuals", - "vicuaa", "vicuana", "vide", "videlicet", "video", - "videotape", "vie", "view", "viewer", "viewfinder", - "viewless", "viewpoint", "vigil", "vigilance", "vigilant", - "vigilante", "vignette", "vigor", "vigorous", "vigour", - "viking", "vile", "vilification", "vilify", "villa", - "village", "villager", "villain", "villainies", "villainous", - "villainy", "villein", "villeinage", "villenage", "vim", - "vinaigrette", "vindicate", "vindication", "vindictive", "vine", - "vinegar", "vinegary", "vinery", "vineyard", "vino", - "vinous", "vintage", "vintner", "vinyl", "viol", - "viola", "violate", "violence", "violent", "violet", - "violin", "violoncello", "vip", "viper", "virago", - "virgin", "virginal", "virginals", "virginia", "virginity", - "virgo", "virgule", "virile", "virility", "virologist", - "virology", "virtu", "virtual", "virtually", "virtue", - "virtuosity", "virtuoso", "virtuous", "virulence", "virulent", - "virus", "visa", "visage", "viscera", "visceral", - "viscosity", "viscount", "viscountcy", "viscountess", "viscous", - "vise", "visibility", "visible", "visibly", "vision", - "visionary", "visit", "visitant", "visitation", "visiting", - "visitor", "visor", "vista", "visual", "visualise", - "visualize", "visually", "vital", "vitalise", "vitality", - "vitalize", "vitally", "vitals", "vitamin", "vitiate", - "viticulture", "vitreous", "vitrify", "vitriol", "vitriolic", - "vituperate", "vituperation", "vituperative", "vivace", "vivacious", - "vivarium", "vivid", "viviparous", "vivisect", "vivisection", - "vivisectionist", "vixen", "vixenish", "vizier", "vocab", - "vocabulary", "vocal", "vocalise", "vocalist", "vocalize", - "vocation", "vocational", "vocative", "vociferate", "vociferation", - "vociferous", "vodka", "vogue", "voice", "voiceless", - "void", "voile", "vol", "volatile", "volcanic", - "volcano", "vole", "volition", "volitional", "volley", - "volleyball", 
"volt", "voltage", "voluble", "volume", - "volumes", "voluminous", "voluntary", "volunteer", "voluptuary", - "voluptuous", "volute", "vomit", "voodoo", "voracious", - "vortex", "votary", "vote", "voter", "votive", - "vouch", "voucher", "vouchsafe", "vow", "vowel", - "voyage", "voyager", "voyages", "voyeur", "vtol", - "vulcanise", "vulcanite", "vulcanize", "vulgar", "vulgarian", - "vulgarise", "vulgarism", "vulgarity", "vulgarize", "vulgate", - "vulnerable", "vulpine", "vulture", "vulva", "wac", - "wack", "wacky", "wad", "wadding", "waddle", - "wade", "wader", "wadge", "wadi", "wady", - "wafer", "waffle", "waft", "wag", "wage", - "wager", "wages", "waggery", "waggish", "waggle", - "waggon", "waggoner", "waggonette", "wagon", "wagoner", - "wagonette", "wagtail", "waif", "wail", "wain", - "wainscot", "waist", "waistband", "waistcoat", "waistline", - "wait", "waiter", "waits", "waive", "waiver", - "wake", "wakeful", "waken", "waking", "walk", - "walkabout", "walkaway", "walker", "walking", "walkout", - "walkover", "wall", "walla", "wallaby", "wallah", - "wallet", "wallflower", "wallop", "walloping", "wallow", - "wallpaper", "walnut", "walrus", "waltz", "wampum", - "wan", "wand", "wander", "wanderer", "wandering", - "wanderings", "wanderlust", "wane", "wangle", "wank", - "wanker", "want", "wanting", "wanton", "wants", - "wapiti", "war", "warble", "warbler", "ward", - "warden", "warder", "wardrobe", "wardroom", "warehouse", - "wares", "warfare", "warhead", "warhorse", "warily", - "warlike", "warlock", "warlord", "warm", "warmonger", - "warmth", "warn", "warning", "warp", "warpath", - "warrant", "warrantee", "warrantor", "warranty", "warren", - "warrior", "warship", "wart", "warthog", "wartime", - "wary", "was", "wash", "washable", "washbasin", - "washboard", "washbowl", "washcloth", "washday", "washer", - "washerwoman", "washhouse", "washing", "washout", "washroom", - "washstand", "washwoman", "washy", "wasp", "waspish", - "wassail", "wast", "wastage", "waste", 
"wasteful", - "waster", "wastrel", "watch", "watchband", "watchdog", - "watches", "watchful", "watchmaker", "watchman", "watchtower", - "watchword", "water", "waterborne", "watercolor", "watercolour", - "watercourse", "watercress", "waterfall", "waterfowl", "waterfront", - "waterhole", "waterline", "waterlogged", "waterloo", "waterman", - "watermark", "watermelon", "watermill", "waterpower", "waterproof", - "waters", "watershed", "waterside", "waterspout", "watertight", - "waterway", "waterwheel", "waterwings", "waterworks", "watery", - "watt", "wattage", "wattle", "wave", "wavelength", - "waver", "wavy", "wax", "waxen", "waxworks", - "waxy", "way", "waybill", "wayfarer", "wayfaring", - "waylay", "ways", "wayside", "wayward", "weak", - "weaken", "weakling", "weakness", "weal", "weald", - "wealth", "wealthy", "wean", "weapon", "weaponry", - "wear", "wearing", "wearisome", "weary", "weasel", - "weather", "weatherboard", "weathercock", "weatherglass", "weatherman", - "weatherproof", "weathers", "weave", "weaver", "web", - "webbed", "webbing", "wed", "wedded", "wedding", - "wedge", "wedged", "wedgwood", "wedlock", "wednesday", - "wee", "weed", "weeds", "weedy", "week", - "weekday", "weekend", "weekender", "weekly", "weeknight", - "weeny", "weep", "weeping", "weepy", "weevil", - "weft", "weigh", "weighbridge", "weight", "weighted", - "weighting", "weightless", "weighty", "weir", "weird", - "weirdie", "weirdo", "welch", "welcome", "weld", - "welder", "welfare", "welkin", "well", "wellbeing", - "wellborn", "wellington", "wellspring", "welsh", "welt", - "weltanschauung", "welter", "welterweight", "wen", "wench", - "wend", "wensleydale", "went", "wept", "were", - "werewolf", "wert", "wesleyan", "west", "westbound", - "westerly", "western", "westerner", "westernise", "westernize", - "westernmost", "westward", "westwards", "wet", "wether", - "wetting", "whack", "whacked", "whacker", "whacking", - "whale", "whalebone", "whaler", "whaling", "wham", - "wharf", "what", 
"whatever", "whatnot", "wheat", - "wheaten", "wheedle", "wheel", "wheelbarrow", "wheelbase", - "wheelchair", "wheelhouse", "wheeling", "wheels", "wheelwright", - "wheeze", "wheezy", "whelk", "whelp", "when", - "whence", "whenever", "where", "whereabouts", "whereas", - "whereat", "whereby", "wherefore", "wherefores", "wherein", - "whereof", "whereon", "wheresoever", "whereto", "whereupon", - "wherever", "wherewithal", "wherry", "whet", "whether", - "whetstone", "whew", "whey", "which", "whichever", - "whiff", "whiffy", "whig", "while", "whim", - "whimper", "whimsey", "whimsical", "whimsicality", "whimsy", - "whin", "whine", "whiner", "whinny", "whip", - "whipcord", "whiplash", "whippersnapper", "whippet", "whipping", - "whippoorwill", "whippy", "whir", "whirl", "whirligig", - "whirlpool", "whirlwind", "whirlybird", "whirr", "whisk", - "whisker", "whiskered", "whiskers", "whiskey", "whisky", - "whisper", "whist", "whistle", "whit", "white", - "whitebait", "whitehall", "whiten", "whitening", "whites", - "whitethorn", "whitethroat", "whitewash", "whither", "whiting", - "whitlow", "whitsun", "whitsuntide", "whittle", "whiz", - "whizz", "who", "whoa", "whodunit", "whoever", - "whole", "wholemeal", "wholesale", "wholesaler", "wholesome", - "wholly", "whom", "whoop", "whoopee", "whoosh", - "whop", "whopper", "whopping", "whore", "whorehouse", - "whoremonger", "whorl", "whortleberry", "whose", "whosoever", - "why", "whys", "wick", "wicked", "wicker", - "wickerwork", "wicket", "wide", "widely", "widen", - "widespread", "widgeon", "widow", "widowed", "widower", - "widowhood", "width", "wield", "wife", "wifely", - "wig", "wigged", "wigging", "wiggle", "wight", - "wigwam", "wilco", "wild", "wildcat", "wildebeest", - "wilderness", "wildfire", "wildfowl", "wildlife", "wildly", - "wile", "wiles", "wilful", "wiliness", "will", - "willful", "willies", "willing", "willow", "willowy", - "willpower", "wilt", "wily", "wimple", "wimpy", - "win", "wince", "winceyette", "winch", "wind", - 
"windbag", "windbreak", "windcheater", "windfall", "windily", - "winding", "windjammer", "windlass", "windless", "windmill", - "window", "windowpane", "windowsill", "windpipe", "windscreen", - "windshield", "windsock", "windstorm", "windswept", "windward", - "windy", "wine", "winebibbing", "wineglass", "winepress", - "wineskin", "wing", "winger", "wings", "wingspan", - "wink", "winkers", "winkle", "winner", "winning", - "winnings", "winnow", "winsome", "winter", "wintergreen", - "wintertime", "wintry", "wipe", "wiper", "wire", - "wirecutters", "wireless", "wiretap", "wireworm", "wiring", - "wiry", "wisdom", "wise", "wisecrack", "wish", - "wishbone", "wisp", "wispy", "wisteria", "wistful", - "wit", "witch", "witchcraft", "witchdoctor", "witchery", - "witching", "with", "withal", "withdraw", "withdrawal", - "withdrawn", "withe", "wither", "withering", "withers", - "withhold", "within", "without", "withstand", "withy", - "witless", "witness", "witticism", "witting", "witty", - "wives", "wizard", "wizardry", "wizened", "woad", - "wobble", "wobbly", "woe", "woebegone", "woeful", - "wog", "woke", "woken", "wold", "wolf", - "wolfhound", "wolfram", "wolfsbane", "woman", "womanhood", - "womanise", "womanish", "womanize", "womankind", "womanly", - "womb", "wombat", "womenfolk", "won", "wonder", - "wonderful", "wonderland", "wonderment", "wonders", "wondrous", - "wonky", "wont", "wonted", "woo", "wood", - "woodbine", "woodblock", "woodcock", "woodcraft", "woodcut", - "woodcutter", "wooded", "wooden", "woodenheaded", "woodland", - "woodlouse", "woodpecker", "woodpile", "woodshed", "woodsman", - "woodwind", "woodwork", "woodworm", "woody", "wooer", - "woof", "woofer", "wool", "woolen", "woolens", - "woolgather", "woolgathering", "woollen", "woollens", "woolly", - "woolsack", "woozy", "wop", "word", "wording", - "wordless", "wordplay", "words", "wordy", "wore", - "work", "workable", "workaday", "workbag", "workbasket", - "workbench", "workbook", "workday", "worker", "workhorse", 
- "workhouse", "working", "workings", "workman", "workmanlike", - "workmanship", "workout", "workpeople", "workroom", "works", - "workshop", "worktop", "world", "worldly", "worldshaking", - "worldwide", "worm", "wormhole", "wormwood", "wormy", - "worn", "worried", "worrisome", "worry", "worse", - "worsen", "worship", "worshipful", "worst", "worsted", - "wort", "worth", "worthless", "worthwhile", "worthy", - "wot", "wotcher", "would", "wouldst", "wound", - "wove", "woven", "wow", "wrac", "wrack", - "wraith", "wrangle", "wrangler", "wrap", "wrapper", - "wrapping", "wrath", "wreak", "wreath", "wreathe", - "wreck", "wreckage", "wrecker", "wren", "wrench", - "wrest", "wrestle", "wretch", "wretched", "wriggle", - "wright", "wring", "wringer", "wrinkle", "wrist", - "wristband", "wristlet", "wristwatch", "wristy", "writ", - "write", "writer", "writhe", "writing", "writings", - "written", "wrong", "wrongdoing", "wrongful", "wrongheaded", - "wrote", "wroth", "wrought", "wrung", "wry", - "wurst", "wyvern", "xenon", "xenophobia", "xerox", - "xylophone", "yacht", "yachting", "yachtsman", "yahoo", - "yak", "yam", "yammer", "yang", "yank", - "yankee", "yap", "yard", "yardage", "yardarm", - "yardstick", "yarn", "yarrow", "yashmak", "yaw", - "yawl", "yawn", "yaws", "yea", "yeah", - "year", "yearbook", "yearling", "yearlong", "yearly", - "yearn", "yearning", "years", "yeast", "yeasty", - "yell", "yellow", "yelp", "yen", "yeoman", - "yeomanry", "yes", "yesterday", "yet", "yeti", - "yew", "yid", "yiddish", "yield", "yielding", - "yin", "yippee", "yobbo", "yodel", "yoga", - "yoghurt", "yogi", "yogurt", "yoke", "yokel", - "yolk", "yonder", "yonks", "yore", "yorker", - "you", "young", "younger", "youngster", "your", - "yours", "yourself", "youth", "youthful", "yowl", - "yoyo", "yucca", "yule", "yuletide", "zany", - "zeal", "zealot", "zealotry", "zealous", "zebra", - "zebu", "zed", "zeitgeist", "zen", "zenana", - "zenith", "zephyr", "zeppelin", "zero", "zest", - "ziggurat", "zigzag", 
"zinc", "zinnia", "zionism", - "zip", "zipper", "zippy", "zither", "zizz", - "zodiac", "zombi", "zombie", "zonal", "zone", - "zoning", "zonked", "zoo", "zoologist", "zoology", - "zoom", "zoophyte", "zouave", "zucchini", "zulu", - }; - } -}
// Origin: http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/En/KStemFilter.cs
// Path: src/contrib/Analyzers/En/KStemFilter.cs (blob 32ba36b; listed as a deleted file in the enclosing diff).

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Lucene.Net.Analysis.Tokenattributes;

namespace Lucene.Net.Analysis.En
{
    /// <summary>
    /// Token filter that hands the term text of every token produced by the
    /// wrapped stream to a <c>KStemmer</c> and, whenever the stemmer's
    /// <c>stem</c> call returns <c>true</c>, overwrites the term with the
    /// stemmer's <c>asString()</c> result.
    /// </summary>
    public class KStemFilter : TokenFilter
    {
        // One stemmer instance per filter; it is queried via asString() right after stem().
        private readonly KStemmer _stemmer = new KStemmer();

        // Term attribute registered on this stream in the constructor.
        private readonly ITermAttribute _termAtt;

        // TODO (carried over from the original): a keyword attribute
        // (IKeywordAttribute keywordAtt) is planned but not wired up yet.

        /// <summary>
        /// Wraps <paramref name="input"/> and registers the term attribute
        /// that <see cref="IncrementToken"/> reads and rewrites.
        /// </summary>
        /// <param name="input">The upstream token stream to filter.</param>
        public KStemFilter(TokenStream input)
            : base(input)
        {
            _termAtt = AddAttribute<ITermAttribute>();
        }

        /// <summary>
        /// Advances the wrapped stream by one token and stems its term.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the wrapped stream has no further token;
        /// otherwise <c>true</c>, whether or not the term was rewritten.
        /// </returns>
        public override bool IncrementToken()
        {
            if (input.IncrementToken())
            {
                // TODO (carried over): gate stemming on !keywordAtt.isKeyword()
                // once the keyword attribute exists.
                // NOTE(review): the second argument is the literal 0; what it
                // means is defined by KStemmer.stem, which is not visible
                // here - confirm it is intended.
                if (_stemmer.stem(_termAtt.Term, 0))
                {
                    _termAtt.SetTermBuffer(_stemmer.asString());
                }

                return true;
            }

            return false;
        }
    }
}
