http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/resources/tokenization/apache_license_header.txt ---------------------------------------------------------------------- diff --git a/test/resources/tokenization/apache_license_header.txt b/test/resources/tokenization/apache_license_header.txt new file mode 100644 index 0000000..d973dce --- /dev/null +++ b/test/resources/tokenization/apache_license_header.txt @@ -0,0 +1,16 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/resources/tokenization/ja_jp_1.txt ---------------------------------------------------------------------- diff --git a/test/resources/tokenization/ja_jp_1.txt b/test/resources/tokenization/ja_jp_1.txt new file mode 100644 index 0000000..1a0a198 --- /dev/null +++ b/test/resources/tokenization/ja_jp_1.txt @@ -0,0 +1 @@ +å¤åæ¬ã¯é¡åã®è¨ããã¦ããªããã®ãå¤ããè¨ããã¦ããå ´åã§ãã£ã¦ãå 容ã¯ãã¾ãã¾ã§ããããæºæ°ç©èªãã®å ´åã¯ååã®æ¨é¡ã¨ãã¦ãæºæ°ç©èªããªããããã«ç¸å½ããç©èªå ¨ä½ã®æ¨é¡ãè¨ããã¦ããå ´åããããããããã®å¸åãè¨ããã¦ãããã¨ãå°ãªããªããããããçµç·¯ãããç¾å¨ã«ããã¦ä¸è¬ã«ãæºæ°ç©èªãã¨å¼ã°ãã¦ãããã®ç©èªãæ¸ãããå½æã®é¡åãä½ã§ãã£ãã®ãã¯æããã§ã¯ãªããå¤ãæ代ã®åæ¬ã注éæ¸ãªã©ã®æç®ã«è¨ããã¦ããå称ã¯å¤§ãã以ä¸ã®ç³»çµ±ã«åãããã \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/resources/tokenization/ja_jp_2.txt ---------------------------------------------------------------------- diff --git a/test/resources/tokenization/ja_jp_2.txt b/test/resources/tokenization/ja_jp_2.txt new file mode 100644 index 0000000..278b4fd --- /dev/null +++ b/test/resources/tokenization/ja_jp_2.txt @@ -0,0 +1,2 @@ +ä¸é幸ä¸ç·¨ãå¸¸ç¨ æºæ°ç©èªè¦è¦§ãæ¦èµéæ¸é¢ã1997å¹´ï¼å¹³æ9å¹´ï¼ã ISBN 4-8386-0383-5 +ãã®ä»ã«CD-ROMåãããæ¬ææ¤ç´¢ã·ã¹ãã ã¨ãã¦æ¬¡ã®ãããªãã®ãããã \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/resources/tokenization/lorem_ipsum.txt ---------------------------------------------------------------------- diff --git a/test/resources/tokenization/lorem_ipsum.txt b/test/resources/tokenization/lorem_ipsum.txt new file mode 100644 index 0000000..14a4477 --- /dev/null +++ b/test/resources/tokenization/lorem_ipsum.txt @@ -0,0 +1 @@ +"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/resources/tokenization/ru_ru_1.txt ---------------------------------------------------------------------- diff --git a/test/resources/tokenization/ru_ru_1.txt b/test/resources/tokenization/ru_ru_1.txt new file mode 100644 index 0000000..c19a9be --- /dev/null +++ b/test/resources/tokenization/ru_ru_1.txt @@ -0,0 +1,19 @@ +ÐÑл ÑабÑлаз ÑÑÑикеÑнди виÑÑпÑÑаÑоÑебÑз Ñи, кÑм нобÑз дикÑÑÑÑ ÑнвидÑÐ½Ñ ÐµÐ´. ЮÑÑ Ð·Ð¾Ð»ÑÑ Ð¸Ð¹Ð¶ÐºÐ²ÑÑ Ñа, нÑк но ÑлиÑÑ Ð²Ð¾Ð»ÑпÑÑа пÑÑкÑпиÑÑÑ. Ð«Ñ Ð²ÐµÐºÐ¶ декам плÑÑаÑонÑм, Ñа жÑмо ÑÑдÑÐºÐ°Ð±ÐµÑ Ð»ÑебÑÑавиÑÑÑ ÐºÐ²Ñй, алÑбÑкиÑÑ Ð»ÑгÑÐ½Ð´Ð¾Ñ ÑÑ Ð¿ÑÑ. ÐÑж ед аÑÑÑм нÑмквÑам ÑебиквÑÑ, Ñи амÑÑ Ð´ÑбÑÑ Ð½ÑллÑам квÑо. ÐÑ Ð·Ð¾Ð»ÑÑ Ð¿Ð¾Ð½Ð´ÑÑÑм ÑлÑÑÑеÑÑнд Ñ Ð°Ð¶, вÑÑ ÑнвидÑÐ½Ñ Ð´ÑÑиниÑеонÑм Ñкз, конгÑÑ ÐºÑÑÑÑож квÑо аÑ. + +Ðд ÑиÑÑÑÐ½Ñ ÑкжплÑÑикаÑи нÑк, ÑÑ Ð´Ð¾Ð»ÑоÑÑ ÐµÐ¼Ð¿ÑÑÑÑÑ Ð·Ñд. ÐÑд ажжÑм пÑÑжÑкÑÑи жкÑÑпÑÑÑÐ¸Ñ ÑÑ, ÑÑм ан виÑÐ°Ñ Ð°Ð»ÑквÑид дÑÑзÑнÑиÑÑ. ÐÑл квÑандо ÑидÑнж ÑÑ. ÐÑж жÑмпÑÑ ÐºÐ¾Ð½ÐºÐ»ÑÑÑжионÑмквÑÑ Ð½Ñ. + +ÐÑ Ñ Ñз поÑÑо ÑамквÑам плÑÑаÑонÑм, лÑабоÑÑ ÑпикÑÑÑи вÑл ÑÑ. Ðо енÑÑгÑÑ ÐºÐ¾Ð¼Ð¿Ð»ÑÑкÑÑÑÑÑ Ð¼ÑÑ, дÑо жанкÑÑÑ Ð´ÑлÑÑÐ½Ð¹Ñ Ð»ÑебÑÑавиÑÑÑ Ð½Ñ. ÐÐ¶Ñ ÑÐ°Ð»Ð»Ñ Ð¿ÑопÑÑÐ°Ñ ÑÑ, Ñож вÑÑÑÐ°Ñ ÑнÑÑÐ¼Ð¹Ñ Ð°Ð½. ÐÑндй конÑÑнÑÑонÑж пÑÑ Ð°Ñ. ÐÑож ÐºÐ¾Ð¿Ð¸Ð¾Ð¶Ð°Ñ Ð¿ÑÑÑнÑÑÑм ÑÑа Ñи. + +Ðо мÑÐ»Ñ Ð¾Ð¼Ð½Ð¸Ñм ÑÑпÑдÑандаÑ. ÐÑо ÑÑ ÐºÐ²ÑÐ¾Ñ Ð¸ÑваÑÑÑ, ÑÑ Ð²ÐµÐºÐ¶ квÑаÑÑÑио алÑиквÑандо, ÑÑ Ð¼ÑлÑÑÑз ÑÑакÑаÑоз Ð¿Ñ Ð¹Ð»Ð¾Ð¶Ð¾Ð¿Ñ Ñа векж. Ðд нам нÑллÑам гÑбÑÑгÑÑн, ÑÑ Ð¾ÑаÑио иÑеÑÑÑ ÐºÐ¾Ð¼Ð¼ÑÐ½Ñ Ð¿ÑÑ, векж ед золÑÑ ÑбÑквÑÑ ÑингÑлÑиÑ. ÐÑлÑÑÑз гÑаÑкйж волÑÑпÑаÑибÑж мÑÑ ÐµÐ´, одео емпÑÐ´Ð¸Ñ Ð¼Ð°Ð¹ÑжÑаÑйж Ñож ÑÑ, Ñа Ð´Ð¹ÐºÐ¸Ñ Ð·Ð°Ð»ÑÑÑанде квÑалÑизквÑÑ ÑÑÑ. Ðн еÑж анкиллÑÐ°Ñ Ð°ÐºÐºÐ¾Ð¼Ð¼Ð¾Ð´Ð°ÑÑ, ан вÑлÑÑÑ Ð¾Ð¿ÑÑон ÑвÑÑÑÑÑÑÑ Ð²Ñл. + +Ðн гÑаÑко дÑÑÑÑÑÐ½Ñ Ð¼Ð°Ð½Ð´Ð°Ð¼ÑÑ Ð¼Ñа. Ðо пÑо ÑолÑÑÐ°Ñ Ð¿ÑимиÑ, нÑк ан ÑÑÑам долÑоÑÑ ÑлÑÑÑеÑÑнд. Ыам Ð½Ñ ÐºÐ²Ñандо ноÑÑÑÑ. ÐÑмоÑÑ Ð´Ð¸ÐºÑÐ½Ñ Ð°Ð´ Ñ Ð°Ð¶, Ñ Ð°Ð¶ квÑод дÑко ÑÑежмод ÑÑ, амÑÑ Ð´ÑÑиниÑеонÑм еÑж ÑÑ. ÐÐ¶Ñ Ð°Ð´ апÑÑÑÑÑ Ð°Ð¿ÑиÑÑан, кÑм залÑÑ ÑÑкÑÑквÑÑ Ð½Ñ. + +Ðи ÑÑÑдиÑÑ ÑакÑлÑиÑи еÑж, Ñам долÑоÑÑ ÑабÑлаз вокÑбÑÑ Ð°Ñ. ÐÑо опоÑÑÑÑÑ Ð°Ð·Ð¶ÑÑвÑÑÐ¸Ñ Ð¹Ð½. ÐовÑÑ Ð°Ñдиам ÑÑÑ ÑÑ, Ð½Ñ ÐµÐ´ÐºÐ²ÑÑ Ð¿ÑÑÐºÐ¹Ð¿ÐµÑ ÐºÐ²ÑалÑизквÑÑ Ñ Ñз. Ðа кÑм ÑÑÑам ÑÑÐ°Ð½Ñ Ð³ÑаÑкÑ. ÐÑ Ð¿ÑÑ Ð¼Ð¾Ð´Ð¾ ÑпикÑÑе жплÑндидÑ, Ð°Ñ ÑÑм ÑÐ°Ð»Ð»Ñ Ð¿Ð¾Ð¶ÑÑа Ð¿Ñ Ð°ÑдÑÑм, ÑÑÑÑ Ð²ÑÑйÑÑж Ð½Ñ Ð²Ð¸Ð¼. ÐонгÑÑ Ð¾ÑаÑио лобоÑÑÐ¸Ñ ÑÑ ÐºÑм. ÐобÑз опоÑÑÑÐ°Ñ Ð½Ð¾ жÑÑ, вÑÑйÑÑж пÑÑÐºÐ¹Ð¿ÐµÑ Ð¼Ð°Ð»ÑÑизÑÑÑ ÐºÐ²Ñй ан. + +ÐаÑим ÑÑенам ÑÑÑÑÑÑÑнÑÑÑ Ð²Ð¸Ð¼ аÑ. ÐÑ ÐºÐ²Ñо квÑÐ°Ñ Ð´Ð¸Ð³Ð½Ð¸Ð¶Ð¶Ð¸Ð¼, одео жолÑÑÑа ÑебиквÑÑ Ð¼Ñа ÑÑ. Ðд нобÑз ÑанÑаз лÑаоÑÑÑÑ Ð²Ñл, еÑж йн лаÑÐ¸Ð½Ñ ÑабÑлаз аккÑжамÑз, пÑÑ Ð°Ð¿ÑиÑÑан адолÑжкÑÐ½Ñ Ð¿Ð¾Ð¶Ð¹Ð´Ð¾Ð½ÑÑм ÑÑ. ÐонÑÑквÑÐ°Ñ ÐºÐ¾ÑÑдиÑквÑÑ ÑÑм ан, Ñ Ñз ÑÑ Ñ Ð°Ð±ÑмÑÑ ÑпикÑÑÑи ÑÑнзÑÑеÑ. ÐÑ ÐºÐ²Ñй дÑбÑÑ Ð²Ð¸Ñйз, нам ÑÑ ÑлÑÐ¸Ñ ÑабÑÐ»Ð»Ð°Ñ Ð´ÑлÑÑÑкаÑезÑимÑ. ÐонÑÑлаÑÑ Ð¸Ð½Ð·ÑÑÑкÑÐµÐ¾Ñ Ñа кÑм, конжÑÐ»Ñ ÑÑÑгаÑÑ ÐºÐ¾Ð½ÑÑлаÑÑ ÑÑ Ñам, вÑÑ Ñи ÑÑÑгаÑÑ Ð°Ð½Ñеопам. + +ЮллÑм оÑаÑио конÑÑквÑÐ°Ñ ÑÑ Ð²Ñл, вÑлÑÑÑ ÑÑпÑдÑаÑÑ Ñ ÑндÑÑÑÐµÑ ÑÑ Ð¿ÑÑ. УнÑм ÑкÑпÑÑÑнда ÑоÑквÑаÑоз ад векж. ÐвÑо мÑÑÐ°Ñ ÑебиквÑÑ ÑакилÑизиж Ñи, Ñа Ñам ÑÑÐ³Ð¸Ñ ÑакемаÑÑÑ Ð´ÑÑзÑнÑиÑÑ, Ñкз пÑо Ð°Ð±Ñ Ð¾ÑÑÑÐ°Ð½Ñ Ð´Ð¹Ð¶Ð¿ÑÑандо. ÐÑ Ñ Ð°Ð¶ лÑабÑÑÑÑ ÑÑепÑÑÑ, нолÑÑÐ¶Ð¶Ñ ÑÑдÑÐºÐ°Ð±ÐµÑ Ð¿ÑÑ ÑÑ. ТоÑа долоÑÑм азжÑÑвÑÑÐ¸Ñ Ð¿ÑÑ ÑÑ, нÑк залÑÑ ÑлиÑÑ Ð´Ð¸ÐºÑÑÑÑ ÑÑ. Ðд дÑо ÑкжплÑÑикаÑи мнÑжаÑÐºÑ Ñм конклÑÑÑжионÑмквÑÑ. + +ÐонÑÑлаÑÑ Ð°Ð·Ð¶ÑÑвÑÑÐ¸Ñ Ð½Ñ Ð·Ñд. ÐÑл но квÑÑм гÑаÑкйж ÑÑбанйÑаж. ÐÑо ÑÑÑÑкианÑÑÑ Ð´ÑÑÑнÑÑйонÑÑ ÑÑ, зÑаз ÑÑÐ°Ñ ÐºÐ¾Ð½ÐºÑпÑам векж ÑÑ. ЮллÑм зÑжкепианÑÑÑ Ñкз пÑÑ, оÑаÑио нонÑмй оÑнаÑÑÑ Ñи Ñож. Ðож ÑакемаÑÑÑ ÑÑнзÑÑÐµÑ Ð°Ð´, Ð°Ñ Ñакилиз пÑÑÐºÐ¹Ð¿ÐµÑ Ð¿ÑÑжÑкÑÑи нÑк, аппаÑÑÐ°Ñ ÑÑкÑÑквÑÑ Ñкз зÑд. ÐÑм йн вÑвÑндо дÑÑÑакÑо окÑÑÑÑÑÑÑ. + +ШÑа ÑÑквÑÑ ÑолÑÑÐ°Ñ ÑабÑÐ»Ð»Ð°Ñ ÑÑ, Ñ Ð°Ð¶ вÑлÑÑÑ ÑÑÑÑкианÑÑÑ ÐºÐ¾Ð¼Ð¿ÑÑÑ Ñнжам аÑ. Ð¢Ñ Ð¼ÑÑ ÑзÑÑ Ð°Ð¶Ð¶Ñм апÑиÑÑан, Ð°Ñ Ð´Ð¾ÐºÑÐ½Ð´Ñ ÐºÐ¾Ð½ÐºÑпÑам еÑж. ÐожÑÑÑд жанкÑÑÑ ÑвÑÑÑÑÑÑÑ ÑÑ Ð²ÑÑ, но пÑÐ¸Ð¼Ð¸Ñ Ð¿ÑомпÑа пÑÑÑÑквÑÑÑÑж дÑо. ÐÑÑо мÑÑÐ°Ñ Ð¾Ð¼Ð½ÑжквÑÑ Ñам ÑÑ. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/resources/tokenization/top_visited_domains.txt ---------------------------------------------------------------------- diff --git a/test/resources/tokenization/top_visited_domains.txt b/test/resources/tokenization/top_visited_domains.txt new file mode 100644 index 0000000..0238c36 --- /dev/null +++ b/test/resources/tokenization/top_visited_domains.txt @@ -0,0 +1,3 @@ +google.com facebook.com youtube.com yahoo.com baidu.com amazon.com wikipedia.org taobao.com twitter.com Qq.com google.co.in apple.com + +http://www.alexa.com/topsites \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/resources/tokenization/zn_tw_1.txt ---------------------------------------------------------------------- diff --git a/test/resources/tokenization/zn_tw_1.txt b/test/resources/tokenization/zn_tw_1.txt new file mode 100644 index 0000000..7e1e545 --- /dev/null +++ b/test/resources/tokenization/zn_tw_1.txt @@ -0,0 +1,19 @@ +éé鬿 èèè¢ ç «ç²ç´ é¡é¤ è, è¶è· è·¬ éé¡é¥ è½é¨¹é¼ æ¨æ£ æ å·å¿ å«· æææ é³é±¨é±®, æ» æµ¶æ´¯æµ½ ç£ç¿ç å¶æ±æ é§å¾ æ±æ 硾 æ¹¹æ¸µç² é§éé åå¥ ççç ç¿£èè¢ è, 毼 å³å¢º èèèµ² ç¨¨çª¨ç® å±å¨µå¾, ééé ç©çç 庲æ æ§ éé鬿 ççç ç¿ç¶ç ³ é ç³ç¸ é¦ è´è è²è¶è·° è°èè¯, åå塨 ç¤ç° ç¸ å©å² è·£, æ¥ æ¾æ¿ å¢åå¡ å§å¼£æ¶ æ橪橤 + +崺崸 ç§çç£ ç£ç¿ç å«·, å¢ é½´è®éº¡ æ¯ç襳 æ¯ä¸®å¹ ç¿è® 箯 å±å¨µ èè è¡ ç¤¯ç±ç¾» éé¾é°, å¼å¨ èè褩 ç«ç¯´è 槶, éµ³é½é½ é©é·é·© çµç¿è è¼ å¬¦æ¼ èåé 訬è»é² è¿èè¿ é é¤ æ§¶, æ°æ å¬ è»¹è»¦è»µ é¸è®é 椵æ¥æº 渳湥ç èè¶è¨ è·è»¥ å«, ç ¯ç ¨ å¢ é¨é·é ç¸ç½è¾, é¹é¤³é§· çç¿è® ç©çç ç¢¡ç¢ é + +çç£ èè¶è¡ ç¢ç¢ç¢ æ¨æ¨¦æ½ ç©, æ²ææ° å©°å©å² 踣 ç¹ç¹ ççç æºç ç åå é¤, è·è è¼ ç¦ç®ç® ç¬ç¾ 槶 ç¾çç¸ ééé¦ æ å§´æ¤ çç 鳻嶬幧 èªè¶è·® å¢ + +å¢ é¬µéµåª å©è°æ èè§è 餳駷, è® å»æ¦ æ¶æªæª¦ 謺è²è¹ æ¾ é§é§é³¿ èè§è èè´è å壴, ç » åºå¼å¨ ç°»èè± ç¬ç² çµ¼ é¨é¬µ æ¨æ§·æ®¦ 縸縩è å·æ°ç, ç¸çç© é¯¦é¯¢é¯¡ 罫è±è 齥廲 æ» é¯ é¯¦ å®å¦å å¿æ¨æ£ éé±é¸ é å¯åµ è® ç¿ç¨ç« 顤鰩é·, æ¢ èèèª æ¦æç¬ çç¿è® å¢å£ + +å èèè® é±éé é¬éµéµ ç¹è æ¯ä¸® å¢å¥¾ç± æ²ææ° æ¤µæ¥æº æ , é¬éµ é¼å³å¢º å·æ°ç é, é³± éµ³é½é½ é¥é½çª é¨§é¨ è½é¨¹é¼ æºç ç è« å²µå¸, ç » å»¦å»¥å½ ç¹ ç¹ç¹ é¦¦é¨§é¨ é½é½ ç ç å¥åå ® 軹軦軵 壿, æè± é ³ åå¬å¬¨ å§å¸¡æ¦, æ¿æ 榱 櫧æ«ç© å» æ®æ« 驧鬤 è·¾ + +ç¶ é»é¥é¨´ èè¦è²¹ ç¨ç¨ç¨ ç¨¢ç¶ ç¬¢ç¬£ç´½ ç£ç®¹ç³ ç½ æ°å, åå² ç¦ é¶åµå· æ æºæ, ééºé¯ ççç ä¼å¢ æ® æ¢ ç®ç® é¦ºéª±é¡ æ½§æ½£ç½ è§¶èè å¡ è±¥è¶ ææ 毤 幨æ æ´ ç¨ç¨ç¨, ææ±æ² é§é§é´ é»éé£ è¯ è¨ç´± 縢羱 æ§æ®æ® æµæ¶ç¼¹ é»éé£ è¼, 籺籿 åªå¯åµ æ¨§æ§§æ¨ ç硱ç ç + +è趵é ç¢ç¢ç¢ å¹ ç»ç è¼ è£§é ç°èè¤ é¤ä»ä¼ 誽è³è³§ æ· æ· éé ³éª ç¾ç¬è 椵æ¥æº é¡ ç¿ç¶ç ³ 趡è¶è¸ 躨éé 馺 å¤å½ åºå¼å½¶ èè½é æ¯æ»±æ¼® 踣 墡å¬, è³ é¯¦é¯¢é¯¡ é½é¾å¢» è¼è¼ è¡èè± è¥è¥¡è¥ éé¸é ç´ç¿ æ¥, æ® æ¼ ç¾ç±è§ 磩ç£çª± ç 妵妶 æ¦ + +éé± ç¦ è¢è¢è§ é¤é¤é§ 椵æ¥æº é 欿殽 鬵éµåª é¬é¯ªé¯ ç¤ç° ç¸, å½ å¶ä»å è£è£è©· èè¶ èºè¶ è¶è· è·¬ ççç 廲ç±ç³´ æ® é¦éµ å§å¸¡æ¦ è³è¼é æ²ç¯ç«» å¢, æ©æ©æ¾ ç£ç¿ç ææ¸æ é ³ å»ç© å®å¦å ç§çç£ éé± æ, èº èè® å£¾åµ·å¹ è¤£è«è¶¥ + +è·¿ é®é®¥é´® ç²ç¯ç¯ 鵵鵹鵿 åå± èª å¡å«å« ééé å¤å½, çç é¦ºéª±é¡ é¾éé éé¡£é£ å¢, å£¾åµ·å¹ æ¬ææ³ éµ³é½é½ æ éé¶ æ¾ æ«æ°ç å¿æ±å£ è è¥ æ¶æªæª¦ 觶èè éªå ç½, 觾éé·¡ è¼éé¾ å» è¢è§ åå²å§ é¼å³å¢º æ¦¯æ§ è§¢, æ¦ é·é»«é¼± èè¢²è¤ éé¡£é£ é¥é¨´, è«è¸£è¸ 齸å趲 é é¶¾é· é©é·é·© ç¦ç®ç® çµ å¨å±, è¨èª èè½é èªè©è® æ¥ + +è¤ è¤è« èè¢è§ åååª ä¾¹å æ¦, ç·¦ æ¦æ» æèé° è»¥è»±é´ ç·· æ²æ ééºé¯ æææ¥, èªè³è·¿ å½æ³æ° è¢è±è²£ è¬è æ¦ åå åå¬å¬¨ é´é±±ç§ æ½£, æ²æ èªè³è·¿ è²µè¶è· é®é®å 溿 ç¦ ç¬ç² é½´è®éº¡ æ¼»æ¼ç 趡è¶è¸ , å» é¨©é°é° å³·ææµ çç \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/unit/org/apache/cassandra/SchemaLoader.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/SchemaLoader.java b/test/unit/org/apache/cassandra/SchemaLoader.java index e375990..51db4cd 100644 --- a/test/unit/org/apache/cassandra/SchemaLoader.java +++ b/test/unit/org/apache/cassandra/SchemaLoader.java @@ -21,6 +21,9 @@ import java.io.File; import java.io.IOException; import java.util.*; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.index.sasi.SASIIndex; +import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder; import org.junit.After; import org.junit.BeforeClass; @@ -249,6 +252,8 @@ public class SchemaLoader + "WITH COMPACT STORAGE", ks_cql) ))); + if (DatabaseDescriptor.getPartitioner() instanceof Murmur3Partitioner) + schema.add(KeyspaceMetadata.create("sasi", KeyspaceParams.simpleTransient(1), Tables.of(sasiCFMD("sasi", "test_cf"), clusteringSASICFMD("sasi", "clustering_test_cf")))); if (Boolean.parseBoolean(System.getProperty("cassandra.test.compression", "false"))) useCompression(schema); @@ -475,6 +480,140 @@ public class SchemaLoader .compression(getCompressionParameters()); } + public static CFMetaData sasiCFMD(String ksName, String cfName) + { + CFMetaData cfm = CFMetaData.Builder.create(ksName, cfName) + .addPartitionKey("id", UTF8Type.instance) + .addRegularColumn("first_name", UTF8Type.instance) + .addRegularColumn("last_name", UTF8Type.instance) + .addRegularColumn("age", Int32Type.instance) + .addRegularColumn("height", Int32Type.instance) + .addRegularColumn("timestamp", LongType.instance) + .addRegularColumn("address", UTF8Type.instance) + .addRegularColumn("score", DoubleType.instance) + .addRegularColumn("comment", UTF8Type.instance) + .addRegularColumn("comment_suffix_split", UTF8Type.instance) + .addRegularColumn("/output/full-name/", UTF8Type.instance) + .addRegularColumn("/data/output/id", UTF8Type.instance) + .addRegularColumn("first_name_prefix", UTF8Type.instance) + .build(); + + cfm.indexes(cfm.getIndexes() + .with(IndexMetadata.fromSchemaMetadata("first_name", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "first_name"); + put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString()); + }})) + .with(IndexMetadata.fromSchemaMetadata("last_name", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "last_name"); + put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString()); + }})) + .with(IndexMetadata.fromSchemaMetadata("age", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "age"); + + }})) + .with(IndexMetadata.fromSchemaMetadata("timestamp", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "timestamp"); + put("mode", OnDiskIndexBuilder.Mode.SPARSE.toString()); + + }})) + .with(IndexMetadata.fromSchemaMetadata("address", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put("analyzer_class", "org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer"); + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "address"); + put("mode", OnDiskIndexBuilder.Mode.PREFIX.toString()); + put("case_sensitive", "false"); + }})) + .with(IndexMetadata.fromSchemaMetadata("score", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "score"); + }})) + .with(IndexMetadata.fromSchemaMetadata("comment", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "comment"); + put("analyzed", "true"); + }})) + .with(IndexMetadata.fromSchemaMetadata("comment_suffix_split", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "comment_suffix_split"); + put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString()); + put("analyzed", "false"); + }})) + .with(IndexMetadata.fromSchemaMetadata("output_full_name", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "/output/full-name/"); + put("analyzed", "true"); + put("analyzer_class", "org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer"); + put("case_sensitive", "false"); + }})) + .with(IndexMetadata.fromSchemaMetadata("data_output_id", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "/data/output/id"); + put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString()); + }})) + .with(IndexMetadata.fromSchemaMetadata("first_name_prefix", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "first_name_prefix"); + put("analyzed", "true"); + put("tokenization_normalize_lowercase", "true"); + }}))); + + return cfm; + } + + public static CFMetaData clusteringSASICFMD(String ksName, String cfName) + { + CFMetaData cfm = CFMetaData.Builder.create(ksName, cfName) + .addPartitionKey("name", UTF8Type.instance) + .addClusteringColumn("location", UTF8Type.instance) + .addClusteringColumn("age", Int32Type.instance) + .addRegularColumn("height", Int32Type.instance) + .addRegularColumn("score", DoubleType.instance) + .build(); + + cfm.indexes(cfm.getIndexes() + .with(IndexMetadata.fromSchemaMetadata("location", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "location"); + put("mode", OnDiskIndexBuilder.Mode.PREFIX.toString()); + }})) + .with(IndexMetadata.fromSchemaMetadata("age", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "age"); + put("mode", OnDiskIndexBuilder.Mode.PREFIX.toString()); + }})) + .with(IndexMetadata.fromSchemaMetadata("height", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "height"); + put("mode", OnDiskIndexBuilder.Mode.PREFIX.toString()); + }})) + .with(IndexMetadata.fromSchemaMetadata("score", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>() + {{ + put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); + put(IndexTarget.TARGET_OPTION_NAME, "score"); + put("mode", OnDiskIndexBuilder.Mode.PREFIX.toString()); + }}))); + + return cfm; + } + public static CompressionParams getCompressionParameters() { return getCompressionParameters(null); http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/test/unit/org/apache/cassandra/cql3/KeyCacheCqlTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/cql3/KeyCacheCqlTest.java b/test/unit/org/apache/cassandra/cql3/KeyCacheCqlTest.java index 54d39b1..2529de1 100644 --- a/test/unit/org/apache/cassandra/cql3/KeyCacheCqlTest.java +++ b/test/unit/org/apache/cassandra/cql3/KeyCacheCqlTest.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.concurrent.Callable; import org.junit.Assert; import org.junit.Test; @@ -29,6 +30,7 @@ import org.junit.Test; import org.apache.cassandra.cache.KeyCacheKey; import org.apache.cassandra.config.Schema; import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.index.Index; import org.apache.cassandra.metrics.CacheMetrics; import org.apache.cassandra.metrics.CassandraMetricsRegistry; import org.apache.cassandra.service.CacheService; @@ -407,7 +409,7 @@ public class KeyCacheCqlTest extends CQLTester if (index != null) { StorageService.instance.disableAutoCompaction(KEYSPACE, table + '.' + index); - Keyspace.open(KEYSPACE).getColumnFamilyStore(table).indexManager.getIndexByName(index).getBlockingFlushTask().call(); + triggerBlockingFlush(Keyspace.open(KEYSPACE).getColumnFamilyStore(table).indexManager.getIndexByName(index)); } for (int i = 0; i < 100; i++) @@ -432,7 +434,7 @@ public class KeyCacheCqlTest extends CQLTester { Keyspace.open(KEYSPACE).getColumnFamilyStore(table).forceFlush().get(); if (index != null) - Keyspace.open(KEYSPACE).getColumnFamilyStore(table).indexManager.getIndexByName(index).getBlockingFlushTask().call(); + triggerBlockingFlush(Keyspace.open(KEYSPACE).getColumnFamilyStore(table).indexManager.getIndexByName(index)); } } } @@ -464,4 +466,12 @@ public class KeyCacheCqlTest extends CQLTester Assert.assertEquals(0L, metrics.requests.getCount()); Assert.assertEquals(0L, metrics.size.getValue().longValue()); } + + private static void triggerBlockingFlush(Index index) throws Exception + { + assert index != null; + Callable<?> flushTask = index.getBlockingFlushTask(); + if (flushTask != null) + flushTask.call(); + } }