Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/language/uk.ngp URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/language/uk.ngp?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-core/src/main/resources/org/apache/tika/language/uk.ngp (original) +++ tika/trunk/tika-core/src/main/resources/org/apache/tika/language/uk.ngp Mon Nov 17 10:32:23 2014 @@ -1,1014 +1,1014 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -на_ 31005 -_на 30413 -_Ð¿Ñ 27783 -_по 25204 -ого 22546 -_Ñ_ 22494 -го_ 21091 -_за 20327 -ий_ 20183 -_в_ 20089 -Ð¸Ñ _ 20023 -_ви 19862 -_Ñо 19378 -_Ñа 19078 -_ÑÑ 18927 -Ñв_ 18762 -_ко 18571 -нÑ_ 18400 -Ð½Ð½Ñ 17493 -_Ð²Ñ 16666 -_до 16607 -Ñа_ 16281 -пÑо 16163 -ÑÑ_ 16121 -_Ñк 15972 -_з_ 15953 -_Ñ_ 15901 -ÑÑк 15538 -нÑ_ 14883 -_th 14863 -ÑÑо 14729 -вÑд 14181 -Ñи_ 13996 -the 13870 -ÑÑ_ 13749 -ом_ 13601 -ми_ 13215 -ÑÑа 12930 -еÑе 12896 -ÑÑ_ 12835 -Ð¿ÐµÑ 12814 -оÑ_ 12775 -Ñва 12375 -he_ 12322 -енн 11280 -_Ð¿Ñ 11279 -оÑ_ 11274 -лÑн 11135 -кÑ_ 10750 -ка_ 10741 -ван 10642 -_Ð±Ñ 10602 -иÑÑ 10393 -Ð°Ð»Ñ 10192 -_пе 10151 -Ð½Ð¸Ñ 10037 -ком 9970 -ÑÑÑ 9955 -мÑ_ 9909 -_Ñо 9841 -_не 9808 -on_ 9775 -er_ 9750 -ний 9654 -Ñко 9587 -аÑи 9472 -Ñо_ 9445 -ки_ 9293 -анн 9289 -ли_ 9275 -лÑ_ 9265 -Ð¾Ð»Ñ 9258 -ii_ 9168 -им_ 9115 -пÑи 9070 -но_ 8988 -_мо 8963 -оÑÑ 8878 -ла_ 8858 -ÑÐ»Ñ 8796 -_ма 8783 -ÑÑ_ 8699 -es_ 8469 -Ð¾Ð¼Ñ 8239 -ÑÑÑ 8186 -ÑÑÑ 8181 -лÑÑ 8175 -ала 8069 -не_ 7970 -до_ 7967 -Ñй_ 7880 -Ñоз 7833 -_of 7682 -_Ð¼Ñ 7638 -ÐºÐ¾Ñ 7525 -_Ñп 7516 -ion 7514 -_Ñн 7488 -_in 7412 -of_ 7353 -ови 7296 -Ð°ÐºÑ 7195 -ова 7175 -ÑÐ¾Ñ 7130 -ика 7023 -Ñол 6990 -нов 6948 -and 6892 -Ñки 6868 -оÑи 6860 -nd_ 6740 -_ÑÑ 6629 -_co 6626 -Ð´Ð»Ñ 6620 -ав_ 6588 -ног 6576 -оло 6571 -_дл 6564 -ÑÐ¸Ñ 6483 -ним 6435 -пÑд 6426 -_de 6391 -Ð°Ð½Ñ 6362 -_an 6356 -Ñд_ 6269 -Ñок 6196 -ÑÑи 6181 -Ð¾Ð²Ñ 6129 -Ñан 6125 -Ñак 6070 -ико 6038 -аÑÑ 6035 -Ð²ÐµÑ 6012 -Ñоб 5988 -_ве 5978 -аÑÑ 5953 -кий 5945 -ÑÑÑ 5902 -al_ 5899 -ово 5897 -лен 5834 -нÑ_ 5815 -ÑÑÑ 5800 -_ma 5784 -_ка 5754 -tio 5750 -али 5746 -ÑÑÑ 5735 -ÑÐ°Ñ 5723 -ÐµÐ½Ñ 5675 -ами 5660 -ÑÐµÑ 5657 -_Ñо 5635 -ни_ 5596 -ÑÑÑ 5553 -in_ 5547 -оÑо 5536 -_Ñе 5524 -_Ñе 5512 -_Ñв 5506 -кон 5476 -_де 5474 -_Ñе 5445 -_ба 5391 -ÑÑ_ 5385 -пов 5355 -бÑл 5331 -мÑÑ 5313 -ter 5306 -мен 5287 -ÑÑÑ 5276 -кла 5261 -зна 5256 -кÑи 5251 -an_ 5221 -Ñов 5161 -Ñод 5160 -_га 5147 -оÑÑ 5110 -_па 5103 -_ii 5092 -вик 5087 -ва_ 5054 -ном 5052 -она 5018 -ÑÑ_ 4997 -ÑÑÑ 4977 -оди 4956 -Ñед 4926 -ati 4905 -гал 4902 -ing 4883 -Ð¾ÐºÑ 4879 -ng_ 4863 -ког 4849 -ÑÑ_ 4841 -_ме 4826 -аÑ_ 4813 -ÑÑа 4809 -пÑа 4768 -вал 4763 -нÑÑ 4732 -ÑÑ_ 4726 -Ñки 4721 -_re 4712 -_Ñи 4712 -ло_ 4692 -ÑÑÑ 4662 -en_ 4628 -Ð½Ð¾Ñ 4628 -_во 4627 -is_ 4626 -_а_ 4607 -Ð°Ñ _ 4600 -ник 4599 -Ñк_ 4574 -омп 4565 -ent 4564 -гÑа 4562 -_Ð³Ñ 4560 -ани 4551 -Ð²Ð¾Ñ 4530 -ÑÑи 4524 -бÑл 4514 -за_ 4511 -Ñво 4476 -_Ñа 4467 -Ñик 4454 -але 4453 -Ñи_ 4444 -le_ 4443 -Ð°Ð»Ñ 4442 -_ал 4433 -ими 4416 -Ð²Ð°Ñ 4400 -Ñав 4384 -_Ð¾Ñ 4373 -дно 4373 -Ñдн 4368 -иÑн 4342 -ÑÑÑ 4312 -ÑÑ_ 4301 -ÑÑв 4280 -Ð»Ð°Ñ 4276 -кÑа 4273 -_об 4249 -лад 4248 -еÑÑ 4207 -Ð¾Ð²Ñ 4195 -одн 4169 -мÑн 4123 -_st 4116 -ков 4101 -us_ 4097 -зÑÑ 4094 -ÑÑÑ 4082 -Ñим 4081 -кÑ_ 4078 -ако 4074 -ÑÐ·Ñ 4072 -_i_ 4060 -ed_ 4058 -бÑв 4018 -Ñом 4003 -ÑÐµÑ 3991 -_Ð±Ñ 3985 -_Ñа 3978 -ано 3971 -_wi 3956 -_Ñ_ 3948 -re_ 3943 -Ñйн 3924 -_ÑÑ 3910 -еÑа 3908 -ив_ 3902 -нал 3886 -ÑÐ¿Ñ 3885 -лÑ_ 3870 -ени 3856 -нÑÑ 3856 -ÑÑе 3856 -_ca 3843 -Ñен 3841 -ÑÑз 3836 -_a_ 3826 -ÑÑн 3825 -дов 3822 -ia_ 3808 -кол 3793 -аÑо 3785 -кÑв 3779 -ÐºÐ¸Ñ 3762 -ÑÑв 3756 -овн 3752 -_ÑÑ 3751 -_pa 3742 -Ñал 3738 -_од 3736 -Ñан 3736 -Ñов 3734 -Ñно 3733 -ÑÐ¾Ñ 3713 -пол 3687 -_to 3680 -ÑÑв 3663 -Ñа_ 3662 -iii 3660 -Ñен 3657 -жен 3637 -ана 3619 -_li 3584 -иÑи 3576 -Ñив 3572 -_но 3553 -ÑÐ¾Ñ 3533 -кож 3524 -ÑÑо 3518 -Ñо_ 3513 -йÑÑ 3502 -_Ñе 3495 -ÑÐ¸Ñ 3488 -мож 3483 -лак 3473 -ÐºÐ¾Ñ 3468 -дин 3466 -_pr 3463 -Ð½Ð¾Ñ 3462 -Ñно 3423 -_Ñк 3421 -de_ 3419 -Ð¾Ð´Ñ 3408 -вол 3387 -Ñам 3380 -_йо 3369 -Ñал 3365 -вни 3364 -ÑÐ°Ñ 3363 -_vi 3357 -еле 3357 -_mi 3354 -оÑм 3343 -_ÐºÑ 3342 -ок_ 3339 -Ð¿Ð¸Ñ 3329 -Ñом 3326 -Ð¼ÐµÑ 3315 -вÑÑ 3299 -Ð¾Ð½Ñ 3283 -вÑ_ 3279 -or_ 3277 -Ñе_ 3249 -_Ñе 3234 -_го 3214 -_fo 3213 -ÑÐ¹Ñ 3195 -ден 3189 -лив 3184 -ага 3181 -_xv 3179 -Ñв_ 3161 -Ñни 3155 -_mo 3149 -анд 3145 -ne_ 3135 -Ð°Ð½Ñ 3132 -Ð½Ð°Ñ 3127 -_se 3113 -Ð¿Ð¾Ñ 3110 -ини 3107 -for 3105 -_зн 3103 -_s_ 3096 -_Ñа 3094 -ÐµÐºÑ 3090 -аÑÑ 3084 -_be 3081 -te_ 3074 -ож_ 3073 -_po 3068 -вно 3067 -вел 3058 -ÑÐ¾Ñ 3058 -нÑй 3057 -Ñни 3054 -вид 3046 -_Ð²Ñ 3044 -дÑ_ 3040 -_бе 3038 -ина 3038 -_xi 3022 -ÑÑ_ 3022 -_Ñ_ 3012 -_so 3011 -ce_ 3010 -ÑÑй 3010 -ÑÐ¾Ñ 3008 -або 3003 -лÑÑ 2994 -мов 2970 -мпа 2970 -Ð¿Ð¾Ñ 2969 -дом 2963 -дан 2960 -пÑв 2960 -же_ 2946 -Ñ Ð¾Ð´ 2945 -оÑÑ 2937 -Ñно 2922 -nte 2921 -ÑÑ_ 2914 -Ñав 2911 -дÑ_ 2891 -Ñво 2874 -_ch 2868 -_аб 2868 -иÑÑ 2859 -ÐµÐ½Ñ 2856 -оÑа 2855 -_Ð»Ñ 2852 -ÑÐ¸Ñ 2851 -to_ 2843 -_la 2839 -вÑÑ 2832 -ало 2824 -vii 2814 -ers 2812 -аÑн 2809 -иÑÑ 2808 -Ñин 2806 -ÑÐ°Ñ 2799 -_ан 2795 -Ð²Ð¸Ñ 2790 -еÑÑ 2789 -лик 2778 -st_ 2777 -Ð°Ð½Ñ 2775 -вав 2773 -бÑд 2771 -ÑÐºÑ 2769 -_di 2766 -еÑи 2763 -Ð½Ð¾Ñ 2760 -ман 2759 -кан 2757 -аÑа 2749 -лов 2746 -_sa 2738 -ry_ 2731 -ÑÐ½Ñ 2730 -Ñон 2727 -se_ 2717 -ica 2710 -лÑк 2709 -nt_ 2708 -най 2707 -ver 2705 -_да 2702 -_me 2701 -ено 2701 -обл 2700 -_Ñи 2699 -_al 2689 -йог 2685 -xvi 2676 -ели 2676 -бо_ 2667 -Ñвн 2659 -ic_ 2651 -Ð²Ð½Ñ 2650 -дже 2649 -зап 2649 -Ñем 2647 -ÑÐ°Ñ 2645 -во_ 2643 -аÑÑ 2642 -Ð¿Ð¾Ñ 2636 -ÑÑÑ 2636 -_un 2635 -ÑÐµÑ 2634 -ада 2630 -Ð¼Ð°Ñ 2621 -lin 2620 -бÑа 2613 -_ar 2608 -ила 2608 -Ð½Ð½Ñ 2607 -нÑÑ 2602 -Ñог 2595 -int 2589 -et_ 2588 -ÑÑÑ 2586 -ÑÐ¸Ñ 2580 -_ba 2575 -ива 2570 -опо 2566 -ns_ 2563 -оли 2561 -или 2556 -ода 2556 -_no 2554 -ate 2551 -аÑн 2549 -еÑÑ 2536 -Ð±ÐµÑ 2530 -ÑÐ²Ñ 2529 -еÑн 2527 -ist 2518 -нÑ_ 2511 -ch_ 2494 -Ð¾Ð»Ñ 2494 -ам_ 2491 -ix_ 2488 -ома 2484 -ine 2474 -ÑÑÑ 2471 -_fr 2470 -her 2467 -_зв 2465 -_Ð´Ñ 2461 -оÑÑ 2461 -tor 2452 -Ñез 2448 -_Ð½Ñ 2444 -Ñзн 2443 -Ñаз 2442 -_пл 2440 -Ñо_ 2439 -Ñко 2437 -_ro 2433 -ste 2428 -ким 2423 -Ð»Ð¾Ñ 2420 -Ð²Ð¸Ñ 2418 -Ð²Ð¾Ñ 2416 -_ва 2413 -нÑо 2408 -ÑÐ°Ñ 2404 -одо 2403 -_ав 2398 -ÑÑп 2395 -Ñн_ 2394 -sta 2391 -обо 2390 -der 2388 -оÑи 2388 -ивн 2386 -Ð»Ð¸Ñ 2383 -нÑв 2383 -ind 2377 -вÑй 2375 -_Ñи 2374 -оÑе 2372 -Ñи_ 2371 -el_ 2367 -Ð¾Ð³Ñ 2359 -ена 2352 -Ð´Ð¾Ñ 2351 -as_ 2350 -_ди 2348 -над 2347 -наз 2347 -_ÐºÑ 2341 -Ñва 2341 -дни 2337 -ÑÐµÑ 2337 -пÑе 2334 -омо 2329 -ÑÑ_ 2329 -_кл 2324 -_Ñо 2324 -Ñна 2314 -пан 2306 -лÑв 2304 -ди_ 2303 -нÑа 2302 -_is 2300 -ÑÑ_ 2300 -ÑÑ_ 2299 -Ñик 2298 -_ne 2287 -con 2287 -eri 2287 -Ð¾Ð´Ñ 2286 -_бо 2281 -без 2274 -ll_ 2271 -Ñм_ 2263 -Ñдо 2260 -еÑÑ 2256 -ма_ 2252 -ÐºÐ¾Ñ 2249 -аме 2246 -_Ñо 2245 -_le 2244 -аÑÑ 2243 -Ð¼ÐµÑ 2243 -вÑн 2240 -ÑÑ_ 2238 -com 2234 -Ð²Ð¾Ñ 2233 -пÑÑ 2233 -res 2227 -at_ 2226 -нÑв 2223 -оÑа 2223 -che 2221 -мал 2220 -rs_ 2218 -апи 2218 -ле_ 2216 -ÑÑа 2213 -ill 2212 -под 2211 -art 2210 -ики 2207 -Ð·Ð¼Ñ 2202 -_na 2200 -la_ 2197 -th_ 2197 -Ð´Ð½Ñ 2192 -_Ð±Ñ 2178 -Ñла 2174 -вÑ_ 2171 -нап 2169 -Ð·Ð°Ñ 2164 -um_ 2159 -iv_ 2158 -оÑо 2158 -ÑÐ¸Ñ 2158 -кÑл 2154 -lan 2149 -_ел 2148 -Ñоп 2147 -Ñкл 2147 -зÑо 2146 -Ñол 2145 -ge_ 2136 -_te 2133 -_ge 2132 -ÑÑи 2127 -Ð´ÐµÑ 2126 -tra 2123 -_he 2119 -нен 2116 -нÑм 2116 -Ð¿Ð°Ñ 2116 -_da 2115 -пла 2115 -Ñпо 2115 -ÑÑÑ 2114 -аÑÑ 2103 -Ð³Ð¾Ñ 2100 -ем_ 2100 -кÑи 2095 -ÑÐºÑ 2095 -вод 2089 -лан 2086 -дÑв 2073 -_do 2071 -_Ð¼Ñ 2070 -ÐµÐ»Ñ 2069 -all 2068 -man 2066 -Ñма 2066 -_зм 2064 -ts_ 2063 -аÑо 2058 -_Ð°Ñ 2057 -Ñло 2057 -ÑÑ_ 2056 -вип 2055 -ta_ 2054 -_sc 2052 -ви_ 2047 -гол 2047 -und 2041 -ÑÐ°Ñ 2037 -ей_ 2031 -per 2025 -Ñог 2022 -est 2020 -ÑÐ½Ñ 2014 -ÐºÐ°Ñ 2012 -Ñон 2012 -Ð¾Ð¼Ñ 2010 -еÑ_ 2009 -вий 2007 -Ñел 2003 -_Ñк 2002 -_pe 2001 -лог 2001 -ÐµÐºÑ 1994 -озв 1990 -ell 1988 -ко_ 1988 -ÑÑн 1986 -_оп 1984 -_ÑÑ 1984 -вÑо 1983 -_Ð¾Ñ 1980 -ино 1980 -ive 1977 -ава 1969 -_su 1968 -_ли 1968 -Ð¿Ð¾Ñ 1967 -ов_ 1966 -_bo 1964 -_si 1963 -иÑÑ 1961 -_ha 1960 -ÐµÐ»Ñ 1960 -_ге 1957 -ене 1955 -Ñем 1954 -_Ñз 1953 -нÑм 1951 -age 1949 -лÑ_ 1949 -дал 1947 -аÑ_ 1945 -str 1941 -лек 1941 -ran 1938 -na_ 1929 -Ð½Ð¸Ñ 1929 -оно 1929 -вÑÑ 1925 -Ñин 1925 -нÑе 1916 -Ñен 1912 -Ñка 1912 -нÑÑ 1908 -ve_ 1905 -Ð¾Ð½Ñ 1901 -ÑÑÑ 1900 -вим 1898 -Ð°Ð²Ñ 1897 -nal 1896 -pro 1893 -пак 1892 -еÑо 1889 -вог 1884 -ави 1883 -зав 1883 -ин_ 1881 -зал 1877 -me_ 1876 -Ñз_ 1876 -ÑÐ´Ñ 1874 -_gr 1873 -ове 1873 -пÑÑ 1870 -ons 1868 -win 1865 -ÑÑ_ 1864 -_tr 1863 -_en 1862 -иÑо 1862 -ÑÑÑ 1861 -sto 1860 -изн 1860 -sch 1858 -_ho 1857 -ant 1855 -иво 1855 -Ñм_ 1855 -ra_ 1853 -де_ 1851 -кам 1851 -лÑн 1847 -нав 1846 -ÑÑÑ 1842 -ess 1838 -_зо 1837 -ÑÑз 1835 -ÑÐºÑ 1833 -Ñдп 1833 -Ð¾Ð±Ñ 1832 -аÑе 1829 -Ñе_ 1829 -Ñ Ñд 1823 -ÑÑв 1822 -men 1820 -ty_ 1820 -по_ 1820 -_дв 1816 -ari 1815 -ÑÐ¸Ñ 1813 -ад_ 1811 -Ð¾ÐºÑ 1811 -опе 1811 -Ñнн 1805 -бли 1804 -Ñоб 1802 -гÑÑ 1801 -он_ 1801 -_Ñм 1799 -Ð³Ð°Ñ 1795 -иÑа 1790 -ian 1789 -Ñли 1789 -Ð¸Ð¿Ñ 1788 -оÑÑ 1787 -_Ñи 1786 -_й_ 1785 -_ле 1785 -оле 1784 -ar_ 1781 -вÑÑ 1780 -ÑÑк 1780 -нÑÑ 1779 -Ñам 1779 -йни 1778 -Ð²Ð¸Ñ 1777 -Ð½Ð°Ñ 1777 -зви 1776 -om_ 1775 -йно 1775 -mic 1772 -вле 1769 -оÑ_ 1763 -ect 1759 -ona 1759 -Ð»Ð¸Ñ 1758 -_fi 1757 -_зб 1757 -вин 1757 -rd_ 1755 -uni 1754 -авл 1754 -еÑÑ 1754 -Ñни 1753 -ss_ 1752 -_ÑÑ 1750 -ndo 1750 -нÑÑ 1749 -Ñва 1749 -cha 1747 -_Ð³Ñ 1746 -ÑÑÑ 1743 -ÑÐ»Ñ 1743 -Ð°Ð´Ñ 1741 -mar 1734 -ard 1733 -era 1732 -пом 1732 -азв 1731 -ern 1730 -Ð²Ð¸Ñ 1729 -_on 1728 -око 1726 -Ñко 1724 -ез_ 1722 -Ð¾Ð·Ñ 1721 -Ñни 1720 -дав 1718 -ÐµÐ½Ñ 1718 -Ð´Ð¸Ñ 1714 -ame 1711 -ак_ 1710 -лем 1710 -_lo 1700 -les 1698 -ган 1697 -Ð¸ÐºÑ 1695 -Ð½Ð°Ñ 1691 -nce 1690 -ber 1689 -ain 1683 -edi 1680 -Ð°Ð²Ñ 1673 -tur 1672 -_ÐºÑ 1668 -оÑл 1668 -ord 1667 -_ra 1663 -вон 1660 -ie_ 1659 -Ð¼Ð°Ñ 1659 -ÑÑа 1657 -лон 1656 -ÑÑÑ 1656 -Ñив 1651 -_ÑÑ 1647 -rat 1646 -ros 1645 -нÑи 1640 -нÑи 1638 -има 1637 -Ñим 1631 -Ð°Ð½Ñ 1630 -cor 1629 -it_ 1629 -rea 1629 -_ÑÑ 1628 -ric 1627 -_wa 1626 -еÑв 1626 -оме 1625 -икл 1624 -one 1623 -вил 1623 -пÑÑ 1623 -аÑа 1614 -лÑ_ 1613 -нÑÑ 1613 -nde 1611 -зво 1610 -мог 1609 -поз 1609 -Ñка 1609 -кÑн 1608 -дна 1606 -вла 1605 -зÑ_ 1604 -нан 1604 -ÑÑи 1604 -иÑо 1603 -Ñме 1603 -ÑÑ _ 1601 -os_ 1599 -ris 1599 -_ja 1598 -_Ñа 1598 -Ð¾Ð±Ñ 1597 -мÑж 1596 -оби 1594 -Ñад 1594 -Ñна 1593 -Ñка 1592 -ÐºÐ°Ñ 1589 -нÑÑ 1588 -dow 1587 -_as 1584 -кÑÑ 1584 -_Ð´Ñ 1582 -ÑÑд 1580 -Ñин 1579 -_sh 1576 -Ñог 1575 -tic 1574 -Ð´Ð°Ñ 1574 -зов 1574 -_вл 1572 -Ð°Ð¼Ñ 1571 -_au 1570 -Ð°Ð¿Ñ 1569 -баг 1568 -Ñно 1567 -Ñдк 1563 -par 1561 -ÑÑÑ 1561 -_c_ 1560 -бом 1560 -об_ 1560 -Ñал 1560 -они 1557 -пон 1552 -авн 1551 -vi_ 1550 -аÑе 1550 -ÑÑ_ 1550 -ws_ 1549 -Ñен 1546 -_br 1545 -Ð±Ð¾Ñ 1540 -оÑо 1539 -Ð¼Ð°Ñ 1538 -_iv 1537 -кал 1536 -Ñен 1536 -min 1535 -Ñеж 1533 -ÑÐ½Ñ 1532 -Ñни 1531 -nat 1530 -_sp 1525 -важ 1525 -_дж 1524 -лÑб 1523 -_Ñл 1520 -лÑз 1520 -ан_ 1519 -Ð¾Ð»Ñ 1519 -Ñел 1519 -ÑÑа 1517 -Ð¾Ð½Ñ 1515 -поп 1515 -ÑÑ _ 1514 -аÑк 1511 -ope 1509 -ема 1509 -Ð¸Ð½Ñ 1509 -ожн 1509 -Ñам 1509 -оÑг 1508 -his 1505 -бÑÑ 1505 -_x_ 1504 +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +на_ 31005 +_на 30413 +_Ð¿Ñ 27783 +_по 25204 +ого 22546 +_Ñ_ 22494 +го_ 21091 +_за 20327 +ий_ 20183 +_в_ 20089 +Ð¸Ñ _ 20023 +_ви 19862 +_Ñо 19378 +_Ñа 19078 +_ÑÑ 18927 +Ñв_ 18762 +_ко 18571 +нÑ_ 18400 +Ð½Ð½Ñ 17493 +_Ð²Ñ 16666 +_до 16607 +Ñа_ 16281 +пÑо 16163 +ÑÑ_ 16121 +_Ñк 15972 +_з_ 15953 +_Ñ_ 15901 +ÑÑк 15538 +нÑ_ 14883 +_th 14863 +ÑÑо 14729 +вÑд 14181 +Ñи_ 13996 +the 13870 +ÑÑ_ 13749 +ом_ 13601 +ми_ 13215 +ÑÑа 12930 +еÑе 12896 +ÑÑ_ 12835 +Ð¿ÐµÑ 12814 +оÑ_ 12775 +Ñва 12375 +he_ 12322 +енн 11280 +_Ð¿Ñ 11279 +оÑ_ 11274 +лÑн 11135 +кÑ_ 10750 +ка_ 10741 +ван 10642 +_Ð±Ñ 10602 +иÑÑ 10393 +Ð°Ð»Ñ 10192 +_пе 10151 +Ð½Ð¸Ñ 10037 +ком 9970 +ÑÑÑ 9955 +мÑ_ 9909 +_Ñо 9841 +_не 9808 +on_ 9775 +er_ 9750 +ний 9654 +Ñко 9587 +аÑи 9472 +Ñо_ 9445 +ки_ 9293 +анн 9289 +ли_ 9275 +лÑ_ 9265 +Ð¾Ð»Ñ 9258 +ii_ 9168 +им_ 9115 +пÑи 9070 +но_ 8988 +_мо 8963 +оÑÑ 8878 +ла_ 8858 +ÑÐ»Ñ 8796 +_ма 8783 +ÑÑ_ 8699 +es_ 8469 +Ð¾Ð¼Ñ 8239 +ÑÑÑ 8186 +ÑÑÑ 8181 +лÑÑ 8175 +ала 8069 +не_ 7970 +до_ 7967 +Ñй_ 7880 +Ñоз 7833 +_of 7682 +_Ð¼Ñ 7638 +ÐºÐ¾Ñ 7525 +_Ñп 7516 +ion 7514 +_Ñн 7488 +_in 7412 +of_ 7353 +ови 7296 +Ð°ÐºÑ 7195 +ова 7175 +ÑÐ¾Ñ 7130 +ика 7023 +Ñол 6990 +нов 6948 +and 6892 +Ñки 6868 +оÑи 6860 +nd_ 6740 +_ÑÑ 6629 +_co 6626 +Ð´Ð»Ñ 6620 +ав_ 6588 +ног 6576 +оло 6571 +_дл 6564 +ÑÐ¸Ñ 6483 +ним 6435 +пÑд 6426 +_de 6391 +Ð°Ð½Ñ 6362 +_an 6356 +Ñд_ 6269 +Ñок 6196 +ÑÑи 6181 +Ð¾Ð²Ñ 6129 +Ñан 6125 +Ñак 6070 +ико 6038 +аÑÑ 6035 +Ð²ÐµÑ 6012 +Ñоб 5988 +_ве 5978 +аÑÑ 5953 +кий 5945 +ÑÑÑ 5902 +al_ 5899 +ово 5897 +лен 5834 +нÑ_ 5815 +ÑÑÑ 5800 +_ma 5784 +_ка 5754 +tio 5750 +али 5746 +ÑÑÑ 5735 +ÑÐ°Ñ 5723 +ÐµÐ½Ñ 5675 +ами 5660 +ÑÐµÑ 5657 +_Ñо 5635 +ни_ 5596 +ÑÑÑ 5553 +in_ 5547 +оÑо 5536 +_Ñе 5524 +_Ñе 5512 +_Ñв 5506 +кон 5476 +_де 5474 +_Ñе 5445 +_ба 5391 +ÑÑ_ 5385 +пов 5355 +бÑл 5331 +мÑÑ 5313 +ter 5306 +мен 5287 +ÑÑÑ 5276 +кла 5261 +зна 5256 +кÑи 5251 +an_ 5221 +Ñов 5161 +Ñод 5160 +_га 5147 +оÑÑ 5110 +_па 5103 +_ii 5092 +вик 5087 +ва_ 5054 +ном 5052 +она 5018 +ÑÑ_ 4997 +ÑÑÑ 4977 +оди 4956 +Ñед 4926 +ati 4905 +гал 4902 +ing 4883 +Ð¾ÐºÑ 4879 +ng_ 4863 +ког 4849 +ÑÑ_ 4841 +_ме 4826 +аÑ_ 4813 +ÑÑа 4809 +пÑа 4768 +вал 4763 +нÑÑ 4732 +ÑÑ_ 4726 +Ñки 4721 +_re 4712 +_Ñи 4712 +ло_ 4692 +ÑÑÑ 4662 +en_ 4628 +Ð½Ð¾Ñ 4628 +_во 4627 +is_ 4626 +_а_ 4607 +Ð°Ñ _ 4600 +ник 4599 +Ñк_ 4574 +омп 4565 +ent 4564 +гÑа 4562 +_Ð³Ñ 4560 +ани 4551 +Ð²Ð¾Ñ 4530 +ÑÑи 4524 +бÑл 4514 +за_ 4511 +Ñво 4476 +_Ñа 4467 +Ñик 4454 +але 4453 +Ñи_ 4444 +le_ 4443 +Ð°Ð»Ñ 4442 +_ал 4433 +ими 4416 +Ð²Ð°Ñ 4400 +Ñав 4384 +_Ð¾Ñ 4373 +дно 4373 +Ñдн 4368 +иÑн 4342 +ÑÑÑ 4312 +ÑÑ_ 4301 +ÑÑв 4280 +Ð»Ð°Ñ 4276 +кÑа 4273 +_об 4249 +лад 4248 +еÑÑ 4207 +Ð¾Ð²Ñ 4195 +одн 4169 +мÑн 4123 +_st 4116 +ков 4101 +us_ 4097 +зÑÑ 4094 +ÑÑÑ 4082 +Ñим 4081 +кÑ_ 4078 +ако 4074 +ÑÐ·Ñ 4072 +_i_ 4060 +ed_ 4058 +бÑв 4018 +Ñом 4003 +ÑÐµÑ 3991 +_Ð±Ñ 3985 +_Ñа 3978 +ано 3971 +_wi 3956 +_Ñ_ 3948 +re_ 3943 +Ñйн 3924 +_ÑÑ 3910 +еÑа 3908 +ив_ 3902 +нал 3886 +ÑÐ¿Ñ 3885 +лÑ_ 3870 +ени 3856 +нÑÑ 3856 +ÑÑе 3856 +_ca 3843 +Ñен 3841 +ÑÑз 3836 +_a_ 3826 +ÑÑн 3825 +дов 3822 +ia_ 3808 +кол 3793 +аÑо 3785 +кÑв 3779 +ÐºÐ¸Ñ 3762 +ÑÑв 3756 +овн 3752 +_ÑÑ 3751 +_pa 3742 +Ñал 3738 +_од 3736 +Ñан 3736 +Ñов 3734 +Ñно 3733 +ÑÐ¾Ñ 3713 +пол 3687 +_to 3680 +ÑÑв 3663 +Ñа_ 3662 +iii 3660 +Ñен 3657 +жен 3637 +ана 3619 +_li 3584 +иÑи 3576 +Ñив 3572 +_но 3553 +ÑÐ¾Ñ 3533 +кож 3524 +ÑÑо 3518 +Ñо_ 3513 +йÑÑ 3502 +_Ñе 3495 +ÑÐ¸Ñ 3488 +мож 3483 +лак 3473 +ÐºÐ¾Ñ 3468 +дин 3466 +_pr 3463 +Ð½Ð¾Ñ 3462 +Ñно 3423 +_Ñк 3421 +de_ 3419 +Ð¾Ð´Ñ 3408 +вол 3387 +Ñам 3380 +_йо 3369 +Ñал 3365 +вни 3364 +ÑÐ°Ñ 3363 +_vi 3357 +еле 3357 +_mi 3354 +оÑм 3343 +_ÐºÑ 3342 +ок_ 3339 +Ð¿Ð¸Ñ 3329 +Ñом 3326 +Ð¼ÐµÑ 3315 +вÑÑ 3299 +Ð¾Ð½Ñ 3283 +вÑ_ 3279 +or_ 3277 +Ñе_ 3249 +_Ñе 3234 +_го 3214 +_fo 3213 +ÑÐ¹Ñ 3195 +ден 3189 +лив 3184 +ага 3181 +_xv 3179 +Ñв_ 3161 +Ñни 3155 +_mo 3149 +анд 3145 +ne_ 3135 +Ð°Ð½Ñ 3132 +Ð½Ð°Ñ 3127 +_se 3113 +Ð¿Ð¾Ñ 3110 +ини 3107 +for 3105 +_зн 3103 +_s_ 3096 +_Ñа 3094 +ÐµÐºÑ 3090 +аÑÑ 3084 +_be 3081 +te_ 3074 +ож_ 3073 +_po 3068 +вно 3067 +вел 3058 +ÑÐ¾Ñ 3058 +нÑй 3057 +Ñни 3054 +вид 3046 +_Ð²Ñ 3044 +дÑ_ 3040 +_бе 3038 +ина 3038 +_xi 3022 +ÑÑ_ 3022 +_Ñ_ 3012 +_so 3011 +ce_ 3010 +ÑÑй 3010 +ÑÐ¾Ñ 3008 +або 3003 +лÑÑ 2994 +мов 2970 +мпа 2970 +Ð¿Ð¾Ñ 2969 +дом 2963 +дан 2960 +пÑв 2960 +же_ 2946 +Ñ Ð¾Ð´ 2945 +оÑÑ 2937 +Ñно 2922 +nte 2921 +ÑÑ_ 2914 +Ñав 2911 +дÑ_ 2891 +Ñво 2874 +_ch 2868 +_аб 2868 +иÑÑ 2859 +ÐµÐ½Ñ 2856 +оÑа 2855 +_Ð»Ñ 2852 +ÑÐ¸Ñ 2851 +to_ 2843 +_la 2839 +вÑÑ 2832 +ало 2824 +vii 2814 +ers 2812 +аÑн 2809 +иÑÑ 2808 +Ñин 2806 +ÑÐ°Ñ 2799 +_ан 2795 +Ð²Ð¸Ñ 2790 +еÑÑ 2789 +лик 2778 +st_ 2777 +Ð°Ð½Ñ 2775 +вав 2773 +бÑд 2771 +ÑÐºÑ 2769 +_di 2766 +еÑи 2763 +Ð½Ð¾Ñ 2760 +ман 2759 +кан 2757 +аÑа 2749 +лов 2746 +_sa 2738 +ry_ 2731 +ÑÐ½Ñ 2730 +Ñон 2727 +se_ 2717 +ica 2710 +лÑк 2709 +nt_ 2708 +най 2707 +ver 2705 +_да 2702 +_me 2701 +ено 2701 +обл 2700 +_Ñи 2699 +_al 2689 +йог 2685 +xvi 2676 +ели 2676 +бо_ 2667 +Ñвн 2659 +ic_ 2651 +Ð²Ð½Ñ 2650 +дже 2649 +зап 2649 +Ñем 2647 +ÑÐ°Ñ 2645 +во_ 2643 +аÑÑ 2642 +Ð¿Ð¾Ñ 2636 +ÑÑÑ 2636 +_un 2635 +ÑÐµÑ 2634 +ада 2630 +Ð¼Ð°Ñ 2621 +lin 2620 +бÑа 2613 +_ar 2608 +ила 2608 +Ð½Ð½Ñ 2607 +нÑÑ 2602 +Ñог 2595 +int 2589 +et_ 2588 +ÑÑÑ 2586 +ÑÐ¸Ñ 2580 +_ba 2575 +ива 2570 +опо 2566 +ns_ 2563 +оли 2561 +или 2556 +ода 2556 +_no 2554 +ate 2551 +аÑн 2549 +еÑÑ 2536 +Ð±ÐµÑ 2530 +ÑÐ²Ñ 2529 +еÑн 2527 +ist 2518 +нÑ_ 2511 +ch_ 2494 +Ð¾Ð»Ñ 2494 +ам_ 2491 +ix_ 2488 +ома 2484 +ine 2474 +ÑÑÑ 2471 +_fr 2470 +her 2467 +_зв 2465 +_Ð´Ñ 2461 +оÑÑ 2461 +tor 2452 +Ñез 2448 +_Ð½Ñ 2444 +Ñзн 2443 +Ñаз 2442 +_пл 2440 +Ñо_ 2439 +Ñко 2437 +_ro 2433 +ste 2428 +ким 2423 +Ð»Ð¾Ñ 2420 +Ð²Ð¸Ñ 2418 +Ð²Ð¾Ñ 2416 +_ва 2413 +нÑо 2408 +ÑÐ°Ñ 2404 +одо 2403 +_ав 2398 +ÑÑп 2395 +Ñн_ 2394 +sta 2391 +обо 2390 +der 2388 +оÑи 2388 +ивн 2386 +Ð»Ð¸Ñ 2383 +нÑв 2383 +ind 2377 +вÑй 2375 +_Ñи 2374 +оÑе 2372 +Ñи_ 2371 +el_ 2367 +Ð¾Ð³Ñ 2359 +ена 2352 +Ð´Ð¾Ñ 2351 +as_ 2350 +_ди 2348 +над 2347 +наз 2347 +_ÐºÑ 2341 +Ñва 2341 +дни 2337 +ÑÐµÑ 2337 +пÑе 2334 +омо 2329 +ÑÑ_ 2329 +_кл 2324 +_Ñо 2324 +Ñна 2314 +пан 2306 +лÑв 2304 +ди_ 2303 +нÑа 2302 +_is 2300 +ÑÑ_ 2300 +ÑÑ_ 2299 +Ñик 2298 +_ne 2287 +con 2287 +eri 2287 +Ð¾Ð´Ñ 2286 +_бо 2281 +без 2274 +ll_ 2271 +Ñм_ 2263 +Ñдо 2260 +еÑÑ 2256 +ма_ 2252 +ÐºÐ¾Ñ 2249 +аме 2246 +_Ñо 2245 +_le 2244 +аÑÑ 2243 +Ð¼ÐµÑ 2243 +вÑн 2240 +ÑÑ_ 2238 +com 2234 +Ð²Ð¾Ñ 2233 +пÑÑ 2233 +res 2227 +at_ 2226 +нÑв 2223 +оÑа 2223 +che 2221 +мал 2220 +rs_ 2218 +апи 2218 +ле_ 2216 +ÑÑа 2213 +ill 2212 +под 2211 +art 2210 +ики 2207 +Ð·Ð¼Ñ 2202 +_na 2200 +la_ 2197 +th_ 2197 +Ð´Ð½Ñ 2192 +_Ð±Ñ 2178 +Ñла 2174 +вÑ_ 2171 +нап 2169 +Ð·Ð°Ñ 2164 +um_ 2159 +iv_ 2158 +оÑо 2158 +ÑÐ¸Ñ 2158 +кÑл 2154 +lan 2149 +_ел 2148 +Ñоп 2147 +Ñкл 2147 +зÑо 2146 +Ñол 2145 +ge_ 2136 +_te 2133 +_ge 2132 +ÑÑи 2127 +Ð´ÐµÑ 2126 +tra 2123 +_he 2119 +нен 2116 +нÑм 2116 +Ð¿Ð°Ñ 2116 +_da 2115 +пла 2115 +Ñпо 2115 +ÑÑÑ 2114 +аÑÑ 2103 +Ð³Ð¾Ñ 2100 +ем_ 2100 +кÑи 2095 +ÑÐºÑ 2095 +вод 2089 +лан 2086 +дÑв 2073 +_do 2071 +_Ð¼Ñ 2070 +ÐµÐ»Ñ 2069 +all 2068 +man 2066 +Ñма 2066 +_зм 2064 +ts_ 2063 +аÑо 2058 +_Ð°Ñ 2057 +Ñло 2057 +ÑÑ_ 2056 +вип 2055 +ta_ 2054 +_sc 2052 +ви_ 2047 +гол 2047 +und 2041 +ÑÐ°Ñ 2037 +ей_ 2031 +per 2025 +Ñог 2022 +est 2020 +ÑÐ½Ñ 2014 +ÐºÐ°Ñ 2012 +Ñон 2012 +Ð¾Ð¼Ñ 2010 +еÑ_ 2009 +вий 2007 +Ñел 2003 +_Ñк 2002 +_pe 2001 +лог 2001 +ÐµÐºÑ 1994 +озв 1990 +ell 1988 +ко_ 1988 +ÑÑн 1986 +_оп 1984 +_ÑÑ 1984 +вÑо 1983 +_Ð¾Ñ 1980 +ино 1980 +ive 1977 +ава 1969 +_su 1968 +_ли 1968 +Ð¿Ð¾Ñ 1967 +ов_ 1966 +_bo 1964 +_si 1963 +иÑÑ 1961 +_ha 1960 +ÐµÐ»Ñ 1960 +_ге 1957 +ене 1955 +Ñем 1954 +_Ñз 1953 +нÑм 1951 +age 1949 +лÑ_ 1949 +дал 1947 +аÑ_ 1945 +str 1941 +лек 1941 +ran 1938 +na_ 1929 +Ð½Ð¸Ñ 1929 +оно 1929 +вÑÑ 1925 +Ñин 1925 +нÑе 1916 +Ñен 1912 +Ñка 1912 +нÑÑ 1908 +ve_ 1905 +Ð¾Ð½Ñ 1901 +ÑÑÑ 1900 +вим 1898 +Ð°Ð²Ñ 1897 +nal 1896 +pro 1893 +пак 1892 +еÑо 1889 +вог 1884 +ави 1883 +зав 1883 +ин_ 1881 +зал 1877 +me_ 1876 +Ñз_ 1876 +ÑÐ´Ñ 1874 +_gr 1873 +ове 1873 +пÑÑ 1870 +ons 1868 +win 1865 +ÑÑ_ 1864 +_tr 1863 +_en 1862 +иÑо 1862 +ÑÑÑ 1861 +sto 1860 +изн 1860 +sch 1858 +_ho 1857 +ant 1855 +иво 1855 +Ñм_ 1855 +ra_ 1853 +де_ 1851 +кам 1851 +лÑн 1847 +нав 1846 +ÑÑÑ 1842 +ess 1838 +_зо 1837 +ÑÑз 1835 +ÑÐºÑ 1833 +Ñдп 1833 +Ð¾Ð±Ñ 1832 +аÑе 1829 +Ñе_ 1829 +Ñ Ñд 1823 +ÑÑв 1822 +men 1820 +ty_ 1820 +по_ 1820 +_дв 1816 +ari 1815 +ÑÐ¸Ñ 1813 +ад_ 1811 +Ð¾ÐºÑ 1811 +опе 1811 +Ñнн 1805 +бли 1804 +Ñоб 1802 +гÑÑ 1801 +он_ 1801 +_Ñм 1799 +Ð³Ð°Ñ 1795 +иÑа 1790 +ian 1789 +Ñли 1789 +Ð¸Ð¿Ñ 1788 +оÑÑ 1787 +_Ñи 1786 +_й_ 1785 +_ле 1785 +оле 1784 +ar_ 1781 +вÑÑ 1780 +ÑÑк 1780 +нÑÑ 1779 +Ñам 1779 +йни 1778 +Ð²Ð¸Ñ 1777 +Ð½Ð°Ñ 1777 +зви 1776 +om_ 1775 +йно 1775 +mic 1772 +вле 1769 +оÑ_ 1763 +ect 1759 +ona 1759 +Ð»Ð¸Ñ 1758 +_fi 1757 +_зб 1757 +вин 1757 +rd_ 1755 +uni 1754 +авл 1754 +еÑÑ 1754 +Ñни 1753 +ss_ 1752 +_ÑÑ 1750 +ndo 1750 +нÑÑ 1749 +Ñва 1749 +cha 1747 +_Ð³Ñ 1746 +ÑÑÑ 1743 +ÑÐ»Ñ 1743 +Ð°Ð´Ñ 1741 +mar 1734 +ard 1733 +era 1732 +пом 1732 +азв 1731 +ern 1730 +Ð²Ð¸Ñ 1729 +_on 1728 +око 1726 +Ñко 1724 +ез_ 1722 +Ð¾Ð·Ñ 1721 +Ñни 1720 +дав 1718 +ÐµÐ½Ñ 1718 +Ð´Ð¸Ñ 1714 +ame 1711 +ак_ 1710 +лем 1710 +_lo 1700 +les 1698 +ган 1697 +Ð¸ÐºÑ 1695 +Ð½Ð°Ñ 1691 +nce 1690 +ber 1689 +ain 1683 +edi 1680 +Ð°Ð²Ñ 1673 +tur 1672 +_ÐºÑ 1668 +оÑл 1668 +ord 1667 +_ra 1663 +вон 1660 +ie_ 1659 +Ð¼Ð°Ñ 1659 +ÑÑа 1657 +лон 1656 +ÑÑÑ 1656 +Ñив 1651 +_ÑÑ 1647 +rat 1646 +ros 1645 +нÑи 1640 +нÑи 1638 +има 1637 +Ñим 1631 +Ð°Ð½Ñ 1630 +cor 1629 +it_ 1629 +rea 1629 +_ÑÑ 1628 +ric 1627 +_wa 1626 +еÑв 1626 +оме 1625 +икл 1624 +one 1623 +вил 1623 +пÑÑ 1623 +аÑа 1614 +лÑ_ 1613 +нÑÑ 1613 +nde 1611 +зво 1610 +мог 1609 +поз 1609 +Ñка 1609 +кÑн 1608 +дна 1606 +вла 1605 +зÑ_ 1604 +нан 1604 +ÑÑи 1604 +иÑо 1603 +Ñме 1603 +ÑÑ _ 1601 +os_ 1599 +ris 1599 +_ja 1598 +_Ñа 1598 +Ð¾Ð±Ñ 1597 +мÑж 1596 +оби 1594 +Ñад 1594 +Ñна 1593 +Ñка 1592 +ÐºÐ°Ñ 1589 +нÑÑ 1588 +dow 1587 +_as 1584 +кÑÑ 1584 +_Ð´Ñ 1582 +ÑÑд 1580 +Ñин 1579 +_sh 1576 +Ñог 1575 +tic 1574 +Ð´Ð°Ñ 1574 +зов 1574 +_вл 1572 +Ð°Ð¼Ñ 1571 +_au 1570 +Ð°Ð¿Ñ 1569 +баг 1568 +Ñно 1567 +Ñдк 1563 +par 1561 +ÑÑÑ 1561 +_c_ 1560 +бом 1560 +об_ 1560 +Ñал 1560 +они 1557 +пон 1552 +авн 1551 +vi_ 1550 +аÑе 1550 +ÑÑ_ 1550 +ws_ 1549 +Ñен 1546 +_br 1545 +Ð±Ð¾Ñ 1540 +оÑо 1539 +Ð¼Ð°Ñ 1538 +_iv 1537 +кал 1536 +Ñен 1536 +min 1535 +Ñеж 1533 +ÑÐ½Ñ 1532 +Ñни 1531 +nat 1530 +_sp 1525 +важ 1525 +_дж 1524 +лÑб 1523 +_Ñл 1520 +лÑз 1520 +ан_ 1519 +Ð¾Ð»Ñ 1519 +Ñел 1519 +ÑÑа 1517 +Ð¾Ð½Ñ 1515 +поп 1515 +ÑÑ _ 1514 +аÑк 1511 +ope 1509 +ема 1509 +Ð¸Ð½Ñ 1509 +ожн 1509 +Ñам 1509 +оÑг 1508 +his 1505 +бÑÑ 1505 +_x_ 1504
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java (original) +++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java Mon Nov 17 10:32:23 2014 @@ -1,150 +1,150 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.sax; - -import static org.junit.Assert.assertEquals; -import org.junit.Test; -import org.xml.sax.ContentHandler; -import org.xml.sax.helpers.AttributesImpl; - -public class SerializerTest { - - @Test - public void testToTextContentHandler() throws Exception { - assertStartDocument("", new ToTextContentHandler()); - assertCharacters("content", new ToTextContentHandler()); - assertCharacterEscaping("<&\">", new ToTextContentHandler()); - assertIgnorableWhitespace(" \t\r\n", new ToTextContentHandler()); - assertEmptyElement("", new ToTextContentHandler()); - assertEmptyElementWithAttributes("", new ToTextContentHandler()); - assertEmptyElementWithAttributeEscaping("", new ToTextContentHandler()); - assertElement("content", new ToTextContentHandler()); - assertElementWithAttributes("content", new ToTextContentHandler()); - } - - @Test - public void testToXMLContentHandler() throws Exception { - assertStartDocument("", new ToXMLContentHandler()); - assertStartDocument( - "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", - new ToXMLContentHandler("UTF-8")); - assertCharacters("content", new ToXMLContentHandler()); - assertCharacterEscaping("<&\">", new ToXMLContentHandler()); - assertIgnorableWhitespace(" \t\r\n", new ToXMLContentHandler()); - assertEmptyElement("<br />", new ToXMLContentHandler()); - assertEmptyElementWithAttributes( - "<meta name=\"foo\" value=\"bar\" />", - new ToXMLContentHandler()); - assertEmptyElementWithAttributeEscaping( - "<p class=\"<&">\" />", - new ToXMLContentHandler()); - assertElement("<p>content</p>", new ToXMLContentHandler()); - assertElementWithAttributes( - "<p class=\"test\">content</p>", - new ToXMLContentHandler()); - } - - @Test - public void testToHTMLContentHandler() throws Exception { - assertStartDocument("", new ToHTMLContentHandler()); - assertCharacters("content", new ToHTMLContentHandler()); - assertCharacterEscaping("<&\">", new ToHTMLContentHandler()); - assertIgnorableWhitespace(" \t\r\n", new ToHTMLContentHandler()); - assertEmptyElement("<br>", new ToHTMLContentHandler()); - assertEmptyElementWithAttributes( - "<meta name=\"foo\" value=\"bar\">", - new ToHTMLContentHandler()); - assertEmptyElementWithAttributeEscaping( - "<p class=\"<&">\"></p>", - new ToHTMLContentHandler()); - assertElement("<p>content</p>", new ToHTMLContentHandler()); - assertElementWithAttributes( - "<p class=\"test\">content</p>", - new ToHTMLContentHandler()); - } - - private void assertStartDocument(String expected, ContentHandler handler) - throws Exception { - handler.startDocument(); - assertEquals(expected, handler.toString()); - } - - private void assertCharacters(String expected, ContentHandler handler) - throws Exception { - handler.characters("content".toCharArray(), 0, 7); - assertEquals(expected, handler.toString()); - } - - private void assertCharacterEscaping( - String expected, ContentHandler handler) throws Exception { - handler.characters("<&\">".toCharArray(), 0, 4); - assertEquals(expected, handler.toString()); - } - - private void assertIgnorableWhitespace( - String expected, ContentHandler handler) throws Exception { - handler.ignorableWhitespace(" \t\r\n".toCharArray(), 0, 4); - assertEquals(expected, handler.toString()); - } - - private void assertEmptyElement(String expected, ContentHandler handler) - throws Exception { - AttributesImpl attributes = new AttributesImpl(); - handler.startElement("", "br", "br", attributes); - handler.endElement("", "br", "br"); - assertEquals(expected, handler.toString()); - } - - private void assertEmptyElementWithAttributes( - String expected, ContentHandler handler) throws Exception { - AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute("", "name", "name", "CDATA", "foo"); - attributes.addAttribute("", "value", "value", "CDATA", "bar"); - handler.startElement("", "meta", "meta", attributes); - handler.endElement("", "meta", "meta"); - assertEquals(expected, handler.toString()); - } - - private void assertEmptyElementWithAttributeEscaping( - String expected, ContentHandler handler) throws Exception { - AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute("", "class", "class", "CDATA", "<&\">"); - handler.startElement("", "p", "p", attributes); - handler.endElement("", "p", "p"); - assertEquals(expected, handler.toString()); - } - - private void assertElement( - String expected, ContentHandler handler) throws Exception { - AttributesImpl attributes = new AttributesImpl(); - handler.startElement("", "p", "p", attributes); - handler.characters("content".toCharArray(), 0, 7); - handler.endElement("", "p", "p"); - assertEquals(expected, handler.toString()); - } - - private void assertElementWithAttributes( - String expected, ContentHandler handler) throws Exception { - AttributesImpl attributes = new AttributesImpl(); - attributes.addAttribute("", "class", "class", "CDATA", "test"); - handler.startElement("", "p", "p", attributes); - handler.characters("content".toCharArray(), 0, 7); - handler.endElement("", "p", "p"); - assertEquals(expected, handler.toString()); - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.sax; + +import static org.junit.Assert.assertEquals; +import org.junit.Test; +import org.xml.sax.ContentHandler; +import org.xml.sax.helpers.AttributesImpl; + +public class SerializerTest { + + @Test + public void testToTextContentHandler() throws Exception { + assertStartDocument("", new ToTextContentHandler()); + assertCharacters("content", new ToTextContentHandler()); + assertCharacterEscaping("<&\">", new ToTextContentHandler()); + assertIgnorableWhitespace(" \t\r\n", new ToTextContentHandler()); + assertEmptyElement("", new ToTextContentHandler()); + assertEmptyElementWithAttributes("", new ToTextContentHandler()); + assertEmptyElementWithAttributeEscaping("", new ToTextContentHandler()); + assertElement("content", new ToTextContentHandler()); + assertElementWithAttributes("content", new ToTextContentHandler()); + } + + @Test + public void testToXMLContentHandler() throws Exception { + assertStartDocument("", new ToXMLContentHandler()); + assertStartDocument( + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", + new ToXMLContentHandler("UTF-8")); + assertCharacters("content", new ToXMLContentHandler()); + assertCharacterEscaping("<&\">", new ToXMLContentHandler()); + assertIgnorableWhitespace(" \t\r\n", new ToXMLContentHandler()); + assertEmptyElement("<br />", new ToXMLContentHandler()); + assertEmptyElementWithAttributes( + "<meta name=\"foo\" value=\"bar\" />", + new ToXMLContentHandler()); + assertEmptyElementWithAttributeEscaping( + "<p class=\"<&">\" />", + new ToXMLContentHandler()); + assertElement("<p>content</p>", new ToXMLContentHandler()); + assertElementWithAttributes( + "<p class=\"test\">content</p>", + new ToXMLContentHandler()); + } + + @Test + public void testToHTMLContentHandler() throws Exception { + assertStartDocument("", new ToHTMLContentHandler()); + assertCharacters("content", new ToHTMLContentHandler()); + assertCharacterEscaping("<&\">", new ToHTMLContentHandler()); + assertIgnorableWhitespace(" \t\r\n", new ToHTMLContentHandler()); + assertEmptyElement("<br>", new ToHTMLContentHandler()); + assertEmptyElementWithAttributes( + "<meta name=\"foo\" value=\"bar\">", + new ToHTMLContentHandler()); + assertEmptyElementWithAttributeEscaping( + "<p class=\"<&">\"></p>", + new ToHTMLContentHandler()); + assertElement("<p>content</p>", new ToHTMLContentHandler()); + assertElementWithAttributes( + "<p class=\"test\">content</p>", + new ToHTMLContentHandler()); + } + + private void assertStartDocument(String expected, ContentHandler handler) + throws Exception { + handler.startDocument(); + assertEquals(expected, handler.toString()); + } + + private void assertCharacters(String expected, ContentHandler handler) + throws Exception { + handler.characters("content".toCharArray(), 0, 7); + assertEquals(expected, handler.toString()); + } + + private void assertCharacterEscaping( + String expected, ContentHandler handler) throws Exception { + handler.characters("<&\">".toCharArray(), 0, 4); + assertEquals(expected, handler.toString()); + } + + private void assertIgnorableWhitespace( + String expected, ContentHandler handler) throws Exception { + handler.ignorableWhitespace(" \t\r\n".toCharArray(), 0, 4); + assertEquals(expected, handler.toString()); + } + + private void assertEmptyElement(String expected, ContentHandler handler) + throws Exception { + AttributesImpl attributes = new AttributesImpl(); + handler.startElement("", "br", "br", attributes); + handler.endElement("", "br", "br"); + assertEquals(expected, handler.toString()); + } + + private void assertEmptyElementWithAttributes( + String expected, ContentHandler handler) throws Exception { + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute("", "name", "name", "CDATA", "foo"); + attributes.addAttribute("", "value", "value", "CDATA", "bar"); + handler.startElement("", "meta", "meta", attributes); + handler.endElement("", "meta", "meta"); + assertEquals(expected, handler.toString()); + } + + private void assertEmptyElementWithAttributeEscaping( + String expected, ContentHandler handler) throws Exception { + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute("", "class", "class", "CDATA", "<&\">"); + handler.startElement("", "p", "p", attributes); + handler.endElement("", "p", "p"); + assertEquals(expected, handler.toString()); + } + + private void assertElement( + String expected, ContentHandler handler) throws Exception { + AttributesImpl attributes = new AttributesImpl(); + handler.startElement("", "p", "p", attributes); + handler.characters("content".toCharArray(), 0, 7); + handler.endElement("", "p", "p"); + assertEquals(expected, handler.toString()); + } + + private void assertElementWithAttributes( + String expected, ContentHandler handler) throws Exception { + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute("", "class", "class", "CDATA", "test"); + handler.startElement("", "p", "p", attributes); + handler.characters("content".toCharArray(), 0, 7); + handler.endElement("", "p", "p"); + assertEquals(expected, handler.toString()); + } + +} Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java Mon Nov 17 10:32:23 2014 @@ -22,7 +22,6 @@ import java.io.InputStream; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; -import java.util.Iterator; import java.util.Set; import org.apache.tika.exception.TikaException; @@ -34,6 +33,7 @@ import org.apache.tika.parser.chm.access import org.apache.tika.parser.chm.core.ChmExtractor; import org.apache.tika.parser.html.HtmlParser; import org.apache.tika.sax.BodyContentHandler; +import org.apache.tika.sax.EmbeddedContentHandler; import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -49,10 +49,12 @@ public class ChmParser extends AbstractP MediaType.application("chm"), MediaType.application("x-chm")))); + @Override public Set<MediaType> getSupportedTypes(ParseContext context) { return SUPPORTED_TYPES; } + @Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { @@ -66,36 +68,40 @@ public class ChmParser extends AbstractP xhtml.startDocument(); for (DirectoryListingEntry entry : chmExtractor.getChmDirList().getDirectoryListingEntryList()) { - if (entry.getName().endsWith(".html") || entry.getName().endsWith(".htm")) { - xhtml.characters(extract(chmExtractor.extractChmEntry(entry))); + final String entryName = entry.getName(); + if (entryName.endsWith(".html") + || entryName.endsWith(".htm") + ) { +// AttributesImpl attrs = new AttributesImpl(); +// attrs.addAttribute("", "name", "name", "String", entryName); +// xhtml.startElement("", "document", "document", attrs); + + byte[] data = chmExtractor.extractChmEntry(entry); + + parsePage(data, xhtml); + +// xhtml.endElement("", "", "document"); } } xhtml.endDocument(); } - /** - * Extracts data from byte[] - */ - private String extract(byte[] byteObject) throws TikaException {// throws IOException - StringBuilder wBuf = new StringBuilder(); + + private void parsePage(byte[] byteObject, ContentHandler xhtml) throws TikaException {// throws IOException InputStream stream = null; Metadata metadata = new Metadata(); HtmlParser htmlParser = new HtmlParser(); - BodyContentHandler handler = new BodyContentHandler(-1);// -1 + ContentHandler handler = new EmbeddedContentHandler(new BodyContentHandler(xhtml));// -1 ParseContext parser = new ParseContext(); try { stream = new ByteArrayInputStream(byteObject); htmlParser.parse(stream, handler, metadata, parser); - wBuf.append(handler.toString() - + System.getProperty("line.separator")); } catch (SAXException e) { throw new RuntimeException(e); } catch (IOException e) { // Pushback overflow from tagsoup } - return wBuf.toString(); } - - + } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java Mon Nov 17 10:32:23 2014 @@ -16,13 +16,14 @@ */ package org.apache.tika.parser.chm.accessor; +import java.io.UnsupportedEncodingException; import java.math.BigInteger; import java.util.ArrayList; import java.util.List; - import org.apache.tika.exception.TikaException; import org.apache.tika.parser.chm.core.ChmCommons; import org.apache.tika.parser.chm.core.ChmConstants; +import org.apache.tika.parser.chm.exception.ChmParsingException; /** * Holds chm listing entries @@ -101,15 +102,6 @@ public class ChmDirectoryListingSet { } /** - * Gets place holder - * - * @return place holder - */ - private int getPlaceHolder() { - return placeHolder; - } - - /** * Sets place holder * * @param placeHolder @@ -118,13 +110,14 @@ public class ChmDirectoryListingSet { this.placeHolder = placeHolder; } + private ChmPmglHeader PMGLheader; /** * Enumerates chm directory listing entries * * @param chmItsHeader - * chm itsf header + * chm itsf PMGLheader * @param chmItspHeader - * chm itsp header + * chm itsp PMGLheader */ private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader, ChmItspHeader chmItspHeader) { @@ -136,33 +129,19 @@ public class ChmDirectoryListingSet { setDataOffset(chmItsHeader.getDataOffset()); /* loops over all pmgls */ - int previous_index = 0; byte[] dir_chunk = null; - for (int i = startPmgl; i <= stopPmgl; i++) { - int data_copied = ((1 + i) * (int) chmItspHeader.getBlock_len()) - + dir_offset; - if (i == 0) { - dir_chunk = new byte[(int) chmItspHeader.getBlock_len()]; - // dir_chunk = Arrays.copyOfRange(getData(), dir_offset, - // (((1+i) * (int)chmItspHeader.getBlock_len()) + - // dir_offset)); - dir_chunk = ChmCommons - .copyOfRange(getData(), dir_offset, - (((1 + i) * (int) chmItspHeader - .getBlock_len()) + dir_offset)); - previous_index = data_copied; - } else { - dir_chunk = new byte[(int) chmItspHeader.getBlock_len()]; - // dir_chunk = Arrays.copyOfRange(getData(), previous_index, - // (((1+i) * (int)chmItspHeader.getBlock_len()) + - // dir_offset)); - dir_chunk = ChmCommons - .copyOfRange(getData(), previous_index, - (((1 + i) * (int) chmItspHeader - .getBlock_len()) + dir_offset)); - previous_index = data_copied; - } + for (int i = startPmgl; i>=0; ) { + dir_chunk = new byte[(int) chmItspHeader.getBlock_len()]; + int start = i * (int) chmItspHeader.getBlock_len() + dir_offset; + dir_chunk = ChmCommons + .copyOfRange(getData(), start, + start +(int) chmItspHeader.getBlock_len()); + + PMGLheader = new ChmPmglHeader(); + PMGLheader.parse(dir_chunk, PMGLheader); enumerateOneSegment(dir_chunk); + + i=PMGLheader.getBlockNext(); dir_chunk = null; } } catch (Exception e) { @@ -202,112 +181,142 @@ public class ChmDirectoryListingSet { } } + public static final boolean startsWith(byte[] data, String prefix) { + for (int i=0; i<prefix.length(); i++) { + if (data[i]!=prefix.charAt(i)) { + return false; + } + } + + return true; + } /** * Enumerates chm directory listing entries in single chm segment * * @param dir_chunk */ - private void enumerateOneSegment(byte[] dir_chunk) { - try { + private void enumerateOneSegment(byte[] dir_chunk) throws ChmParsingException { +// try { if (dir_chunk != null) { + int header_len; + if (startsWith(dir_chunk, ChmConstants.CHM_PMGI_MARKER)) { + header_len = ChmConstants.CHM_PMGI_LEN; + return; //skip PMGI + } + else if (startsWith(dir_chunk, ChmConstants.PMGL)) { + header_len = ChmConstants.CHM_PMGL_LEN; + } + else { + throw new ChmParsingException("Bad dir entry block."); + } - int indexWorkData = ChmCommons.indexOf(dir_chunk, - "::".getBytes("UTF-8")); - int indexUserData = ChmCommons.indexOf(dir_chunk, - "/".getBytes("UTF-8")); - - if (indexUserData < indexWorkData) - setPlaceHolder(indexUserData); - else - setPlaceHolder(indexWorkData); - - if (getPlaceHolder() > 0 - && dir_chunk[getPlaceHolder() - 1] != 115) {// #{ - do { - if (dir_chunk[getPlaceHolder() - 1] > 0) { - DirectoryListingEntry dle = new DirectoryListingEntry(); - - // two cases: 1. when dir_chunk[getPlaceHolder() - - // 1] == 0x73 - // 2. when dir_chunk[getPlaceHolder() + 1] == 0x2f - doNameCheck(dir_chunk, dle); - - // dle.setName(new - // String(Arrays.copyOfRange(dir_chunk, - // getPlaceHolder(), (getPlaceHolder() + - // dle.getNameLength())))); - dle.setName(new String(ChmCommons.copyOfRange( - dir_chunk, getPlaceHolder(), - (getPlaceHolder() + dle.getNameLength())), "UTF-8")); - checkControlData(dle); - checkResetTable(dle); - setPlaceHolder(getPlaceHolder() - + dle.getNameLength()); - - /* Sets entry type */ - if (getPlaceHolder() < dir_chunk.length - && dir_chunk[getPlaceHolder()] == 0) - dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED); - else - dle.setEntryType(ChmCommons.EntryType.COMPRESSED); - - setPlaceHolder(getPlaceHolder() + 1); - dle.setOffset(getEncint(dir_chunk)); - dle.setLength(getEncint(dir_chunk)); - getDirectoryListingEntryList().add(dle); - } else - setPlaceHolder(getPlaceHolder() + 1); - - } while (hasNext(dir_chunk)); + placeHolder = header_len; + //setPlaceHolder(header_len); + while (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace() + /*&& dir_chunk[placeHolder - 1] != 115*/) + { + //get entry name length + int strlen = 0;// = getEncint(data); + byte temp; + while ((temp=dir_chunk[placeHolder++]) >= 0x80) + { + strlen <<= 7; + strlen += temp & 0x7f; + } + + strlen = (strlen << 7) + temp & 0x7f; + + if (strlen>dir_chunk.length) { + throw new ChmParsingException("Bad data of a string length."); + } + + DirectoryListingEntry dle = new DirectoryListingEntry(); + dle.setNameLength(strlen); + try { + dle.setName(new String(ChmCommons.copyOfRange( + dir_chunk, placeHolder, + (placeHolder + dle.getNameLength())), "UTF-8")); + } catch (UnsupportedEncodingException ex) { + dle.setName(new String(dir_chunk, placeHolder, placeHolder + dle.getNameLength())); + } + checkControlData(dle); + checkResetTable(dle); + setPlaceHolder(placeHolder + + dle.getNameLength()); + + /* Sets entry type */ + if (placeHolder < dir_chunk.length + && dir_chunk[placeHolder] == 0) + dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED); + else + dle.setEntryType(ChmCommons.EntryType.COMPRESSED); + + setPlaceHolder(placeHolder + 1); + dle.setOffset(getEncint(dir_chunk)); + dle.setLength(getEncint(dir_chunk)); + getDirectoryListingEntryList().add(dle); } + +// int indexWorkData = ChmCommons.indexOf(dir_chunk, +// "::".getBytes("UTF-8")); +// int indexUserData = ChmCommons.indexOf(dir_chunk, +// "/".getBytes("UTF-8")); +// +// if (indexUserData>=0 && indexUserData < indexWorkData) +// setPlaceHolder(indexUserData); +// else if (indexWorkData>=0) { +// setPlaceHolder(indexWorkData); +// } +// else { +// setPlaceHolder(indexUserData); +// } +// +// if (placeHolder > 0 && placeHolder < dir_chunk.length - PMGLheader.getFreeSpace() +// && dir_chunk[placeHolder - 1] != 115) {// #{ +// do { +// if (dir_chunk[placeHolder - 1] > 0) { +// DirectoryListingEntry dle = new DirectoryListingEntry(); +// +// // two cases: 1. when dir_chunk[placeHolder - +// // 1] == 0x73 +// // 2. when dir_chunk[placeHolder + 1] == 0x2f +// doNameCheck(dir_chunk, dle); +// +// // dle.setName(new +// // String(Arrays.copyOfRange(dir_chunk, +// // placeHolder, (placeHolder + +// // dle.getNameLength())))); +// dle.setName(new String(ChmCommons.copyOfRange( +// dir_chunk, placeHolder, +// (placeHolder + dle.getNameLength())), "UTF-8")); +// checkControlData(dle); +// checkResetTable(dle); +// setPlaceHolder(placeHolder +// + dle.getNameLength()); +// +// /* Sets entry type */ +// if (placeHolder < dir_chunk.length +// && dir_chunk[placeHolder] == 0) +// dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED); +// else +// dle.setEntryType(ChmCommons.EntryType.COMPRESSED); +// +// setPlaceHolder(placeHolder + 1); +// dle.setOffset(getEncint(dir_chunk)); +// dle.setLength(getEncint(dir_chunk)); +// getDirectoryListingEntryList().add(dle); +// } else +// setPlaceHolder(placeHolder + 1); +// +// } while (nextEntry(dir_chunk)); +// } } - } catch (Exception e) { - e.printStackTrace(); - } - } - - /** - * Checks if a name and name length are correct. If not then handles it as - * follows: 1. when dir_chunk[getPlaceHolder() - 1] == 0x73 ('/') 2. when - * dir_chunk[getPlaceHolder() + 1] == 0x2f ('s') - * - * @param dir_chunk - * @param dle - */ - private void doNameCheck(byte[] dir_chunk, DirectoryListingEntry dle) { - if (dir_chunk[getPlaceHolder() - 1] == 0x73) { - dle.setNameLength(dir_chunk[getPlaceHolder() - 1] & 0x21); - } else if (dir_chunk[getPlaceHolder() + 1] == 0x2f) { - dle.setNameLength(dir_chunk[getPlaceHolder()]); - setPlaceHolder(getPlaceHolder() + 1); - } else { - dle.setNameLength(dir_chunk[getPlaceHolder() - 1]); - } +// } catch (Exception e) { +// e.printStackTrace(); +// } } - /** - * Checks if it's possible move further on byte[] - * - * @param dir_chunk - * - * @return boolean - */ - private boolean hasNext(byte[] dir_chunk) { - while (getPlaceHolder() < dir_chunk.length) { - if (dir_chunk[getPlaceHolder()] == 47 - && dir_chunk[getPlaceHolder() + 1] != ':') { - setPlaceHolder(getPlaceHolder()); - return true; - } else if (dir_chunk[getPlaceHolder()] == ':' - && dir_chunk[getPlaceHolder() + 1] == ':') { - setPlaceHolder(getPlaceHolder()); - return true; - } else - setPlaceHolder(getPlaceHolder() + 1); - } - return false; - } /** * Returns encrypted integer @@ -321,26 +330,20 @@ public class ChmDirectoryListingSet { BigInteger bi = BigInteger.ZERO; byte[] nb = new byte[1]; - if (getPlaceHolder() < data_chunk.length) { - while ((ob = data_chunk[getPlaceHolder()]) < 0) { + if (placeHolder < data_chunk.length) { + while ((ob = data_chunk[placeHolder]) < 0) { nb[0] = (byte) ((ob & 0x7f)); bi = bi.shiftLeft(7).add(new BigInteger(nb)); - setPlaceHolder(getPlaceHolder() + 1); + setPlaceHolder(placeHolder + 1); } nb[0] = (byte) ((ob & 0x7f)); bi = bi.shiftLeft(7).add(new BigInteger(nb)); - setPlaceHolder(getPlaceHolder() + 1); + setPlaceHolder(placeHolder + 1); } return bi.intValue(); } /** - * @param args - */ - public static void main(String[] args) { - } - - /** * Sets chm directory listing entry list * * @param dlel Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java Mon Nov 17 10:32:23 2014 @@ -389,10 +389,10 @@ public class ChmItsfHeader implements Ch if (4 > this.getDataRemained()) throw new TikaException("4 > dataLenght"); - dest = data[this.getCurrentPlace()] - | data[this.getCurrentPlace() + 1] << 8 - | data[this.getCurrentPlace() + 2] << 16 - | data[this.getCurrentPlace() + 3] << 24; + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; this.setCurrentPlace(this.getCurrentPlace() + 4); this.setDataRemained(this.getDataRemained() - 4); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java Mon Nov 17 10:32:23 2014 @@ -147,10 +147,10 @@ public class ChmItspHeader implements Ch ChmAssert.assertByteArrayNotNull(data); if (4 > this.getDataRemained()) throw new TikaException("4 > dataLenght"); - dest = data[this.getCurrentPlace()] - | data[this.getCurrentPlace() + 1] << 8 - | data[this.getCurrentPlace() + 2] << 16 - | data[this.getCurrentPlace() + 3] << 24; + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; this.setCurrentPlace(this.getCurrentPlace() + 4); this.setDataRemained(this.getDataRemained() - 4); @@ -161,10 +161,10 @@ public class ChmItspHeader implements Ch ChmAssert.assertByteArrayNotNull(data); if (4 > dataLenght) throw new TikaException("4 > dataLenght"); - dest = data[this.getCurrentPlace()] - | data[this.getCurrentPlace() + 1] << 8 - | data[this.getCurrentPlace() + 2] << 16 - | data[this.getCurrentPlace() + 3] << 24; + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; setDataRemained(this.getDataRemained() - 4); this.setCurrentPlace(this.getCurrentPlace() + 4); @@ -556,10 +556,4 @@ public class ChmItspHeader implements Ch if (chmItspHeader.getHeader_len() != ChmConstants.CHM_ITSP_V1_LEN) throw new ChmParsingException("!= ChmConstants.CHM_ITSP_V1_LEN"); } - - /** - * @param args - */ - public static void main(String[] args) { - } } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java Mon Nov 17 10:32:23 2014 @@ -158,10 +158,10 @@ public class ChmLzxcResetTable implement private long unmarshalUInt32(byte[] data, long dest) throws TikaException { ChmAssert.assertByteArrayNotNull(data); - dest = data[this.getCurrentPlace()] - | data[this.getCurrentPlace() + 1] << 8 - | data[this.getCurrentPlace() + 2] << 16 - | data[this.getCurrentPlace() + 3] << 24; + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; setDataRemained(this.getDataRemained() - 4); this.setCurrentPlace(this.getCurrentPlace() + 4); @@ -318,13 +318,6 @@ public class ChmLzxcResetTable implement this.block_len = block_len; } - /** - * @param args - */ - public static void main(String[] args) { - - } - // @Override public void parse(byte[] data, ChmLzxcResetTable chmLzxcResetTable) throws TikaException { setDataRemained(data.length); Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java Mon Nov 17 10:32:23 2014 @@ -105,10 +105,10 @@ public class ChmPmgiHeader implements Ch if (4 > getDataRemained()) throw new ChmParsingException("4 > dataLenght"); - dest = data[this.getCurrentPlace()] - | data[this.getCurrentPlace() + 1] << 8 - | data[this.getCurrentPlace() + 2] << 16 - | data[this.getCurrentPlace() + 3] << 24; + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; setDataRemained(this.getDataRemained() - 4); this.setCurrentPlace(this.getCurrentPlace() + 4); @@ -187,11 +187,4 @@ public class ChmPmgiHeader implements Ch } } - - /** - * @param args - */ - public static void main(String[] args) { - - } } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java Mon Nov 17 10:32:23 2014 @@ -16,14 +16,13 @@ */ package org.apache.tika.parser.chm.accessor; +import java.io.UnsupportedEncodingException; + import org.apache.tika.exception.TikaException; import org.apache.tika.parser.chm.assertion.ChmAssert; import org.apache.tika.parser.chm.core.ChmConstants; import org.apache.tika.parser.chm.exception.ChmParsingException; -import java.io.UnsupportedEncodingException; -import java.util.UnknownFormatConversionException; - /** * Description There are two types of directory chunks -- index chunks, and * listing chunks. The index chunk will be omitted if there is only one listing @@ -100,7 +99,10 @@ public class ChmPmglHeader implements Ch return free_space; } - public void setFreeSpace(long free_space) { + public void setFreeSpace(long free_space) throws TikaException { + if (free_space < 0) { + throw new TikaException("Bad PMGLheader.FreeSpace="+free_space); + } this.free_space = free_space; } @@ -128,28 +130,30 @@ public class ChmPmglHeader implements Ch this.setDataRemained(this.getDataRemained() - count); } - private int unmarshalInt32(byte[] data, int dest) throws TikaException { + private int unmarshalInt32(byte[] data) throws TikaException { ChmAssert.assertByteArrayNotNull(data); + int dest; if (4 > this.getDataRemained()) throw new TikaException("4 > dataLenght"); - dest = data[this.getCurrentPlace()] - | data[this.getCurrentPlace() + 1] << 8 - | data[this.getCurrentPlace() + 2] << 16 - | data[this.getCurrentPlace() + 3] << 24; + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; this.setCurrentPlace(this.getCurrentPlace() + 4); this.setDataRemained(this.getDataRemained() - 4); return dest; } - private long unmarshalUInt32(byte[] data, long dest) throws ChmParsingException { + private long unmarshalUInt32(byte[] data) throws ChmParsingException { ChmAssert.assertByteArrayNotNull(data); + long dest; if (4 > getDataRemained()) throw new ChmParsingException("4 > dataLenght"); - dest = data[this.getCurrentPlace()] - | data[this.getCurrentPlace() + 1] << 8 - | data[this.getCurrentPlace() + 2] << 16 - | data[this.getCurrentPlace() + 3] << 24; + dest = (data[this.getCurrentPlace()] & 0xff) + | (data[this.getCurrentPlace() + 1] & 0xff) << 8 + | (data[this.getCurrentPlace() + 2] & 0xff) << 16 + | (data[this.getCurrentPlace() + 3] & 0xff) << 24; setDataRemained(this.getDataRemained() - 4); this.setCurrentPlace(this.getCurrentPlace() + 4); @@ -165,14 +169,10 @@ public class ChmPmglHeader implements Ch /* unmarshal fields */ chmPmglHeader.unmarshalCharArray(data, chmPmglHeader, ChmConstants.CHM_SIGNATURE_LEN); - chmPmglHeader.setFreeSpace(chmPmglHeader.unmarshalUInt32(data, - chmPmglHeader.getFreeSpace())); - chmPmglHeader.setUnknown0008(chmPmglHeader.unmarshalUInt32(data, - chmPmglHeader.getUnknown0008())); - chmPmglHeader.setBlockPrev(chmPmglHeader.unmarshalInt32(data, - chmPmglHeader.getBlockPrev())); - chmPmglHeader.setBlockNext(chmPmglHeader.unmarshalInt32(data, - chmPmglHeader.getBlockNext())); + chmPmglHeader.setFreeSpace(chmPmglHeader.unmarshalUInt32(data)); + chmPmglHeader.setUnknown0008(chmPmglHeader.unmarshalUInt32(data)); + chmPmglHeader.setBlockPrev(chmPmglHeader.unmarshalInt32(data)); + chmPmglHeader.setBlockNext(chmPmglHeader.unmarshalInt32(data)); /* check structure */ try { @@ -215,11 +215,4 @@ public class ChmPmglHeader implements Ch protected void setBlockNext(int block_next) { this.block_next = block_next; } - - /** - * @param args - */ - public static void main(String[] args) { - - } } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java Mon Nov 17 10:32:23 2014 @@ -81,7 +81,7 @@ public class DirectoryListingEntry { sb.append("length:=" + getLength()); return sb.toString(); } - + /** * Returns an entry name length * @@ -148,7 +148,4 @@ public class DirectoryListingEntry { protected void setLength(int length) { this.length = length; } - - public static void main(String[] args) { - } } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java Mon Nov 17 10:32:23 2014 @@ -19,7 +19,6 @@ package org.apache.tika.parser.chm.core; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; -import java.util.Iterator; import java.util.List; import org.apache.tika.exception.TikaException; @@ -359,10 +358,4 @@ public class ChmCommons { return str == null || str.length() == 0; } - /** - * @param args - */ - public static void main(String[] args) { - } - } Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java?rev=1640136&r1=1640135&r2=1640136&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java Mon Nov 17 10:32:23 2014 @@ -20,9 +20,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; - import org.apache.tika.exception.TikaException; import org.apache.tika.io.IOUtils; import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet; @@ -96,7 +94,7 @@ public class ChmExtractor { } /** - * Returns lzxc block length + * Returns lzxc hit_cache length * * @return lzxBlockLength */ @@ -105,7 +103,7 @@ public class ChmExtractor { } /** - * Sets lzxc block length + * Sets lzxc hit_cache length * * @param lzxBlockLength */ @@ -114,7 +112,7 @@ public class ChmExtractor { } /** - * Returns lzxc block offset + * Returns lzxc hit_cache offset * * @return lzxBlockOffset */ @@ -123,7 +121,7 @@ public class ChmExtractor { } /** - * Sets lzxc block offset + * Sets lzxc hit_cache offset */ private void setLzxBlockOffset(long lzxBlockOffset) { this.lzxBlockOffset = lzxBlockOffset; @@ -215,7 +213,7 @@ public class ChmExtractor { setLzxBlocksCache(new ArrayList<ChmLzxBlock>()); } catch (IOException e) { - // ignore + e.printStackTrace(); } } @@ -257,34 +255,37 @@ public class ChmExtractor { dataOffset + directoryListingEntry.getLength())); } else if (directoryListingEntry.getEntryType() == EntryType.COMPRESSED && !ChmCommons.hasSkip(directoryListingEntry)) { - /* Gets a chm block info */ + /* Gets a chm hit_cache info */ ChmBlockInfo bb = ChmBlockInfo.getChmBlockInfoInstance( directoryListingEntry, (int) getChmLzxcResetTable() .getBlockLen(), getChmLzxcControlData()); - int i = 0, start = 0, block = 0; + int i = 0, start = 0, hit_cache = 0; if ((getLzxBlockLength() < Integer.MAX_VALUE) && (getLzxBlockOffset() < Integer.MAX_VALUE)) { // TODO: Improve the caching // caching ... = O(n^2) - depends on startBlock and endBlock - if (getLzxBlocksCache().size() != 0) { + start = -1; + if (!getLzxBlocksCache().isEmpty()) { for (i = 0; i < getLzxBlocksCache().size(); i++) { - lzxBlock = getLzxBlocksCache().get(i); - for (int j = bb.getIniBlock(); j <= bb - .getStartBlock(); j++) { - if (lzxBlock.getBlockNumber() == j) + //lzxBlock = getLzxBlocksCache().get(i); + int bn = getLzxBlocksCache().get(i).getBlockNumber(); + for (int j = bb.getIniBlock(); j <= bb.getStartBlock(); j++) { + if (bn == j) { if (j > start) { start = j; - block = i; + hit_cache = i; } - if (start == bb.getStartBlock()) - break; + } } + if (start == bb.getStartBlock()) + break; } } - if (i == getLzxBlocksCache().size() && i == 0) { +// if (i == getLzxBlocksCache().size() && i == 0) { + if (start<0) { start = bb.getIniBlock(); byte[] dataSegment = ChmCommons.getChmBlockSegment( @@ -298,7 +299,7 @@ public class ChmExtractor { getLzxBlocksCache().add(lzxBlock); } else { - lzxBlock = getLzxBlocksCache().get(block); + lzxBlock = getLzxBlocksCache().get(hit_cache); } for (i = start; i <= bb.getEndBlock();) { @@ -349,8 +350,12 @@ public class ChmExtractor { .getBlockCount()) { getLzxBlocksCache().clear(); } + } //end of if + + if (buffer.size() != directoryListingEntry.getLength()) { + throw new TikaException("CHM file extract error: extracted Length is wrong."); } - } + } //end of if compressed } catch (Exception e) { throw new TikaException(e.getMessage()); }
