i18npool/CustomTarget_breakiterator.mk | 24 ++- i18npool/CustomTarget_collator.mk | 23 ++- i18npool/CustomTarget_indexentry.mk | 17 +- i18npool/source/breakiterator/data/dict_word_ca.txt | 148 -------------------- 4 files changed, 54 insertions(+), 158 deletions(-)
New commits: commit 585e64ff94646c9204126f6db86b80da86e8a9d6 Author: Caolán McNamara <caol...@redhat.com> Date: Fri Jul 27 13:29:44 2012 +0100 drop hopefully unnecessary catalan word breaking rules various regression tests for the issues that prompted its inclusion all now pass in its absence Change-Id: Ia375322335b4272aa6c3d626b2d98bc64465bf1c diff --git a/i18npool/CustomTarget_breakiterator.mk b/i18npool/CustomTarget_breakiterator.mk index 621e8f1..f7df926 100644 --- a/i18npool/CustomTarget_breakiterator.mk +++ b/i18npool/CustomTarget_breakiterator.mk @@ -70,7 +70,6 @@ i18npool_BRKTXTS := \ char.brk \ count_word_fi.brk \ count_word.brk \ - dict_word_ca.brk \ dict_word_fi.brk \ dict_word_he.brk \ dict_word_hu.brk \ diff --git a/i18npool/source/breakiterator/data/dict_word_ca.txt b/i18npool/source/breakiterator/data/dict_word_ca.txt deleted file mode 100644 index b1666f4..0000000 --- a/i18npool/source/breakiterator/data/dict_word_ca.txt +++ /dev/null @@ -1,148 +0,0 @@ -# -# Copyright (C) 2002-2003, International Business Machines Corporation and others. -# All Rights Reserved. -# -# file: dict_word.txt -# -# ICU Word Break Rules -# See Unicode Standard Annex #29. -# These rules are based on Version 4.0.0, dated 2003-04-17 -# - - - -#################################################################################### -# -# Character class definitions from TR 29 -# -#################################################################################### -$Katakana = [[:Script = KATAKANA:] [:name = KATAKANA-HIRAGANA PROLONGED SOUND MARK:] - [:name = HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK:] - [:name = HALFWIDTH KATAKANA VOICED SOUND MARK:] - [:name = HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK:]]; - -$Ideographic = [:Ideographic:]; -$Hangul = [:Script = HANGUL:]; - -$ALetter = [[:Alphabetic:] [:name= COMMERCIAL AT:] [:name= HEBREW PUNCTUATION GERESH:] - - $Ideographic - - $Katakana - - $Hangul - - [:Script = Thai:] - - [:Script = Lao:] - - [:Script = Hiragana:]]; - -$MidLetter = [[:name = APOSTROPHE:] [:name = GRAVE ACCENT:] \u0084 [:name = SOFT HYPHEN:] [:name = MIDDLE DOT:] [:name = GREEK TONOS:] [:name= FULL STOP:] - [:name = HEBREW PUNCTUATION GERSHAYIM:] [:name = DOUBLE VERTICAL LINE:] [:name = LEFT SINGLE QUOTATION MARK:] - [:name = RIGHT SINGLE QUOTATION MARK:] [:name = HYPHENATION POINT:] [:name = PRIME:] - [:name = HYPHEN-MINUS:] ]; - -$SufixLetter = [:name= FULL STOP:]; - - -$MidNum = [[:LineBreak = Infix_Numeric:] [:name= COMMERCIAL AT:] \u0084 [:name = GREEK TONOS:] [:name = ARABIC DECIMAL SEPARATOR:] - [:name = LEFT SINGLE QUOTATION MARK:] [:name = RIGHT SINGLE QUOTATION MARK:] [:name = SINGLE HIGH-REVERSED-9 QUOTATION MARK:] - [:name = PRIME:]]; -$Numeric = [:LineBreak = Numeric:]; - - -$TheZWSP = \u200b; - -# -# Character Class Definitions. -# The names are those from TR29. -# -$CR = \u000d; -$LF = \u000a; -$Control = [[[:Zl:] [:Zp:] [:Cc:] [:Cf:]] - $TheZWSP]; -$Extend = [[:Grapheme_Extend = TRUE:]]; - - - - -#################################################################################### -# -# Word Break Rules. Definitions and Rules specific to word break begin Here. -# -#################################################################################### - -$Format = [[:Cf:] - $TheZWSP]; - - - -# Rule 3: Treat a grapheme cluster as if it were a single character. -# Hangul Syllables are easier to deal with here than they are in Grapheme Clusters -# because we don't need to find the boundaries between adjacent syllables - -# they won't be word boundaries. -# - - -# -# "Extended" definitions. Grapheme Cluster + Format Chars, treated like the base char. -# -$ALetterEx = $ALetter $Extend*; -$NumericEx = $Numeric $Extend*; -$MidNumEx = $MidNum $Extend*; -$MidLetterEx = $MidLetter $Extend*; -$SufixLetterEx= $SufixLetter $Extend*; -$KatakanaEx = $Katakana $Extend*; -$IdeographicEx= $Ideographic $Extend*; -$HangulEx = $Hangul $Extend*; -$FormatEx = $Format $Extend*; - - -# -# Numbers. Rules 8, 11, 12 form the TR. -# -$NumberSequence = $NumericEx ($FormatEx* $MidNumEx? $FormatEx* $NumericEx)*; -$NumberSequence {100}; - -# -# Words. Alpha-numerics. Rule 5, 6, 7, 9, 10 -# - must include at least one letter. -# - may include both letters and numbers. -# - may include MideLetter, MidNumber punctuation. -# -$LetterSequence = $ALetterEx ($FormatEx* $MidLetterEx? $FormatEx* $ALetterEx)*; # rules #6, #7 -($NumberSequence $FormatEx*)? $LetterSequence ($FormatEx* ($NumberSequence | $LetterSequence))* $SufixLetterEx? {200}; - -[[:P:][:S:]]*; - -# -# Do not break between Katakana. Rule #13. -# -$KatakanaEx ($FormatEx* $KatakanaEx)* {300}; -[:Hiragana:] $Extend* {300}; - -# -# Ideographic Characters. Stand by themselves as words. -# Separated from the "Everything Else" rule, below, only so that they -# can be tagged with a return value. TODO: is this what we want? -# -$IdeographicEx ($FormatEx* $IdeographicEx)* {400}; -$HangulEx ($FormatEx* $HangulEx)* {400}; - -# -# Everything Else, with no tag. -# Non-Control chars combine with $Extend (combining) chars. -# Controls are do not. -# -[^$Control [:Ideographic:]] $Extend*; -$CR $LF; - -# -# Reverse Rules. Back up over any of the chars that can group together. -# (Reverse rules do not need to be exact; they can back up too far, -# but must back up at least enough, and must stop on a boundary.) -# - -# NonStarters are the set of all characters that can appear at the 2nd - nth position of -# a word. (They may also be the first.) The reverse rule skips over these, until it -# reaches something that can only be the start (and probably only) char in a "word". -# A space or punctuation meets the test. -# -$NonStarters = [$Numeric $ALetter $Katakana $Ideographic $Hangul [:P:] [:S:] $MidLetter $MidNum $SufixLetter $Extend $Format]; - -#!.*; -! ($NonStarters* | \n \r) .; - commit 82c25249e624cb54ca6d3293d1c3d0d8ebc208e0 Author: Caolán McNamara <caol...@redhat.com> Date: Fri Jul 27 14:11:08 2012 +0100 list dependencies explicitly and make the list (by its makefile proxy) a dependency of the output so that removing an entry will trigger a rebuild of the target and incremental builds are possible Change-Id: I18c8d5ea2140e61b2ef78e256871402be94b79e2 diff --git a/i18npool/CustomTarget_breakiterator.mk b/i18npool/CustomTarget_breakiterator.mk index 3e0df9e..621e8f1 100644 --- a/i18npool/CustomTarget_breakiterator.mk +++ b/i18npool/CustomTarget_breakiterator.mk @@ -65,20 +65,35 @@ i18npool_GENCMNTARGET := i18npool_GENCMN := $(SYSTEM_GENCMN) endif -i18npool_BRKFILES := $(subst .txt,.brk,$(notdir \ - $(wildcard $(SRCDIR)/i18npool/source/breakiterator/data/*.txt))) +i18npool_BRKTXTS := \ + char_in.brk \ + char.brk \ + count_word_fi.brk \ + count_word.brk \ + dict_word_ca.brk \ + dict_word_fi.brk \ + dict_word_he.brk \ + dict_word_hu.brk \ + dict_word_nodash.brk \ + dict_word_prepostdash.brk \ + dict_word.brk \ + edit_word_he.brk \ + edit_word_hu.brk \ + edit_word.brk \ + line.brk \ + sent.brk # 'gencmn', 'genbrk' and 'genccode' are tools generated and delivered by icu project to process icu breakiterator rules. # The output of gencmn generates warnings under Windows. We want to minimize the patches to external tools, # so the output (OpenOffice_dat.c) is changed here to include a pragma to disable the warnings. # Output of gencmn is redirected to OpenOffice_tmp.c with the -t switch. -$(i18npool_BIDIR)/OpenOffice_dat.c : \ - $(patsubst %.brk,$(i18npool_BIDIR)/%_brk.c,$(i18npool_BRKFILES)) \ +$(i18npool_BIDIR)/OpenOffice_dat.c : $(SRCDIR)/i18npool/CustomTarget_breakiterator.mk \ + $(patsubst %.brk,$(i18npool_BIDIR)/%_brk.c,$(i18npool_BRKTXTS)) \ $(i18npool_GENCMNTARGET) $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),$(true),CMN,1) $(call gb_Helper_abbreviate_dirs,\ RESPONSEFILE=$(shell $(gb_MKTEMP)) && \ - $(foreach brk,$(i18npool_BRKFILES),echo '$(brk)' >> $${RESPONSEFILE} && ) \ + $(foreach brk,$(i18npool_BRKTXTS),echo '$(brk)' >> $${RESPONSEFILE} && ) \ $(i18npool_GENCMN) -n OpenOffice -t tmp -S -d $(i18npool_BIDIR)/ 0 $${RESPONSEFILE} && \ rm -f $${RESPONSEFILE} && \ echo '#ifdef _MSC_VER' > $@ && \ diff --git a/i18npool/CustomTarget_collator.mk b/i18npool/CustomTarget_collator.mk index 95cf190..2613f67 100644 --- a/i18npool/CustomTarget_collator.mk +++ b/i18npool/CustomTarget_collator.mk @@ -29,9 +29,28 @@ $(eval $(call gb_CustomTarget_CustomTarget,i18npool/collator)) i18npool_CODIR := $(call gb_CustomTarget_get_workdir,i18npool/collator) -i18npool_COTXTS := $(notdir $(wildcard $(SRCDIR)/i18npool/source/collator/data/*.txt)) +i18npool_COTXTS := \ + ca_charset.txt \ + dz_charset.txt \ + hu_charset.txt \ + ja_charset.txt \ + ja_phonetic_alphanumeric_first.txt \ + ja_phonetic_alphanumeric_last.txt \ + ko_charset.txt \ + ku_alphanumeric.txt \ + ln_charset.txt \ + my_dictionary.txt \ + ne_charset.txt \ + zh_charset.txt \ + zh_pinyin.txt \ + zh_radical.txt \ + zh_stroke.txt \ + zh_TW_charset.txt \ + zh_TW_radical.txt \ + zh_TW_stroke.txt \ + zh_zhuyin.txt -$(call gb_CustomTarget_get_target,i18npool/collator) : \ +$(call gb_CustomTarget_get_target,i18npool/collator) : $(SRCDIR)/i18npool/CustomTarget_collator.mk \ $(i18npool_CODIR)/lrl_include.hxx $(foreach txt,$(i18npool_COTXTS), \ $(patsubst %.txt,$(i18npool_CODIR)/collator_%.cxx,$(txt))) diff --git a/i18npool/CustomTarget_indexentry.mk b/i18npool/CustomTarget_indexentry.mk index 8a6a63c..03a8ff5 100644 --- a/i18npool/CustomTarget_indexentry.mk +++ b/i18npool/CustomTarget_indexentry.mk @@ -28,9 +28,20 @@ $(eval $(call gb_CustomTarget_CustomTarget,i18npool/indexentry)) -$(call gb_CustomTarget_get_target,i18npool/indexentry) : \ - $(patsubst %.txt,$(call gb_CustomTarget_get_workdir,i18npool/indexentry)/%.cxx,$(notdir \ - $(wildcard $(SRCDIR)/i18npool/source/indexentry/data/*.txt))) +i18npool_IDXTXTS := \ + indexdata_ko_dict.txt \ + indexdata_zh_pinyin.txt \ + indexdata_zh_radical.txt \ + indexdata_zh_stroke.txt \ + indexdata_zh_TW_radical.txt \ + indexdata_zh_TW_stroke.txt \ + indexdata_zh_zhuyin.txt \ + ko_phonetic.txt \ + zh_pinyin.txt \ + zh_zhuyin.txt + +$(call gb_CustomTarget_get_target,i18npool/indexentry) : $(SRCDIR)/i18npool/CustomTarget_indexentry.mk \ + $(patsubst %.txt,$(call gb_CustomTarget_get_workdir,i18npool/indexentry)/%.cxx,$(i18npool_IDXTXTS)) $(call gb_CustomTarget_get_workdir,i18npool/indexentry)/%.cxx : \ $(SRCDIR)/i18npool/source/indexentry/data/%.txt \
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits