branch: externals/pyim
commit d58d03e3a163933d7c7efe4357269fc8bc78ae7c
Author: Feng Shu <tuma...@163.com>
Commit: Feng Shu <tuma...@163.com>
Add pyim-candidates-quanpin-first-chars --- pyim-candidates.el | 83 +++++++++++++++++++++++++++-------------------------- tests/pyim-tests.el | 40 +++++++++++++++++++------- 2 files changed, 73 insertions(+), 50 deletions(-) diff --git a/pyim-candidates.el b/pyim-candidates.el index bf6f0c0759..bd1272f27e 100644 --- a/pyim-candidates.el +++ b/pyim-candidates.el @@ -153,19 +153,20 @@ "用于全拼输入法的 `pyim-candidates-create' 方法内部使用的函数。" (let* ((znabc-words (pyim-candidates-znabc-words imobjs scheme fast-search)) (jianpin-words (pyim-candidates-jianpin-words imobjs scheme fast-search)) - (dcache-words (pyim-candidates-dcache-quanpin-words imobjs scheme fast-search)) - (personal-words (pyim-candidates-sort (nth 0 dcache-words))) + (quanpin-words (pyim-candidates-quanpin-words imobjs scheme fast-search)) + (personal-words (pyim-candidates-sort (nth 0 quanpin-words))) + (common-words (nth 1 quanpin-words)) (chief-word (pyim-candidates-get-chief scheme personal-words)) - (common-words (nth 1 dcache-words)) - (pinyin-chars-1 (nth 2 dcache-words)) - (pinyin-chars-2 (nth 3 dcache-words)) + (quanpin-chars (pyim-candidates-quanpin-first-chars imobjs scheme fast-search)) + (matched-chars (nth 0 quanpin-chars)) + (possible-chars (nth 1 quanpin-chars)) (words `( :chief-word ,chief-word :personal-words ,@personal-words :jianpin-words ,@jianpin-words :common-words ,@common-words :znabc-words ,@znabc-words - :pinyin-chars-1 ,@pinyin-chars-1 - :pinyin-chars-2 ,@pinyin-chars-2))) + :matched-chars ,@matched-chars + :possible-chars ,@possible-chars))) (when pyim-debug (print words)) (delete-dups (cl-remove-if-not #'stringp words)))) @@ -211,24 +212,17 @@ (push (delete-dups (append w2 w1)) jianpin-words))) (pyim-zip (nreverse jianpin-words) fast-search)))) -(defun pyim-candidates-dcache-quanpin-words (imobjs scheme &optional fast-search) - "从 dcache 获取个人词条,词库词条和第一汉字列表。" - (let (personal-words common-words pinyin-chars-1 pinyin-chars-2) +(defun pyim-candidates-quanpin-words (imobjs scheme 
&optional fast-search) + "从 dcache 获取个人词条,词库词条。" + (let (personal-words common-words) (dolist (imobj imobjs) (let* ((w1 (pyim-candidates-quanpin-personal-words imobj scheme)) - (w2 (pyim-candidates-quanpin-common-words imobj scheme)) - (w3 (pyim-candidates-quanpin-first-chars imobj scheme)) - (w4 (unless w3 - (pyim-candidates-quanpin-first-possible-chars imobj scheme)))) + (w2 (pyim-candidates-quanpin-common-words imobj scheme))) (push w1 personal-words) - (push w2 common-words) - (push w3 pinyin-chars-1) - (push w4 pinyin-chars-2))) + (push w2 common-words))) (setq personal-words (pyim-zip (nreverse personal-words) fast-search)) (setq common-words (pyim-zip (nreverse common-words) fast-search)) - (setq pinyin-chars-1 (pyim-zip (nreverse pinyin-chars-1) fast-search)) - (setq pinyin-chars-2 (pyim-zip (nreverse pinyin-chars-2) fast-search)) - (list personal-words common-words pinyin-chars-1 pinyin-chars-2))) + (list personal-words common-words))) (defun pyim-candidates-quanpin-personal-words (imobj scheme) (pyim-dcache-get @@ -244,33 +238,42 @@ '(code2word shortcode2word) '(code2word)))) -(defun pyim-candidates-quanpin-first-chars (imobj scheme) +(defun pyim-candidates-quanpin-first-chars (imobjs scheme &optional fast-search) + "获取词条第一汉字列表。" + (let (matched-chars possible-chars) + (dolist (imobj imobjs) + (let* ((w1 (pyim-candidates-quanpin-first-matched-chars imobj scheme)) + (w2 (unless w1 + (pyim-candidates-quanpin-first-possible-chars imobj scheme)))) + (push w1 matched-chars) + (push w2 possible-chars))) + (setq matched-chars (pyim-zip (nreverse matched-chars) fast-search)) + (setq possible-chars (pyim-zip (nreverse possible-chars) fast-search)) + (list matched-chars possible-chars))) + +(defun pyim-candidates-quanpin-first-matched-chars (imobj scheme) "获取输入的全拼对应的第一个汉字。 假如用户输入 nihao 时,获取 ni 对应的汉字。" - (let* ((code (car (pyim-codes-create imobj scheme))) - (chars (delete-dups - `(,@(pyim-dcache-get code '(icode2word code2word)) - ,@(pyim-pymap-py2cchar-get code t 
t))))) - chars)) + (let ((code (car (pyim-codes-create imobj scheme)))) + (delete-dups + `(,@(pyim-dcache-get code '(icode2word code2word)) + ,@(pyim-pymap-py2cchar-get code t t))))) (defun pyim-candidates-quanpin-first-possible-chars (imobj scheme) "获取输入的全拼对应的第一个可能的常用汉字。 -1. 假如用户输入 ni 时,获取拼音匹配 ni.* 的常用汉字,比如: ni - niao ning niu 等等。 -2. 假如用户输入 nihao 时,获取拼音为 ni 的常用汉字。" - (let* ((pinyin (car (pyim-codes-create imobj scheme))) - (chars (mapcar #'char-to-string - (pyim-zip - (mapcar (lambda (x) - ;; NOTE: 这里只取最常用的汉字,太多的汉字会带 - ;; 来后续处理压力,可能拖慢输入法。不过这个结 - ;; 论只是猜测。 - (car (split-string x "|"))) - (pyim-pymap-py2cchar-get - pinyin (> (length imobj) 1))))))) - chars)) +假如用户输入 ni 时,获取拼音匹配 ni.* 的常用汉字,比如:ni niao +ning niu 等等。" + (let ((pinyin (car (pyim-codes-create imobj scheme)))) + (mapcar #'char-to-string + (pyim-zip + (mapcar (lambda (x) + ;; NOTE: 这里只取最常用的汉字,太多的汉字会带 + ;; 来后续处理压力,可能拖慢输入法。不过这个结 + ;; 论只是猜测。 + (car (split-string x "|"))) + (pyim-pymap-py2cchar-get pinyin nil 1)))))) (cl-defgeneric pyim-candidates-create-limit-time (_imobjs _scheme) "按照 SCHEME, 使用限时运行的方式从 IMOBJS 获得候选词条。 diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el index b112934529..7c25ff4385 100644 --- a/tests/pyim-tests.el +++ b/tests/pyim-tests.el @@ -590,7 +590,7 @@ (should (equal (pyim-tests-sublists lists 4) '((1 2 3) (1 2 3 4)))))) -(ert-deftest pyim-tests-pyim-candidates-get-dcache-words () +(ert-deftest pyim-tests-pyim-candidates-quanpin-words/first-chars () (let* ((pyim-dhashcache-code2word (make-hash-table :test #'equal)) (pyim-dhashcache-icode2word (make-hash-table :test #'equal)) (pyim-dhashcache-shortcode2word (make-hash-table :test #'equal)) @@ -605,12 +605,32 @@ (puthash "ni-hao" (list "你好" "尼耗" "呢耗") pyim-dhashcache-icode2word) (puthash "ni-hao" (list "你好" "尼耗") pyim-dhashcache-code2word) (puthash "n-h" (list "你好" "你坏" "尼耗" "南好" "内核" "内河") pyim-dhashcache-ishortcode2word) - (should (equal (pyim-tests-sublists (pyim-candidates-dcache-quanpin-words imobjs1 quanpin nil) 10) - '(("你" "您" 
"妮") nil nil ("南" "乃" "囊" "脑" "呢" "内" "嫩" "能" "你" "年")))) - (should (equal (pyim-tests-sublists (pyim-candidates-dcache-quanpin-words imobjs2 quanpin nil) 10) - '(("你" "尼") ("尼" "你") ("你" "尼" "呢" "泥" "拟" "逆" "倪" "妮" "腻" "匿") nil))) - (should (equal (pyim-tests-sublists (pyim-candidates-dcache-quanpin-words imobjs3 quanpin nil) 10) - '(("你好" "尼耗" "呢耗") ("你好" "尼耗") ("你好" "尼耗" "呢耗") nil))))) + + (should (equal (pyim-tests-sublists (pyim-candidates-quanpin-words imobjs1 quanpin nil) 10) + '(("你" "您" "妮") nil))) + + (should (equal (pyim-tests-sublists (pyim-candidates-quanpin-first-chars imobjs1 quanpin nil) 10) + '(nil ("南" "乃" "囊" "脑" "呢" "内" "嫩" "能" "你" "年")))) + + (should (equal (pyim-tests-sublists (pyim-candidates-quanpin-words imobjs1 quanpin nil) 10) + '(("你" "您" "妮") nil))) + + (should (equal (pyim-tests-sublists (pyim-candidates-quanpin-first-chars imobjs1 quanpin nil) 10) + '(nil ("南" "乃" "囊" "脑" "呢" "内" "嫩" "能" "你" "年")))) + + (should (equal (pyim-tests-sublists (pyim-candidates-quanpin-words imobjs2 quanpin nil) 10) + '(("你" "尼") ("尼" "你")))) + + (should (equal (pyim-tests-sublists (pyim-candidates-quanpin-first-chars imobjs2 quanpin nil) 10) + '(("你" "尼" "呢" "泥" "拟" "逆" "倪" "妮" "腻" "匿") nil))) + + (should (equal (pyim-tests-sublists (pyim-candidates-quanpin-words imobjs3 quanpin nil) 10) + '(("你好" "尼耗" "呢耗") ("你好" "尼耗")))) + + (should (equal (pyim-tests-sublists (pyim-candidates-quanpin-first-chars imobjs3 quanpin nil) 10) + '(("你好" "尼耗" "呢耗") nil))) + + )) (ert-deftest pyim-tests-pyim-candidates-quanpin-personal-words () (let* ((pyim-dhashcache-icode2word (make-hash-table :test #'equal)) @@ -648,14 +668,14 @@ (should (equal (pyim-candidates-quanpin-common-words (car imobjs3) quanpin) '("呢耗" "你好" "你坏" "尼耗"))))) -(ert-deftest pyim-tests-pyim-candidates-quanpin-first-chars () +(ert-deftest pyim-tests-pyim-candidates-quanpin-first-matched-chars () (let* ((pyim-dhashcache-icode2word (make-hash-table :test #'equal)) (pyim-dhashcache-code2word (make-hash-table 
:test #'equal)) (quanpin (pyim-scheme-get 'quanpin)) (imobjs (pyim-imobjs-create "nihao" quanpin))) (puthash "ni" (list "你" "呢") pyim-dhashcache-icode2word) (puthash "ni" (list "你" "尼") pyim-dhashcache-code2word) - (should (equal (pyim-tests-sublist (pyim-candidates-quanpin-first-chars (car imobjs) quanpin) 10) + (should (equal (pyim-tests-sublist (pyim-candidates-quanpin-first-matched-chars (car imobjs) quanpin) 10) '("你" "呢" "尼" "泥" "拟" "逆" "倪" "妮" "腻" "匿"))))) (ert-deftest pyim-tests-pyim-candidates-quanpin-first-possible-chars () @@ -665,7 +685,7 @@ (should (equal (pyim-tests-sublist (pyim-candidates-quanpin-first-possible-chars (car imobjs1) quanpin) 10) '("你" "年" "娘" "鸟" "摄" "您" "宁" "牛" "尼" "念"))) (should (equal (pyim-tests-sublist (pyim-candidates-quanpin-first-possible-chars (car imobjs2) quanpin) 10) - '("你" "尼" "呢" "泥" "拟" "逆" "倪" "妮" "腻" "匿"))))) + '("你" "年" "娘" "鸟" "摄" "您" "宁" "牛" "尼" "念"))))) (ert-deftest pyim-tests-pyim-candidates-search-buffer () (with-temp-buffer