branch: externals/pyim commit 892cf95d35e3da858e48b731e23c5db13e51a8ed Author: Feng Shu <tuma...@163.com> Commit: Feng Shu <tuma...@163.com>
Sort lines when export. --- pyim-dhashcache.el | 101 +++++++++++++++++++++++++++++++--------------------- tests/pyim-tests.el | 20 +++++++++-- 2 files changed, 79 insertions(+), 42 deletions(-) diff --git a/pyim-dhashcache.el b/pyim-dhashcache.el index 07ad13ce3f..962a0abd5b 100644 --- a/pyim-dhashcache.el +++ b/pyim-dhashcache.el @@ -36,6 +36,7 @@ (require 'cl-lib) (require 'async nil t) (require 'pyim-common) +(require 'pyim-cstring) (require 'pyim-dcache) (require 'pyim-dict) (require 'pyim-scheme) @@ -716,51 +717,71 @@ pyim 使用的词库文件是简单的文本文件,编码 *强制* 为 \\='utf 如果 CONFIRM 为 non-nil,文件存在时将会提示用户是否覆盖, 默认为覆盖模式" (with-temp-buffer - (maphash - (lambda (key value) - (let ((value (cl-remove-if - (lambda (x) - ;; 如果某个词条的 text 属性 :noexport 设置为 t, 在导出的 - ;; 时候自动忽略这个词条。 - (and (stringp x) - (get-text-property 0 :noexport x))) - (if (listp value) - value - (list value))))) - (when value - (insert (format "%s %s\n" key (mapconcat #'identity value " ")))))) - dcache) - (sort-lines nil (point-min) (point-max)) - (goto-char (point-min)) - (insert ";;; -*- coding: utf-8-unix -*-\n") - (pyim-dcache-write-file file confirm))) + (let (export-lines) + (maphash + (lambda (key value) + (let ((value (cl-remove-if + (lambda (x) + ;; 如果某个词条的 text 属性 :noexport 设置为 t, 在导出的 + ;; 时候自动忽略这个词条。 + (and (stringp x) + (get-text-property 0 :noexport x))) + (if (listp value) + value + (list value))))) + (when value + (push + (format "%s %s\n" key (mapconcat #'identity value " ")) + export-lines)))) + dcache) + (setq export-lines (sort export-lines #'string<)) + (goto-char (point-min)) + (insert ";;; -*- coding: utf-8-unix -*-\n") + (dolist (line export-lines) + (insert line)) + (pyim-dcache-write-file file confirm)))) (cl-defmethod pyim-dcache-export-words-and-counts (file &context ((pyim-dcache-backend) (eql pyim-dhashcache)) &optional confirm ignore-counts) (with-temp-buffer - (insert ";;; -*- coding: utf-8-unix -*-\n") - (maphash - (lambda (key value) - (insert - (if ignore-counts - (format "%s\n" key) - (format "%s %s\n" key value)))) - pyim-dhashcache-iword2count) - ;; 在默认情况下,用户选择过的词生成的缓存中存在的词条, - ;; `pyim-dhashcache-iword2count' 中也一定存在,但如果用户 - ;; 使用了特殊的方式给用户选择过的词生成的缓存中添加了 - ;; 词条,那么就需要将这些词条也导出,且设置词频为 0 - (maphash - (lambda (_ words) - (dolist (word words) - (unless (gethash word pyim-dhashcache-iword2count) - (insert - (if ignore-counts - (format "%s\n" word) - (format "%s %s\n" word 0)))))) - pyim-dhashcache-icode2word) - (pyim-dcache-write-file file confirm))) + (let (export-lines) + (maphash + (lambda (key value) + (push + (if ignore-counts + (format "%s\n" key) + (format "%s %s\n" key value)) + export-lines)) + pyim-dhashcache-iword2count) + ;; 在默认情况下,用户选择过的词生成的缓存中存在的词条, + ;; `pyim-dhashcache-iword2count' 中也一定存在,但如果用户 + ;; 使用了特殊的方式给用户选择过的词生成的缓存中添加了 + ;; 词条,那么就需要将这些词条也导出,且设置词频为 0 + (maphash + (lambda (_ words) + (dolist (word words) + (unless (gethash word pyim-dhashcache-iword2count) + (push + (if ignore-counts + (format "%s\n" word) + (format "%s %s\n" word 0)) + export-lines)))) + pyim-dhashcache-icode2word) + (setq export-lines + (sort export-lines + #'pyim-dhashcache-pinyin-string<)) + (goto-char (point-min)) + (insert ";;; -*- coding: utf-8-unix -*-\n") + (dolist (line export-lines) + (insert line)) + (pyim-dcache-write-file file confirm)))) + +(defun pyim-dhashcache-pinyin-string< (a b) + "比较 A 和 B 两个字符串的拼音的大小。" + (let ((pinyin1 (pyim-cstring-to-pinyin-simple a)) + (pinyin2 (pyim-cstring-to-pinyin-simple b))) + (string< pinyin1 pinyin2))) ;; * Footer (provide 'pyim-dhashcache) diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el index 90cbb6c48b..cba1482c96 100644 --- a/tests/pyim-tests.el +++ b/tests/pyim-tests.el @@ -1083,6 +1083,7 @@ (pyim-dhashcache-icode2word (make-hash-table :test #'equal)) (file (pyim-tests-make-temp-file))) (puthash "你好" 10 pyim-dhashcache-iword2count) + (puthash "锕系" 10 pyim-dhashcache-iword2count) (puthash "尼耗" 1 pyim-dhashcache-iword2count) (puthash "wo-hao" (list "我好") pyim-dhashcache-icode2word) (puthash "ni-hao" (list "你好" "尼耗") pyim-dhashcache-icode2word) @@ -1091,8 +1092,9 @@ (insert-file-contents file) (should (equal (buffer-string) ";;; -*- coding: utf-8-unix -*- -你好 10 +锕系 10 尼耗 1 +你好 10 我好 0 "))) (pyim-dcache-export-words-and-counts file nil t) @@ -1100,8 +1102,9 @@ (insert-file-contents file) (should (equal (buffer-string) ";;; -*- coding: utf-8-unix -*- -你好 +锕系 尼耗 +你好 我好 "))) (pyim-dcache-export-personal-words file) @@ -1160,6 +1163,19 @@ wo-hao 我好 '("/home/user/test3.pyim" "/home/user/test2.pyim"))))) ;; ** pyim-dhashcache 相关单元测试 +(ert-deftest pyim-tests-pyim-dhashcache-pinyin-string< () + (should (pyim-dhashcache-pinyin-string< "啊" "波")) + (should-not (string< "锕" "波")) + (should (pyim-dhashcache-pinyin-string< "锕" "波")) + (should-not (pyim-dhashcache-pinyin-string< "波" "啊")) + (should (pyim-dhashcache-pinyin-string< "a" "b")) + (should-not (pyim-dhashcache-pinyin-string< "b" "a")) + (should (pyim-dhashcache-pinyin-string< "aa" "ab")) + (should-not (pyim-dhashcache-pinyin-string< "ab" "aa")) + (should (pyim-dhashcache-pinyin-string< "你不好" "你好")) + (should-not (pyim-dhashcache-pinyin-string< "你好" "你不好")) + ) + (ert-deftest pyim-tests-pyim-dhashcache-get-shortcodes () (should (equal (pyim-dhashcache-get-shortcodes ".abcde") nil)) (should (equal (pyim-dhashcache-get-shortcodes "wubi/abcde")