diff --git a/TeXmacs/progs/check/check-master.scm b/TeXmacs/progs/check/check-master.scm
index 4f8036df79cf3ef7b912a4778e87bfbda31fc1c1..9c17340d858e319f0da461ffcfd6205149c8a08d 100644
--- a/TeXmacs/progs/check/check-master.scm
+++ b/TeXmacs/progs/check/check-master.scm
@@ -94,4 +94,5 @@
   (regtest-tm-define)
   (regtest-tm-dialogue)
   (regtest-fonts)
+  (regtest-tm-tools)
   )
diff --git a/TeXmacs/progs/texmacs/texmacs/tm-tools-test.scm b/TeXmacs/progs/texmacs/texmacs/tm-tools-test.scm
new file mode 100644
index 0000000000000000000000000000000000000000..40567d83b6a2f8a09e4e6305ed17339b05b06a9c
--- /dev/null
+++ b/TeXmacs/progs/texmacs/texmacs/tm-tools-test.scm
@@ -0,0 +1,35 @@
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; MODULE      : tm-tools-test.scm
+;; DESCRIPTION : Test suite for tm-tools
+;; COPYRIGHT   : (C) 2023 jingkaimori
+;;
+;; This software falls under the GNU general public license version 3 or later.
+;; It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
+;; in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(texmacs-module (texmacs texmacs tm-tools-test)
+  (:use (texmacs texmacs tm-tools)))
+
+;; Check utf8-string-length on strings whose characters need one to four
+;; bytes in UTF-8; every case expects the number of characters, not of bytes.
+(define (utf8-string-length-test)
+  (regression-test-group
+   "unicode character count" "utf8-string-length-test"
+   utf8-string-length :none
+   (test "empty length" "" 0)
+   (test "ascii length (one byte in utf-8)" "Hello world!" 12)
+   (test "russian length (two byte in utf-8)" "Всем привет!" 12)
+   (test "kanji length (three byte in utf-8)" "你好世界!" 5)
+   (test "fraktur length (four byte in utf-8)" "𝕳𝖊𝖑𝖑𝖔 𝖜𝖔𝖗𝖑𝖉!" 12)
+   (test "hybrid length" "Hello world!Всем привет!你好世界!𝕳𝖊𝖑𝖑𝖔 𝖜𝖔𝖗𝖑𝖉!" 41)))
+
+;; Run the test groups of this module, add up their results and print a summary.
+(tm-define (regtest-tm-tools)
+  (let ((n (+ (utf8-string-length-test)
+              0)))
+    (display* "Total: " (object->string n) " tests.\n")
+    (display "Test suite of tm-tools: ok\n")))
\ No newline at end of file
diff --git a/TeXmacs/progs/texmacs/texmacs/tm-tools.scm b/TeXmacs/progs/texmacs/texmacs/tm-tools.scm
index f7ccec41730e8aafa1c517c3f691772a28935fd5..167d73e36d0edc7e0b654ce9843be5ec49b859e1 100644
--- a/TeXmacs/progs/texmacs/texmacs/tm-tools.scm
+++ b/TeXmacs/progs/texmacs/texmacs/tm-tools.scm
@@ -17,9 +17,27 @@
 ;; Document statistics
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+;; Count the characters (Unicode code points) of the UTF-8 encoded string s.
+;; Each code point contributes exactly one byte that is not a continuation
+;; byte (bit pattern 10xxxxxx), so counting those bytes counts the code
+;; points.  The scan walks the string by index in a tail call, so it neither
+;; copies substrings nor grows the stack with the length of s.
+;; TODO: if Scheme strings ever hold one Unicode code point per character
+;; instead of raw UTF-8 bytes, replace this function with string-length.
+(tm-define (utf8-string-length s)
+  (let ((n (string-length s)))
+    (let loop ((i 0) (chars 0))
+      (if (>= i n)
+          chars
+          (let ((byte (char->integer (string-ref s i))))
+            (loop (+ i 1)
+                  (if (== (logand byte #xc0) #x80)
+                      chars
+                      (+ chars 1))))))))
+
 (tm-define (count-characters doc)
   (with s (convert doc "texmacs-tree" "verbatim-snippet")
-    (string-length s)))
+    (utf8-string-length s)))
 
 (define (compress-spaces s)
   (let* ((s1 (string-replace s "\n" " "))
diff --git a/TeXmacs/tests/17_1.scm b/TeXmacs/tests/17_1.scm
new file mode 100644
index 0000000000000000000000000000000000000000..e5db7ad8a751eb958a0f34cb747c7d26722c86fc
--- /dev/null
+++ b/TeXmacs/tests/17_1.scm
@@ -0,0 +1,17 @@
+;; Load the sample document 17_1.tm and check that count-characters reports
+;; 41 for its buffer tree.
+(define (test-count-characters)
+  (load-buffer "$TEXMACS_PATH/tests/17_1.tm")
+  (regression-test-group
+   "count-characters" "result"
+   count-characters :none
+   (test "character count" (buffer-tree) 41)))
+
+;; Entry point of this test file: runs test-count-characters and prints a
+;; summary.  Named after this file (17_1) instead of the previous test_9_1.
+;; NOTE(review): confirm the test runner invokes test_17_1 for 17_1.scm.
+(tm-define (test_17_1)
+  (let ((n (+ (test-count-characters)
+              0)))
+    (display* "Total: " (object->string n) " tests.\n")
+    (display "Test suite of 17_1: ok\n")))
diff --git a/TeXmacs/tests/17_1.tm b/TeXmacs/tests/17_1.tm
new file mode 100644
index
0000000000000000000000000000000000000000..975b4df378e9c23c5cbdc8ee39703ac9fd5b83ce --- /dev/null +++ b/TeXmacs/tests/17_1.tm @@ -0,0 +1,12 @@ + + +> + +<\body> + Hello world!\<#412\>\<#441\>\<#435\>\<#43C\> + \<#43F\>\<#440\>\<#438\>\<#432\>\<#435\>\<#442\>!\<#4F60\>\<#597D\>\<#4E16\>\<#754C\>\<#FF01\>\<#1D573\>\<#1D58A\>\<#1D591\>\<#1D591\>\<#1D594\> + \<#1D59C\>\<#1D594\>\<#1D597\>\<#1D591\>\<#1D589\>! + + + +> \ No newline at end of file