From d2f17d4a2dc8d7055ea90d4c30e67e2bc0b02293 Mon Sep 17 00:00:00 2001
From: jiadong <jiadong@liii.pro>
Date: Mon, 20 Oct 2025 16:20:35 +0800
Subject: [PATCH] =?UTF-8?q?[208=5F11]=20=E4=BF=AE=E5=A4=8D=20unicode=20?=
 =?UTF-8?q?=E6=A8=A1=E5=9D=97=E8=AF=AD=E6=B3=95=E9=94=99=E8=AF=AF=E5=B9=B6?=
 =?UTF-8?q?=E6=B7=BB=E5=8A=A0=20UTF-16LE=20=E8=BD=AC=E6=8D=A2=E5=87=BD?=
 =?UTF-8?q?=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 devel/208_11.md                      | 28 +++++++++++++++++++
 devel/208_2.md                       | 18 ++++++++++++
 goldfish/liii/base.scm               |  2 +-
 goldfish/liii/lang.scm               |  2 +-
 goldfish/liii/rich-string.scm        |  4 +--
 goldfish/liii/unicode.scm            | 41 ++++++++++++++++++++++++++--
 goldfish/scheme/base.scm             |  6 ++--
 tests/goldfish/liii/unicode-test.scm | 38 ++++++++++++++------------
 8 files changed, 112 insertions(+), 27 deletions(-)
 create mode 100644 devel/208_11.md

diff --git a/devel/208_11.md b/devel/208_11.md
new file mode 100644
index 00000000..a90c0518
--- /dev/null
+++ b/devel/208_11.md
@@ -0,0 +1,28 @@
+# [208_11] 重命名 ut8-string-length 为 utf8-string-length
+
+## 任务相关的代码文件
+- `goldfish/scheme/base.scm` - 函数定义和导出
+- `goldfish/liii/unicode.scm` - 函数导出声明
+- `goldfish/liii/base.scm` - 函数导出声明
+- `goldfish/liii/lang.scm` - 函数导入声明
+- `goldfish/liii/rich-string.scm` - 函数使用
+- `tests/goldfish/liii/unicode-test.scm` - 测试用例和文档
+- `devel/208_2.md` - 相关文档
+
+## 如何测试
+一般先构建，再lint，最后运行测试用例。
+
+## 2025-10-20 重命名 ut8-string-length 为 utf8-string-length
+
+### What
+1. 将函数名 `ut8-string-length` 重命名为 `utf8-string-length`
+2. 更新所有相关的函数调用和引用
+3. 确保测试用例通过
+。。。。。。。。。。。。
+### Why
+函数名 `ut8-string-length` 存在拼写错误，应该是 `utf8-string-length` 以保持与 UTF-8 标准命名的一致性。
+
+### How
+1. 首先搜索代码库中所有使用 `ut8-string-length` 的地方
+2. 使用全局替换将函数名更新为正确的拼写
+3. 验证所有测试用例仍然正常工作
\ No newline at end of file
diff --git a/devel/208_2.md b/devel/208_2.md
index 5f288191..88e1421d 100644
--- a/devel/208_2.md
+++ b/devel/208_2.md
@@ -14,6 +14,24 @@ bin/goldfish tools/lint.scm tests/goldfish/liii/unicode-test.scm
 bin/goldfish tests/goldfish/liii/unicode-test.scm
 ```
 
+## 2025/10/20 utf8-string-length 重命名
+### What
+将 u8-string-length 函数重命名为 utf8-string-length，以符合 UTF-8 标准命名规范。
+
+1. 更新 goldfish/scheme/base.scm 中的函数定义和导出声明
+2. 更新 goldfish/liii/unicode.scm 中的导出声明
+3. 更新 goldfish/liii/base.scm 中的导出声明
+4. 更新 goldfish/liii/lang.scm 中的导入声明
+5. 更新 goldfish/liii/rich-string.scm 中的函数使用
+6. 更新 tests/goldfish/liii/unicode-test.scm 中的测试用例和文档
+7. 更新 devel/208_2.md 中的相关文档
+
+### Why
+统一 UTF-8 相关函数的命名规范，提高代码一致性和可读性。
+
+### How
+通过全局搜索和替换，系统性地更新所有相关文件中的函数引用，确保命名一致性。
+
 ## 2025/10/18 u8-string-length 文档
 ### What
 为 u8-string-length 函数添加详细的文档和测试用例。
diff --git a/goldfish/liii/base.scm b/goldfish/liii/base.scm
index d5fb68e9..fbf9316d 100644
--- a/goldfish/liii/base.scm
+++ b/goldfish/liii/base.scm
@@ -44,7 +44,7 @@
     ; R7RS 6.9 Bytevectors
     bytevector? make-bytevector bytevector bytevector-length bytevector-u8-ref
     bytevector-u8-set! bytevector-copy bytevector-append
-    utf8->string string->utf8 u8-string-length u8-substring bytevector-advance-utf8
+    utf8->string string->utf8 utf8-string-length u8-substring bytevector-advance-utf8
     ; Input and Output
     call-with-port port? binary-port? textual-port? input-port-open? output-port-open?
     open-binary-input-file open-binary-output-file close-port eof-object
diff --git a/goldfish/liii/lang.scm b/goldfish/liii/lang.scm
index b1e80dae..02988777 100644
--- a/goldfish/liii/lang.scm
+++ b/goldfish/liii/lang.scm
@@ -17,7 +17,7 @@
 (define-library (liii lang)
                 
   (import (only (liii base)
-                u8-string-length any? receive u8-substring)
+                utf8-string-length any? receive u8-substring)
           (only (liii oop)
                 define-case-class display* @ typed-define case-class? chained-define
                 define-object define-class chain-apply object->string)
diff --git a/goldfish/liii/rich-string.scm b/goldfish/liii/rich-string.scm
index fd23d994..0159ae24 100644
--- a/goldfish/liii/rich-string.scm
+++ b/goldfish/liii/rich-string.scm
@@ -27,7 +27,7 @@
     (define-case-class rich-string
       ((data string?))
   
-      (define N (u8-string-length data))
+      (define N (utf8-string-length data))
 
       (define (@empty . args)
         (chain-apply args (rich-string "")))
@@ -330,7 +330,7 @@
 
       (define (%split sep)
         (let ((str-len N)
-              (sep-len (u8-string-length sep)))
+              (sep-len (utf8-string-length sep)))
     
           (define (split-helper start acc)
             (let ((next-pos (%index-of sep start)))
diff --git a/goldfish/liii/unicode.scm b/goldfish/liii/unicode.scm
index 4d4a8298..9723954c 100644
--- a/goldfish/liii/unicode.scm
+++ b/goldfish/liii/unicode.scm
@@ -17,7 +17,7 @@
 (define-library (liii unicode)
   (export
    ;; UTF-8 函数
-   utf8->string string->utf8 u8-string-length u8-substring bytevector-advance-utf8
+   utf8->string string->utf8 utf8-string-length u8-substring bytevector-advance-utf8
    codepoint->utf8 utf8->codepoint
 
    ;; UTF-16BE 函数
@@ -306,4 +306,41 @@
 
             (else
              ;; 基本多文种平面字符 - 单个码元
-             first-codepoint))))))
\ No newline at end of file
+             first-codepoint))))))
+
+    (define (utf8->utf16le bytevector)
+      (unless (bytevector? bytevector)
+        (error 'type-error "utf8->utf16le: expected bytevector, got" bytevector))
+
+      (let ((len (bytevector-length bytevector)))
+        (when (= len 0)
+          (error 'value-error "utf8->utf16le: empty bytevector"))
+
+        (let loop ((index 0)
+                   (result (bytevector)))
+          (if (>= index len)
+              result
+              (let ((codepoint (utf8->codepoint (bytevector-copy bytevector index len))))
+                (let ((utf16le-bytes (codepoint->utf16le codepoint)))
+                  (loop (bytevector-advance-utf8 bytevector index)
+                        (bytevector-append result utf16le-bytes))))))))
+
+    (define (utf16le->utf8 bytevector)
+      (unless (bytevector? bytevector)
+        (error 'type-error "utf16le->utf8: expected bytevector, got" bytevector))
+
+      (let ((len (bytevector-length bytevector)))
+        (when (= len 0)
+          (error 'value-error "utf16le->utf8: empty bytevector"))
+
+        (let loop ((index 0)
+                   (result (bytevector)))
+          (if (>= index len)
+              result
+              (let ((codepoint (utf16le->codepoint (bytevector-copy bytevector index len))))
+                (let ((utf8-bytes (codepoint->utf8 codepoint)))
+                  (loop (+ index (if (<= codepoint #xFFFF) 2 4))
+                        (bytevector-append result utf8-bytes))))))
+
+))
+
diff --git a/goldfish/scheme/base.scm b/goldfish/scheme/base.scm
index 67a99d9a..bf9bdd20 100644
--- a/goldfish/scheme/base.scm
+++ b/goldfish/scheme/base.scm
@@ -38,7 +38,7 @@
     ; R7RS 6.9: Bytevectors
     bytevector? make-bytevector bytevector bytevector-length bytevector-u8-ref
     bytevector-u8-set! bytevector-copy bytevector-append
-    utf8->string string->utf8 u8-string-length bytevector-advance-utf8
+    utf8->string string->utf8 utf8-string-length bytevector-advance-utf8
     ; Input and Output
     call-with-port port? binary-port? textual-port? input-port-open? output-port-open?
     open-binary-input-file open-binary-output-file close-port eof-object
@@ -445,7 +445,7 @@ wrong-type-arg
                         (+ index 4)))))
              (else index)))))  ; Invalid leading byte
 
-    (define (u8-string-length str)
+    (define (utf8-string-length str)
       (let ((bv (string->byte-vector str))
             (N (string-length str)))
         (if (zero? N)
@@ -493,7 +493,7 @@ wrong-type-arg
   
       (when (not (string? str))
         (error 'type-error "str must be string"))
-      (let ((N (u8-string-length str)))
+      (let ((N (utf8-string-length str)))
         (when (and (> N 0) (or (< start 0) (>= start N)))
           (error 'out-of-range
                  (string-append "start must >= 0 and < " (number->string N))))
diff --git a/tests/goldfish/liii/unicode-test.scm b/tests/goldfish/liii/unicode-test.scm
index 9064d7ba..b73d8bd7 100644
--- a/tests/goldfish/liii/unicode-test.scm
+++ b/tests/goldfish/liii/unicode-test.scm
@@ -71,7 +71,7 @@ string
 相关函数
 --------
 - `string->utf8` : 将字符串转换为 UTF-8 字节向量
-- `u8-string-length` : 获取字符串的 Unicode 字符数量
+- `utf8-string-length` : 获取字符串的 Unicode 字符数量
 - `u8-substring` : 基于 Unicode 字符位置提取子字符串
 |#
 
@@ -178,7 +178,7 @@ bytevector
 相关函数
 --------
 - `utf8->string` : 将 UTF-8 字节向量转换为字符串
-- `u8-string-length` : 获取字符串的 Unicode 字符数量
+- `utf8-string-length` : 获取字符串的 Unicode 字符数量
 - `u8-substring` : 基于 Unicode 字符位置提取子字符串
 |#
 
@@ -234,12 +234,12 @@ bytevector
 (check (utf8->string (string->utf8 "汉字书写" 3)) => "写")
 
 #|
-u8-string-length
+utf8-string-length
 计算 UTF-8 编码字符串的 Unicode 字符数量（码点数量）。
 
 函数签名
 ----
-(u8-string-length string) → integer
+(utf8-string-length string) → integer
 
 参数
 ----
@@ -253,7 +253,7 @@ integer
 
 描述
 ----
-`u8-string-length` 用于计算 UTF-8 编码字符串中的 Unicode 字符数量，与 `string-length` 不同，
+`utf8-string-length` 用于计算 UTF-8 编码字符串中的 Unicode 字符数量，与 `string-length` 不同，
 它返回的是 Unicode 码点（code point）的数量，而不是字节数量。
 
 行为特征
@@ -266,7 +266,7 @@ integer
 与 string-length 的区别
 -------------------
 - `string-length` : 返回字符串的字节数量
-- `u8-string-length` : 返回字符串的 Unicode 字符数量
+- `utf8-string-length` : 返回字符串的 Unicode 字符数量
 
 
 错误处理
@@ -287,15 +287,15 @@ integer
 - `string->utf8` : 将字符串转换为 UTF-8 字节向量
 |#
 
-(check (u8-string-length "") => 0)
-(check (u8-string-length "Hello") => 5)
-(check (u8-string-length "你好") => 2)
-(check (u8-string-length "Hello 你好") => 8)
-(check (u8-string-length "👍") => 1)
-(check (u8-string-length "🚀") => 1)
-(check (u8-string-length "🎉") => 1)
-(check (u8-string-length "Hello 👍 World") => 13)
-(check (u8-string-length "你好 🚀 测试") => 7)
+(check (utf8-string-length "") => 0)
+(check (utf8-string-length "Hello") => 5)
+(check (utf8-string-length "你好") => 2)
+(check (utf8-string-length "Hello 你好") => 8)
+(check (utf8-string-length "👍") => 1)
+(check (utf8-string-length "🚀") => 1)
+(check (utf8-string-length "🎉") => 1)
+(check (utf8-string-length "Hello 👍 World") => 13)
+(check (utf8-string-length "你好 🚀 测试") => 7)
 
 #|
 u8-substring
@@ -352,7 +352,7 @@ string
 
 相关函数
 --------
-- `u8-string-length` : 获取字符串的 Unicode 字符数量
+- `utf8-string-length` : 获取字符串的 Unicode 字符数量
 - `string-substring` : 基于字节位置提取子字符串
 - `utf8->string` : 将 UTF-8 字节向量转换为字符串
 - `string->utf8` : 将字符串转换为 UTF-8 字节向量
@@ -1218,12 +1218,12 @@ UTF-8 编码规则
 实现说明
 ------
 - 函数在 (scheme base) 库中定义，在 (liii base) 和 (liii unicode) 库中重新导出
-- 被 `u8-string-length`、`utf8->string`、`string->utf8` 等函数内部使用
+- 被 `utf8-string-length`、`utf8->string`、`string->utf8` 等函数内部使用
 - 提供 UTF-8 序列验证功能
 
 相关函数
 --------
-- `u8-string-length` : 获取字符串的 Unicode 字符数量
+- `utf8-string-length` : 获取字符串的 Unicode 字符数量
 - `utf8->string` : 将 UTF-8 字节向量转换为字符串
 - `string->utf8` : 将字符串转换为 UTF-8 字节向量
 - `u8-substring` : 基于 Unicode 字符位置提取子字符串
@@ -1287,4 +1287,6 @@ UTF-8 编码规则
 (check (bytevector-advance-utf8 #u8(#x48 #x65 #x6C #x6C #x6F) 3) => 4)
 (check (bytevector-advance-utf8 #u8(#x48 #x65 #x6C #x6C #x6F) 4) => 5)
 
+
+
 (check-report)
-- 
Gitee