diff --git a/.run/Run.run.xml b/.run/Run.run.xml deleted file mode 100644 index d9f745baf5b2339d6ce4ca7615b149e0f489f305..0000000000000000000000000000000000000000 --- a/.run/Run.run.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - \ No newline at end of file diff --git "a/.run/\346\265\213\350\257\225.run.xml" "b/.run/\346\265\213\350\257\225.run.xml" index dc6fb899164af33fa664d08f42aeaf7b0f1c3273..b673e11f85d375772d038f84b2c80a2571e41c52 100644 --- "a/.run/\346\265\213\350\257\225.run.xml" +++ "b/.run/\346\265\213\350\257\225.run.xml" @@ -13,6 +13,11 @@ + + + + + diff --git a/playground/Demo/Demo26/Main.snow b/playground/Demo/Demo26/Main.snow new file mode 100644 index 0000000000000000000000000000000000000000..060d6673626ffce9cc540cb82bacf6506f78d27b --- /dev/null +++ b/playground/Demo/Demo26/Main.snow @@ -0,0 +1,20 @@ +module: Main + function: main + returns: void + body: + // 1. 常见转义符 + declare sNewline : string = "换行示例:\n第二行" + declare sTab : string = "制表符示例:\t列二" + declare sBackslash: string = "反斜杠示例: C:\\Snow" + declare sDQuote : string = "双引号示例: \"Snow\"" + declare sSQuote : string = "单引号示例: \'Snow\'" + declare sCarriage : string = "回车示例:\rCarriage" + declare sBackspace: string = "退格示例: ABC\bD" + declare sFormFeed : string = "换页示例:\fPage-2" + + // 2. Unicode 转义 + declare sUnicode : string = "𪚥𠮷: \u4F60\u597D, Snow!" + + end body + end function +end module diff --git a/src/main/java/org/jcnc/snow/common/StringEscape.java b/src/main/java/org/jcnc/snow/common/StringEscape.java new file mode 100644 index 0000000000000000000000000000000000000000..fca7b6ccd73f4d7bc4df0f0faee8f36ad63839ad --- /dev/null +++ b/src/main/java/org/jcnc/snow/common/StringEscape.java @@ -0,0 +1,82 @@ +package org.jcnc.snow.common; + +/** + *

+ * 字符串转义/反转义工具类,主要用于: + *

    + *
  • 编译期:将运行时的字符串安全地编码为单行形式(用于 .water 指令文件的保存)。
  • + *
  • 运行期:在虚拟机(VM)执行相关指令时,将转义后的字符串还原成真实字符。
  • + *
+ *
+ * 转义规则兼容 Java 字符串转义(包括 \n, \t, \r 等常见控制字符),同时对于不可见或非 ASCII 字符,会编码为 Unicode 形式(如 uXXXX)。 + *

+ */ +public final class StringEscape { + + /** + * 工具类私有构造方法,禁止实例化。 + */ + private StringEscape() { + } + + /** + * 运行期方法: + *

将转义序列还原为实际字符。

+ * + *
    + *
  • 支持常见的转义字符序列。
  • + *
  • 支持 uXXXX 形式的 Unicode 字符反转义。
  • + *
  • 对于无法识别的转义,丢弃反斜杠,仅输出其后的字符(例如 \q 输出为 q)。
  • + *
+ * + * @param src 含有转义序列的字符串 + * @return 反转义后的字符串,原样还原 + */ + public static String unescape(String src) { + StringBuilder out = new StringBuilder(); + for (int i = 0; i < src.length(); i++) { + char c = src.charAt(i); + if (c != '\\') { // 非转义字符,直接输出 + out.append(c); + continue; + } + + // 如果是最后一个字符为反斜杠,则原样输出 + if (i == src.length() - 1) { + out.append('\\'); + break; + } + + char n = src.charAt(++i); // 下一个字符 + switch (n) { + case 'n' -> out.append('\n'); // 换行 + case 't' -> out.append('\t'); // 制表符 + case 'r' -> out.append('\r'); // 回车 + case 'b' -> out.append('\b'); // 退格 + case 'f' -> out.append('\f'); // 换页 + case '\\' -> out.append('\\'); // 反斜杠 + case '"' -> out.append('"'); // 双引号 + case '\'' -> out.append('\''); // 单引号 + case 'u' -> { + // Unicode 转义,需读取接下来的 4 位十六进制数字 + if (i + 4 <= src.length() - 1) { + String hex = src.substring(i + 1, i + 5); + try { + out.append((char) Integer.parseInt(hex, 16)); + i += 4; + } catch (NumberFormatException ignore) { + // 非法 hex,原样输出 + out.append("\\u").append(hex); + i += 4; + } + } else { + // 字符串末尾长度不足,原样输出 + out.append("\\u"); + } + } + default -> out.append(n); // 其他未定义的转义序列,原样输出 + } + } + return out.toString(); + } +} diff --git a/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java b/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java index 84d2e7e334f019676e2b1b275320f62d1c8fc42d..e9218ad79711a69e8f20ed76d2a8ab2bbcc2494d 100644 --- a/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java +++ b/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java @@ -12,46 +12,47 @@ import java.util.Map; import java.util.stream.Collectors; /** - * LoadConstGenerator - Generates VM instructions from IR {@code LoadConstInstruction} + * LoadConstGenerator * *

- * This class is responsible for converting IR-level {@link LoadConstInstruction} into corresponding VM instructions. + * This generator converts an IR-level {@link LoadConstInstruction} into corresponding VM instructions. * If the constant is a {@code String}, it will also be registered in the - * {@link CallGenerator} string constant pool to support syscall downgrade scenarios. + * {@link CallGenerator} string constant pool for later use. *

* *

- * Fix: When the constant is an array (List), type information is preserved in R_PUSH payload: + * Key implementation notes: *

    - *
  • Float is output with f suffix (e.g., 0.1f);
  • - *
  • Long is output with L suffix (e.g., 123L);
  • - *
  • Double/Integer are output in their default format (e.g., 1.0, 42);
  • - *
  • Supports recursive serialization of nested arrays.
  • + *
  • When the constant is an array (List), type information is preserved in the R_PUSH payload:
  • + *
  • Float values get an f suffix (e.g., 0.1f)
  • + *
  • Long values get an L suffix (e.g., 123L)
  • + *
  • Double and Integer values use their default string format (e.g., 1.0, 42)
  • + *
  • Nested arrays are recursively serialized with correct type suffixes.
  • *
- * This prevents float values from being misinterpreted as double on the VM side, - * and avoids Double→Float cast exceptions in later F_STORE operations. + * This prevents type confusion on the VM side (e.g., float being misread as double) + * and avoids cast exceptions during store operations. *

*/ public class LoadConstGenerator implements InstructionGenerator { /** - * Formats a constant value as a string for use as a VM payload. - * Lists are recursively serialized, and Float/Long types include suffixes to preserve type information. + * Formats a constant value for use as a VM instruction payload. + * For lists, recursively formats each element with type suffixes where appropriate. * - * @param v The constant value to format. - * @return The formatted string for use in VM code. + * @param v The constant value. + * @return The formatted string payload for VM code. */ private static String formatConst(Object v) { return formatConst(v, false); } /** - * Internal helper for recursively formatting constant values (including nested arrays) - * with appropriate type suffixes for array payloads. + * Recursively formats constant values (including nested arrays), preserving + * type suffixes and escaping strings. Used internally for array/list handling. * - * @param v The constant value to format. - * @param insideArray True if currently formatting inside an array context; affects whether type suffixes are applied. - * @return The formatted string for use in VM code. + * @param v The constant value. + * @param insideArray Whether this value is inside an array context (controls type suffixing). + * @return The formatted string for VM code. 
*/ private static String formatConst(Object v, boolean insideArray) { if (v instanceof List list) { @@ -61,10 +62,10 @@ public class LoadConstGenerator implements InstructionGenerator sb.append("\\n"); + case '\r' -> sb.append("\\r"); + case '\t' -> sb.append("\\t"); + case '\f' -> sb.append("\\f"); + case '\b' -> sb.append("\\b"); + case '\"' -> sb.append("\\\""); + case '\'' -> sb.append("\\'"); + case '\\' -> sb.append("\\\\"); + default -> { + // Escape non-ASCII and control characters using uXXXX + if (ch < 0x20 || ch > 0x7E) { + sb.append(String.format("\\u%04X", (int) ch)); + } else { + sb.append(ch); + } + } + } + } + return sb.toString(); + } + @Override public Class supportedClass() { return LoadConstInstruction.class; } /** - * Generates the VM instructions for a given {@link LoadConstInstruction}. - *

- * This includes formatting the constant value, emitting the corresponding PUSH and STORE instructions, - * marking the local slot type for later operations, and registering string constants if necessary. - *

+ * Generates VM code for a LoadConstInstruction. + * Produces PUSH and STORE instructions, sets the slot type, + * and registers string constants if necessary. * - * @param ins The {@link LoadConstInstruction} to generate code for. - * @param out The {@link VMProgramBuilder} used to collect the generated instructions. - * @param slotMap A mapping from {@link IRVirtualRegister} to physical slot indices. - * @param currentFn The name of the current function. + * @param ins The IR instruction to generate. + * @param out The output program builder. + * @param slotMap The mapping from IR virtual register to physical slot. + * @param currentFn The current function name. */ @Override public void generate(LoadConstInstruction ins, @@ -118,19 +144,19 @@ public class LoadConstGenerator implements InstructionGenerator slotMap, String currentFn) { - // 1. Get the constant value + // 1. Retrieve the constant value from the instruction IRConstant constant = (IRConstant) ins.operands().getFirst(); Object value = constant.value(); - // 2. Generate PUSH instruction (array constants use type-aware formatting) + // 2. Format and emit the PUSH instruction (arrays will use type-aware formatting) String payload = formatConst(value); out.emit(OpHelper.pushOpcodeFor(value) + " " + payload); - // 3. STORE the result to the destination slot + // 3. Emit STORE to the destination slot int slot = slotMap.get(ins.dest()); out.emit(OpHelper.storeOpcodeFor(value) + " " + slot); - // 4. Mark the slot's data type for later inference and instruction selection + // 4. Mark the slot's data type for later use (type inference, instruction selection, etc.) 
char prefix = switch (value) { case Integer _ -> 'I'; // Integer case Long _ -> 'L'; // Long @@ -138,15 +164,15 @@ public class LoadConstGenerator implements InstructionGenerator 'B'; // Byte case Double _ -> 'D'; // Double case Float _ -> 'F'; // Float - case Boolean _ -> 'I'; // Boolean handled as Integer (typically lowered to 1/0) - case String _ -> 'R'; // String constant - case java.util.List _ -> 'R'; // Reference type (arrays, etc.) + case Boolean _ -> 'I'; // Booleans are treated as integers (1/0) + case String _ -> 'R'; // Reference type for strings + case java.util.List _ -> 'R'; // Reference type for arrays/lists case null, default -> throw new IllegalStateException("Unknown constant type: " + (value != null ? value.getClass() : null)); }; out.setSlotType(slot, prefix); - // 5. If the constant is a string, register it for the CallGenerator string pool + // 5. Register the string constant for the string constant pool if needed if (value instanceof String s) { CallGenerator.registerStringConst(ins.dest().id(), s); } diff --git a/src/main/java/org/jcnc/snow/compiler/parser/expression/StringLiteralParselet.java b/src/main/java/org/jcnc/snow/compiler/parser/expression/StringLiteralParselet.java index 542a0979be971637c19dcdd221110b808954af42..b4d417c1121549ad64967264e523b8d21fca85ed 100644 --- a/src/main/java/org/jcnc/snow/compiler/parser/expression/StringLiteralParselet.java +++ b/src/main/java/org/jcnc/snow/compiler/parser/expression/StringLiteralParselet.java @@ -1,5 +1,6 @@ package org.jcnc.snow.compiler.parser.expression; +import org.jcnc.snow.common.StringEscape; import org.jcnc.snow.compiler.lexer.token.Token; import org.jcnc.snow.compiler.parser.ast.base.ExpressionNode; import org.jcnc.snow.compiler.parser.ast.StringLiteralNode; @@ -26,8 +27,14 @@ public class StringLiteralParselet implements PrefixParselet { */ @Override public ExpressionNode parse(ParserContext ctx, Token token) { + // 去除首尾引号 String raw = token.getRaw(); - String content = 
raw.substring(1, raw.length() - 1); - return new StringLiteralNode(content, new NodeContext(token.getLine(), token.getCol(), ctx.getSourceName())); + String inner = raw.substring(1, raw.length() - 1); + // 解析转义符与 Unicode 转义 + String value = StringEscape.unescape(inner); + return new StringLiteralNode( + value, + new NodeContext(token.getLine(), token.getCol(), ctx.getSourceName()) + ); } } \ No newline at end of file diff --git a/src/main/java/org/jcnc/snow/vm/commands/ref/control/RPushCommand.java b/src/main/java/org/jcnc/snow/vm/commands/ref/control/RPushCommand.java index 6c12299062f4e958fb8696bab38aff9271963b12..ef73aed538e8797ca22f699cd7fbdfe5aabbbd96 100644 --- a/src/main/java/org/jcnc/snow/vm/commands/ref/control/RPushCommand.java +++ b/src/main/java/org/jcnc/snow/vm/commands/ref/control/RPushCommand.java @@ -10,36 +10,54 @@ import java.util.Collections; import java.util.List; /** - * The {@code RPushCommand} class implements the {@link Command} interface - * and represents the "reference push" instruction ({@code R_PUSH}) in the virtual machine. - * + * The {@code RPushCommand} class implements the {@link Command} interface and provides + * the "reference push" instruction ({@code R_PUSH}) for the virtual machine. *

- * This instruction pushes a reference-type value onto the operand stack. - * The input is parsed from the textual instruction form, which can represent: + * Function: Pushes a reference-type value (String literal or array literal) onto the operand stack. + *

+ * + *

Supported Literals

*
    - *
  • String literals
  • - *
  • Array literals (e.g., {@code [1, 2, 3]}), including nested arrays
  • + *
  • String Literals: Quoted strings (e.g., {@code "hello\nworld"}) with escape sequence support.
  • + *
  • Array Literals: Bracketed array forms (e.g., {@code [1, 2, [3, 4]]}), including nested arrays.
  • *
- *

* - *

- * For array literals, a nested list structure is constructed. In this implementation, - * array literals are pushed as mutable {@link java.util.ArrayList} structures, - * so that subsequent system calls such as {@code ARR_SET} can modify elements in-place. - *

+ *

Implementation Details

+ *
    + *
  • Array literals are parsed into mutable {@link java.util.ArrayList} objects, to support in-place modification (e.g., by {@code ARR_SET}).
  • + *
  • String literals wrapped in quotes are automatically unescaped according to Java string escape rules.
  • + *
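  • Atomic elements of array literals are parsed as numbers (hex {@code 0x}, binary {@code 0b}, and {@code f}/{@code L}/{@code s}/{@code b}-suffixed float, long, short, byte); the bare tokens {@code 1} and {@code 0} are read as booleans, and anything else falls back to a string. An unquoted top-level literal is pushed as a raw string without atom parsing.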
  • + *
+ * + *

Example Usage

+ *
+ *     R_PUSH "hello\nworld"     // pushes String "hello\nworld" (with actual newline)
+ *     R_PUSH [1, 2, 3]         // pushes ArrayList {1, 2, 3}
+ *     R_PUSH [1, [2, 3], 4]    // pushes nested arrays as mutable lists
+ * 
+ * + * @author (your name or org) + * @since 1.0 */ public class RPushCommand implements Command { /** - * Executes the R_PUSH command. + * Executes the {@code R_PUSH} instruction. Parses the given literal parameter and pushes it onto the operand stack. + *

+ * Handles: + *

    + *
  • Array literals (e.g., {@code [1, 2, "a"]}), parsed recursively as mutable ArrayLists
  • + *
  • Quoted string literals (e.g., {@code "abc\n"}), parsed with escape sequence support
  • + *
  • Unquoted raw strings, numbers, and atoms
  • + *
* - * @param parts The parts of the instruction, where {@code parts[1..n]} are concatenated as the literal. - * @param pc The current program counter. - * @param stack The operand stack where the result will be pushed. - * @param local The local variable store (unused in this instruction). - * @param callStack The call stack (unused in this instruction). - * @return The new program counter (typically {@code pc+1}). - * @throws IllegalStateException if no literal parameter is provided. + * @param parts The instruction split into parts (opcode and arguments) + * @param pc The current program counter + * @param stack The operand stack to push the value onto + * @param local The local variable store (unused) + * @param callStack The call stack (unused) + * @return The next program counter (pc + 1) + * @throws IllegalStateException if the R_PUSH parameter is missing or parsing fails */ @Override public int execute(String[] parts, int pc, OperandStack stack, LocalVariableStore local, CallStack callStack) { @@ -54,71 +72,64 @@ public class RPushCommand implements Command { } String literal = sb.toString().trim(); - // Check if this is an array literal + // Handle array literal if (literal.startsWith("[") && literal.endsWith("]")) { Object parsed = parseValue(new Cursor(literal)); if (!(parsed instanceof List list)) { - // Should not happen in theory; safety fallback stack.push(parsed); } else { - // Push a deep-mutable copy so ARR_SET can modify elements in-place stack.push(deepMutable(list)); } - } else { - // Regular string, push as-is + } + // String literal with quotes and escapes + else if (literal.length() >= 2 && literal.startsWith("\"") && literal.endsWith("\"")) { + String decoded = parseQuoted(new Cursor(literal)); + stack.push(decoded); + } + // Raw atom or string + else { stack.push(literal); } return pc + 1; } /** - * A simple string cursor, supporting index increment and character reading, for use by the parser. 
+ * Utility class for string parsing, used by the array and string literal parsers. */ static class Cursor { final String s; int i; /** - * Constructs a new {@code Cursor} for the given string. - * - * @param s The string to parse. + * Constructs a cursor over the provided string. + * @param s the input string to parse */ - Cursor(String s) { - this.s = s; - this.i = 0; - } + Cursor(String s) { this.s = s; this.i = 0; } /** * Advances the cursor by one character. */ - void skip() { - i++; - } + void skip() { i++; } /** - * @return {@code true} if the cursor has reached the end of the string. + * Returns true if the cursor has reached the end of the string. + * @return true if end of string */ - boolean end() { - return i >= s.length(); - } + boolean end() { return i >= s.length(); } /** - * Gets the character at the current cursor position. - * - * @return current character - * @throws StringIndexOutOfBoundsException if at end of string + * Returns the current character at the cursor position. + * @return the current character */ - char ch() { - return s.charAt(i); - } + char ch() { return s.charAt(i); } } /** - * Parses a value from the input string at the current cursor position. - * This can be an array literal, a quoted string, or a simple atom (number, word). + * Parses a value from the current cursor position. + * Supports arrays, quoted strings, or atoms. * - * @param c The cursor for parsing. - * @return The parsed value (could be List, String, Number). + * @param c the parsing cursor + * @return the parsed object (List, String, Number, Boolean, or String fallback) */ Object parseValue(Cursor c) { skipWs(c); @@ -130,9 +141,8 @@ public class RPushCommand implements Command { } /** - * Skips whitespace characters in the input string. - * - * @param c The cursor to advance. + * Skips whitespace characters at the cursor. 
+ * @param c the parsing cursor */ private static void skipWs(Cursor c) { while (!c.end()) { @@ -143,13 +153,13 @@ public class RPushCommand implements Command { } /** - * Parses an array literal from the input, including nested arrays. + * Parses an array literal of the form [elem1, elem2, ...] (may be nested). + * Recursively parses elements using {@link #parseValue(Cursor)}. * - * @param c The cursor (positioned at '[' at entry). - * @return A List representing the parsed array. + * @param c the parsing cursor + * @return a List of parsed elements */ private Object parseArray(Cursor c) { - // assumes current char is '[' c.skip(); // skip '[' List out = new ArrayList<>(); skipWs(c); @@ -170,13 +180,12 @@ public class RPushCommand implements Command { } /** - * Parses a quoted string literal, handling escape characters. + * Parses a quoted string, handling standard Java escape sequences (e.g. \n, \t, uXXXX). * - * @param c The cursor (positioned at '"' at entry). - * @return The parsed string value. 
+ * @param c the parsing cursor + * @return the decoded string */ private static String parseQuoted(Cursor c) { - // assumes current char is '"' c.skip(); // skip opening quote StringBuilder sb = new StringBuilder(); while (!c.end()) { @@ -190,8 +199,25 @@ public class RPushCommand implements Command { case 'n' -> sb.append('\n'); case 'r' -> sb.append('\r'); case 't' -> sb.append('\t'); + case 'f' -> sb.append('\f'); + case 'b' -> sb.append('\b'); case '\"' -> sb.append('\"'); + case '\'' -> sb.append('\''); case '\\' -> sb.append('\\'); + case 'u' -> { // Unicode escape: uXXXX + StringBuilder uni = new StringBuilder(); + for (int k = 0; k < 4 && !c.end(); ++k) { + uni.append(c.ch()); + c.skip(); + } + try { + int code = Integer.parseInt(uni.toString(), 16); + sb.append((char) code); + } catch (Exception e) { + // Invalid unicode, append as is + sb.append("\\u").append(uni); + } + } default -> sb.append(esc); } } else if (ch == '\"') { @@ -204,10 +230,10 @@ public class RPushCommand implements Command { } /** - * Parses an atom (number, hexadecimal, binary, or plain string token). + * Parses an atomic value (number, boolean, or fallback string) from the cursor. * - * @param c The cursor. - * @return An Integer, Double, or String, depending on the content. 
+ * @param c the parsing cursor + * @return the parsed object (Integer, Double, Float, Long, Boolean, or String) */ private static Object parseAtom(Cursor c) { StringBuilder sb = new StringBuilder(); @@ -218,7 +244,7 @@ public class RPushCommand implements Command { c.skip(); } String token = sb.toString(); - // try number + // Try number parsing with various notations and types try { if (token.startsWith("0x") || token.startsWith("0X")) { return Integer.parseInt(token.substring(2), 16); @@ -226,6 +252,20 @@ public class RPushCommand implements Command { if (token.startsWith("0b") || token.startsWith("0B")) { return Integer.parseInt(token.substring(2), 2); } + if (token.endsWith("f")) { + return Float.parseFloat(token.substring(0, token.length() - 1)); + } + if (token.endsWith("L")) { + return Long.parseLong(token.substring(0, token.length() - 1)); + } + if (token.endsWith("s")) { + return Short.parseShort(token.substring(0, token.length() - 1)); + } + if (token.endsWith("b")) { + return Byte.parseByte(token.substring(0, token.length() - 1)); + } + if (token.equals("1")) return true; + if (token.equals("0")) return false; if (token.contains(".")) { return Double.parseDouble(token); } @@ -236,13 +276,11 @@ public class RPushCommand implements Command { } } - // ---------------------- helpers for immutability/mutability ---------------------- - /** - * Recursively creates an unmodifiable copy of a list, with all nested lists also unmodifiable. + * Creates a deeply unmodifiable version of the provided list (and its nested lists). * - * @param l The list to make unmodifiable. - * @return An unmodifiable deep copy of the list. + * @param l the original list + * @return an unmodifiable view of the list and all nested lists */ List deepUnmodifiable(List l) { List out = new ArrayList<>(l.size()); @@ -251,10 +289,10 @@ public class RPushCommand implements Command { } /** - * Helper method for {@link #deepUnmodifiable(List)}. Recursively processes each element. 
+ * Helper for {@link #deepUnmodifiable(List)}; handles nested lists recursively. * - * @param v The object to process. - * @return Unmodifiable list if input is a list, otherwise the value itself. + * @param v the object to process + * @return an unmodifiable list if input is a list; otherwise, the object itself */ Object deepUnmodifiableObject(Object v) { if (v instanceof List l) { @@ -264,11 +302,10 @@ public class RPushCommand implements Command { } /** - * Create a deep mutable copy of a nested List structure, preserving element values. - * Nested lists are turned into {@link java.util.ArrayList} so they can be modified by ARR_SET. + * Creates a deeply mutable version of the provided list (and its nested lists). * - * @param l The source list. - * @return Deep mutable copy of the list. + * @param l the original list + * @return a new mutable list (ArrayList), with all nested lists mutable */ private static java.util.List deepMutable(java.util.List l) { java.util.List out = new java.util.ArrayList<>(l.size()); @@ -277,10 +314,10 @@ public class RPushCommand implements Command { } /** - * Helper method for {@link #deepMutable(List)}. Recursively processes each element. + * Helper for {@link #deepMutable(List)}; handles nested lists recursively. * - * @param v The object to process. - * @return Mutable list if input is a list, otherwise the value itself. + * @param v the object to process + * @return a mutable list if input is a list; otherwise, the object itself */ private static Object deepMutableObject(Object v) { if (v instanceof java.util.List l) {