+ *
+ * @param src 含有转义序列的字符串
+ * @return 反转义后的字符串,原样还原
+ */
+ public static String unescape(String src) {
+ StringBuilder out = new StringBuilder();
+ for (int i = 0; i < src.length(); i++) {
+ char c = src.charAt(i);
+ if (c != '\\') { // 非转义字符,直接输出
+ out.append(c);
+ continue;
+ }
+
+ // 如果是最后一个字符为反斜杠,则原样输出
+ if (i == src.length() - 1) {
+ out.append('\\');
+ break;
+ }
+
+ char n = src.charAt(++i); // 下一个字符
+ switch (n) {
+ case 'n' -> out.append('\n'); // 换行
+ case 't' -> out.append('\t'); // 制表符
+ case 'r' -> out.append('\r'); // 回车
+ case 'b' -> out.append('\b'); // 退格
+ case 'f' -> out.append('\f'); // 换页
+ case '\\' -> out.append('\\'); // 反斜杠
+ case '"' -> out.append('"'); // 双引号
+ case '\'' -> out.append('\''); // 单引号
+ case 'u' -> {
+ // Unicode 转义,需读取接下来的 4 位十六进制数字
+ if (i + 4 <= src.length() - 1) {
+ String hex = src.substring(i + 1, i + 5);
+ try {
+ out.append((char) Integer.parseInt(hex, 16));
+ i += 4;
+ } catch (NumberFormatException ignore) {
+ // 非法 hex,原样输出
+ out.append("\\u").append(hex);
+ i += 4;
+ }
+ } else {
+ // 字符串末尾长度不足,原样输出
+ out.append("\\u");
+ }
+ }
+ default -> out.append(n); // 其他未定义的转义序列,原样输出
+ }
+ }
+ return out.toString();
+ }
+}
diff --git a/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java b/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java
index 84d2e7e334f019676e2b1b275320f62d1c8fc42d..e9218ad79711a69e8f20ed76d2a8ab2bbcc2494d 100644
--- a/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java
+++ b/src/main/java/org/jcnc/snow/compiler/backend/generator/LoadConstGenerator.java
@@ -12,46 +12,47 @@ import java.util.Map;
import java.util.stream.Collectors;
/**
- * LoadConstGenerator - Generates VM instructions from IR {@code LoadConstInstruction}
+ * LoadConstGenerator
*
*
- * This class is responsible for converting IR-level {@link LoadConstInstruction} into corresponding VM instructions.
+ * This generator converts an IR-level {@link LoadConstInstruction} into corresponding VM instructions.
* If the constant is a {@code String}, it will also be registered in the
- * {@link CallGenerator} string constant pool to support syscall downgrade scenarios.
+ * {@link CallGenerator} string constant pool for later use.
*
*
*
- * Fix: When the constant is an array (List), type information is preserved in R_PUSH payload:
+ * Key implementation notes:
*
- *
Float is output with f suffix (e.g., 0.1f);
- *
Long is output with L suffix (e.g., 123L);
- *
Double/Integer are output in their default format (e.g., 1.0, 42);
- *
Supports recursive serialization of nested arrays.
+ *
When the constant is an array (List), type information is preserved in the R_PUSH payload:
+ *
Float values get an f suffix (e.g., 0.1f)
+ *
Long values get an L suffix (e.g., 123L)
+ *
Double and Integer values use their default string format (e.g., 1.0, 42)
+ *
Nested arrays are recursively serialized with correct type suffixes.
*
- * This prevents float values from being misinterpreted as double on the VM side,
- * and avoids Double→Float cast exceptions in later F_STORE operations.
+ * This prevents type confusion on the VM side (e.g., float being misread as double)
+ * and avoids cast exceptions during store operations.
*
*/
public class LoadConstGenerator implements InstructionGenerator {
/**
- * Formats a constant value as a string for use as a VM payload.
- * Lists are recursively serialized, and Float/Long types include suffixes to preserve type information.
+ * Formats a constant value for use as a VM instruction payload.
+ * For lists, recursively formats each element with type suffixes where appropriate.
*
- * @param v The constant value to format.
- * @return The formatted string for use in VM code.
+ * @param v The constant value.
+ * @return The formatted string payload for VM code.
*/
private static String formatConst(Object v) {
return formatConst(v, false);
}
/**
- * Internal helper for recursively formatting constant values (including nested arrays)
- * with appropriate type suffixes for array payloads.
+ * Recursively formats constant values (including nested arrays), preserving
+ * type suffixes and escaping strings. Used internally for array/list handling.
*
- * @param v The constant value to format.
- * @param insideArray True if currently formatting inside an array context; affects whether type suffixes are applied.
- * @return The formatted string for use in VM code.
+ * @param v The constant value.
+ * @param insideArray Whether this value is inside an array context (controls type suffixing).
+ * @return The formatted string for VM code.
*/
private static String formatConst(Object v, boolean insideArray) {
if (v instanceof List> list) {
@@ -61,10 +62,10 @@ public class LoadConstGenerator implements InstructionGenerator sb.append("\\n");
+ case '\r' -> sb.append("\\r");
+ case '\t' -> sb.append("\\t");
+ case '\f' -> sb.append("\\f");
+ case '\b' -> sb.append("\\b");
+ case '\"' -> sb.append("\\\"");
+ case '\'' -> sb.append("\\'");
+ case '\\' -> sb.append("\\\\");
+ default -> {
+ // Escape non-ASCII and control characters using uXXXX
+ if (ch < 0x20 || ch > 0x7E) {
+ sb.append(String.format("\\u%04X", (int) ch));
+ } else {
+ sb.append(ch);
+ }
+ }
+ }
+ }
+ return sb.toString();
+ }
+
@Override
public Class supportedClass() {
return LoadConstInstruction.class;
}
/**
- * Generates the VM instructions for a given {@link LoadConstInstruction}.
- *
- * This includes formatting the constant value, emitting the corresponding PUSH and STORE instructions,
- * marking the local slot type for later operations, and registering string constants if necessary.
- *
+ * Generates VM code for a LoadConstInstruction.
+ * Produces PUSH and STORE instructions, sets the slot type,
+ * and registers string constants if necessary.
*
- * @param ins The {@link LoadConstInstruction} to generate code for.
- * @param out The {@link VMProgramBuilder} used to collect the generated instructions.
- * @param slotMap A mapping from {@link IRVirtualRegister} to physical slot indices.
- * @param currentFn The name of the current function.
+ * @param ins The IR instruction to generate.
+ * @param out The output program builder.
+ * @param slotMap The mapping from IR virtual register to physical slot.
+ * @param currentFn The current function name.
*/
@Override
public void generate(LoadConstInstruction ins,
@@ -118,19 +144,19 @@ public class LoadConstGenerator implements InstructionGenerator slotMap,
String currentFn) {
- // 1. Get the constant value
+ // 1. Retrieve the constant value from the instruction
IRConstant constant = (IRConstant) ins.operands().getFirst();
Object value = constant.value();
- // 2. Generate PUSH instruction (array constants use type-aware formatting)
+ // 2. Format and emit the PUSH instruction (arrays will use type-aware formatting)
String payload = formatConst(value);
out.emit(OpHelper.pushOpcodeFor(value) + " " + payload);
- // 3. STORE the result to the destination slot
+ // 3. Emit STORE to the destination slot
int slot = slotMap.get(ins.dest());
out.emit(OpHelper.storeOpcodeFor(value) + " " + slot);
- // 4. Mark the slot's data type for later inference and instruction selection
+ // 4. Mark the slot's data type for later use (type inference, instruction selection, etc.)
char prefix = switch (value) {
case Integer _ -> 'I'; // Integer
case Long _ -> 'L'; // Long
@@ -138,15 +164,15 @@ public class LoadConstGenerator implements InstructionGenerator 'B'; // Byte
case Double _ -> 'D'; // Double
case Float _ -> 'F'; // Float
- case Boolean _ -> 'I'; // Boolean handled as Integer (typically lowered to 1/0)
- case String _ -> 'R'; // String constant
- case java.util.List> _ -> 'R'; // Reference type (arrays, etc.)
+ case Boolean _ -> 'I'; // Booleans are treated as integers (1/0)
+ case String _ -> 'R'; // Reference type for strings
+ case java.util.List> _ -> 'R'; // Reference type for arrays/lists
case null, default -> throw new IllegalStateException("Unknown constant type: "
+ (value != null ? value.getClass() : null));
};
out.setSlotType(slot, prefix);
- // 5. If the constant is a string, register it for the CallGenerator string pool
+ // 5. Register the string constant for the string constant pool if needed
if (value instanceof String s) {
CallGenerator.registerStringConst(ins.dest().id(), s);
}
diff --git a/src/main/java/org/jcnc/snow/compiler/parser/expression/StringLiteralParselet.java b/src/main/java/org/jcnc/snow/compiler/parser/expression/StringLiteralParselet.java
index 542a0979be971637c19dcdd221110b808954af42..b4d417c1121549ad64967264e523b8d21fca85ed 100644
--- a/src/main/java/org/jcnc/snow/compiler/parser/expression/StringLiteralParselet.java
+++ b/src/main/java/org/jcnc/snow/compiler/parser/expression/StringLiteralParselet.java
@@ -1,5 +1,6 @@
package org.jcnc.snow.compiler.parser.expression;
+import org.jcnc.snow.common.StringEscape;
import org.jcnc.snow.compiler.lexer.token.Token;
import org.jcnc.snow.compiler.parser.ast.base.ExpressionNode;
import org.jcnc.snow.compiler.parser.ast.StringLiteralNode;
@@ -26,8 +27,14 @@ public class StringLiteralParselet implements PrefixParselet {
*/
@Override
public ExpressionNode parse(ParserContext ctx, Token token) {
+ // 去除首尾引号
String raw = token.getRaw();
- String content = raw.substring(1, raw.length() - 1);
- return new StringLiteralNode(content, new NodeContext(token.getLine(), token.getCol(), ctx.getSourceName()));
+ String inner = raw.substring(1, raw.length() - 1);
+ // 解析转义符与 Unicode 转义
+ String value = StringEscape.unescape(inner);
+ return new StringLiteralNode(
+ value,
+ new NodeContext(token.getLine(), token.getCol(), ctx.getSourceName())
+ );
}
}
\ No newline at end of file
diff --git a/src/main/java/org/jcnc/snow/vm/commands/ref/control/RPushCommand.java b/src/main/java/org/jcnc/snow/vm/commands/ref/control/RPushCommand.java
index 6c12299062f4e958fb8696bab38aff9271963b12..ef73aed538e8797ca22f699cd7fbdfe5aabbbd96 100644
--- a/src/main/java/org/jcnc/snow/vm/commands/ref/control/RPushCommand.java
+++ b/src/main/java/org/jcnc/snow/vm/commands/ref/control/RPushCommand.java
@@ -10,36 +10,54 @@ import java.util.Collections;
import java.util.List;
/**
- * The {@code RPushCommand} class implements the {@link Command} interface
- * and represents the "reference push" instruction ({@code R_PUSH}) in the virtual machine.
- *
+ * The {@code RPushCommand} class implements the {@link Command} interface and provides
+ * the "reference push" instruction ({@code R_PUSH}) for the virtual machine.
*
- * This instruction pushes a reference-type value onto the operand stack.
- * The input is parsed from the textual instruction form, which can represent:
+ * Function: Pushes a reference-type value (String literal or array literal) onto the operand stack.
+ *
+ *
+ *
Supported Literals
*
- *
String literals
- *
Array literals (e.g., {@code [1, 2, 3]}), including nested arrays
Array Literals: Bracketed array forms (e.g., {@code [1, 2, [3, 4]]}), including nested arrays.
*
- *
*
- *
- * For array literals, a nested list structure is constructed. In this implementation,
- * array literals are pushed as mutable {@link java.util.ArrayList} structures,
- * so that subsequent system calls such as {@code ARR_SET} can modify elements in-place.
- *
+ *
Implementation Details
+ *
+ *
Array literals are parsed into mutable {@link java.util.ArrayList} objects, to support in-place modification (e.g., by {@code ARR_SET}).
+ *
String literals wrapped in quotes are automatically unescaped according to Java string escape rules.
+ *
Handles atomic values: numbers (including hex, binary, float, long, short, byte), booleans, and fallback to string.
+ *
+ * @since 1.0
*/
public class RPushCommand implements Command {
/**
- * Executes the R_PUSH command.
+ * Executes the {@code R_PUSH} instruction. Parses the given literal parameter and pushes it onto the operand stack.
+ *
Quoted string literals (e.g., {@code "abc\n"}), parsed with escape sequence support
+ *
Unquoted raw strings, numbers, and atoms
+ *
*
- * @param parts The parts of the instruction, where {@code parts[1..n]} are concatenated as the literal.
- * @param pc The current program counter.
- * @param stack The operand stack where the result will be pushed.
- * @param local The local variable store (unused in this instruction).
- * @param callStack The call stack (unused in this instruction).
- * @return The new program counter (typically {@code pc+1}).
- * @throws IllegalStateException if no literal parameter is provided.
+ * @param parts The instruction split into parts (opcode and arguments)
+ * @param pc The current program counter
+ * @param stack The operand stack to push the value onto
+ * @param local The local variable store (unused)
+ * @param callStack The call stack (unused)
+ * @return The next program counter (pc + 1)
+ * @throws IllegalStateException if the R_PUSH parameter is missing or parsing fails
*/
@Override
public int execute(String[] parts, int pc, OperandStack stack, LocalVariableStore local, CallStack callStack) {
@@ -54,71 +72,64 @@ public class RPushCommand implements Command {
}
String literal = sb.toString().trim();
- // Check if this is an array literal
+ // Handle array literal
if (literal.startsWith("[") && literal.endsWith("]")) {
Object parsed = parseValue(new Cursor(literal));
if (!(parsed instanceof List> list)) {
- // Should not happen in theory; safety fallback
stack.push(parsed);
} else {
- // Push a deep-mutable copy so ARR_SET can modify elements in-place
stack.push(deepMutable(list));
}
- } else {
- // Regular string, push as-is
+ }
+ // String literal with quotes and escapes
+ else if (literal.length() >= 2 && literal.startsWith("\"") && literal.endsWith("\"")) {
+ String decoded = parseQuoted(new Cursor(literal));
+ stack.push(decoded);
+ }
+ // Raw atom or string
+ else {
stack.push(literal);
}
return pc + 1;
}
/**
- * A simple string cursor, supporting index increment and character reading, for use by the parser.
+ * Utility class for string parsing, used by the array and string literal parsers.
*/
static class Cursor {
final String s;
int i;
/**
- * Constructs a new {@code Cursor} for the given string.
- *
- * @param s The string to parse.
+ * Constructs a cursor over the provided string.
+ * @param s the input string to parse
*/
- Cursor(String s) {
- this.s = s;
- this.i = 0;
- }
+ Cursor(String s) { this.s = s; this.i = 0; }
/**
* Advances the cursor by one character.
*/
- void skip() {
- i++;
- }
+ void skip() { i++; }
/**
- * @return {@code true} if the cursor has reached the end of the string.
+ * Returns true if the cursor has reached the end of the string.
+ * @return true if end of string
*/
- boolean end() {
- return i >= s.length();
- }
+ boolean end() { return i >= s.length(); }
/**
- * Gets the character at the current cursor position.
- *
- * @return current character
- * @throws StringIndexOutOfBoundsException if at end of string
+ * Returns the current character at the cursor position.
+ * @return the current character
*/
- char ch() {
- return s.charAt(i);
- }
+ char ch() { return s.charAt(i); }
}
/**
- * Parses a value from the input string at the current cursor position.
- * This can be an array literal, a quoted string, or a simple atom (number, word).
+ * Parses a value from the current cursor position.
+ * Supports arrays, quoted strings, or atoms.
*
- * @param c The cursor for parsing.
- * @return The parsed value (could be List, String, Number).
+ * @param c the parsing cursor
+ * @return the parsed object (List, String, Number, Boolean, or String fallback)
*/
Object parseValue(Cursor c) {
skipWs(c);
@@ -130,9 +141,8 @@ public class RPushCommand implements Command {
}
/**
- * Skips whitespace characters in the input string.
- *
- * @param c The cursor to advance.
+ * Skips whitespace characters at the cursor.
+ * @param c the parsing cursor
*/
private static void skipWs(Cursor c) {
while (!c.end()) {
@@ -143,13 +153,13 @@ public class RPushCommand implements Command {
}
/**
- * Parses an array literal from the input, including nested arrays.
+ * Parses an array literal of the form [elem1, elem2, ...] (may be nested).
+ * Recursively parses elements using {@link #parseValue(Cursor)}.
*
- * @param c The cursor (positioned at '[' at entry).
- * @return A List representing the parsed array.
+ * @param c the parsing cursor
+ * @return a List of parsed elements
*/
private Object parseArray(Cursor c) {
- // assumes current char is '['
c.skip(); // skip '['
List