diff --git a/SECURITYNOTE.md b/SECURITYNOTE.md index 96b2ea835449aa974223ee293cbca72ac158a0aa..d35048a2e3fe115d82f4fbc30adfcc686297fc51 100644 --- a/SECURITYNOTE.md +++ b/SECURITYNOTE.md @@ -47,6 +47,8 @@ 1. 建议用户结合运行资源状况编写对应训练脚本。若训练脚本与资源状况不匹配,如数据集加载内存大小超出内存容量限制、训练脚本在本地生成数据超过磁盘空间大小等情况,可能引发错误并导致进程意外退出。 2. MindSpeed-LLM内部用到了PyTorch,可能会因为版本不匹配导致运行错误,具体可参考PyTorch[安全声明](https://gitee.com/ascend/pytorch#%E5%AE%89%E5%85%A8%E5%A3%B0%E6%98%8E)。 3. 本软件使用PyTorch的torch.load做模型加载,代码中存在该接口使用场景配置参数weights_only=True,对于PyTorch版本<=2.5.1时,存在反序列化漏洞CVE-2025-32434,请用户保障所加载权重的安全性,避免恶意模型加载使执行机/设备遭到攻击。 +4. HumanEval评估使用了subprocess.run,存在安全风险。为了不影响功能正常使用,增加了相应的安全校验进行规避,请用户根据需要自行完善黑名单(configs/dangerous_shell.json),加强安全防护。 +5. 出于安全考虑,默认设置trust_remote_code=False,因此无法远程加载Transformers官方仓库未支持的开源模型;如有需要,请手动配置--trust-remote-code参数。 ## 公网地址声明 diff --git a/configs/dangerous_shell.json b/configs/dangerous_shell.json new file mode 100644 index 0000000000000000000000000000000000000000..927219fe5391423e6dfca632613c3ba3aeaf0e8e --- /dev/null +++ b/configs/dangerous_shell.json @@ -0,0 +1,9 @@ +[ + "os\\.(system|popen|exec|setuid|setgid|chroot)\\s*\\(", + "subprocess\\.(run|Popen|call)\\s*\\(", + "pty\\.spawn\\s*\\(", + "(requests|urllib|socket|httpx)\\.(get|post|urlopen|connect)\\s*\\(", + "open\\s*\\(", + "os\\.(remove|rename|chmod|chown|mkdir)\\s*\\(", + "(eval|exec|__import__|globals|locals)\\s*\\(" +] \ No newline at end of file diff --git a/convert_ckpt.py b/convert_ckpt.py index 5cf14112b397b727c7fa23255fe01ddf3f01b852..dbd49ab194dc8a7824b30498f276b8ba726fa68f 100644 --- a/convert_ckpt.py +++ b/convert_ckpt.py @@ -79,6 +79,10 @@ def main(): parser.add_argument('--ckpt-format', default='torch', choices=['torch', 'torch_dist', 'zarr'], help='Checkpoint format to use.') + parser.add_argument('--trust-remote-code', + action='store_true', + default=False, + help='enable trust_remote_code when loading models with transformers') known_args, _ = parser.parse_known_args() diff --git a/convert_ckpt_v2.py b/convert_ckpt_v2.py index e227590f0c767b43fae777abc89b2e5305ee5be5..4dfe1f8032d411ba5e629b3b554ada7cea8e2eb2 100644 --- a/convert_ckpt_v2.py +++ b/convert_ckpt_v2.py @@ -48,6 +48,10 @@ def get_args(): help='Customizing the number of dense layers.') parser.add_argument('--num-layers', type=int, default=None, help='Specify the number of transformer layers to use.') + parser.add_argument('--trust-remote-code', + action='store_true', + default=False, + help='enable trust_remote_code for transformers from_pretrained') args, _ = parser.parse_known_args() return args diff --git a/evaluation.py b/evaluation.py index 11d096f0f920d84453fc31e92f6816448157120e..9a6467301e87028b2e8150531c2cfe853200e4b7 100644 --- a/evaluation.py +++ b/evaluation.py @@ -378,7 +378,7 @@ def main(): model_provider=model_provider, pretrained_model_name_or_path=args.load ) - tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name_or_path, trust_remote_code=True, local_files_only=True) + tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name_or_path, trust_remote_code=args.trust_remote_code, local_files_only=True) rank = dist.get_rank() if 'cmmlu' in args.task: diff --git a/examples/mcore/deepseek3/convert_ckpt_deepseek3.py b/examples/mcore/deepseek3/convert_ckpt_deepseek3.py index e99ed193d8c594b6716ddb6da8ed30720fe94894..dfa15e27a9990739c2577c0f6cb7d77eb5b83417 100644 --- a/examples/mcore/deepseek3/convert_ckpt_deepseek3.py +++ b/examples/mcore/deepseek3/convert_ckpt_deepseek3.py @@ -12,7 +12,7 @@ import safetensors import torch import safetensors.torch import bitsandbytes as bnb - +from
mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger.basicConfig(format="") logger.getLogger().setLevel(logger.INFO) @@ -75,8 +75,8 @@ class CkptConvert(object): self.vpp_stage = vpp_stage if vpp_stage is not None: self.vpp_size = self.num_layers // self.pp_size // self.vpp_stage - self.hf_model_path = hf_model_path - self.mg_save_path = mg_save_path + self.hf_model_path = standardize_path(hf_model_path, check_read=True) + self.mg_save_path = standardize_path(mg_save_path, check_write=True) self.num_layer_list = num_layer_list self.noop_layers = noop_layers self.moe_grouped_gemm = moe_grouped_gemm @@ -140,7 +140,7 @@ class CkptConvert(object): """megatron model path""" iter_mg_path = os.path.join(mg_path, "iter_0000001") if not os.path.exists(mg_path): - os.makedirs(mg_path, exist_ok=True) + os.makedirs(mg_path, mode=0o750, exist_ok=True) with open(os.path.join(mg_path, "latest_checkpointed_iteration.txt"), 'w') as f: f.write("1") @@ -788,7 +788,7 @@ class CkptConvert(object): for tp_rank in range(self.tp_size): save_prefix = self.generate_mg_weights_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank) parallel_save_path = os.path.join(save_model_path, save_prefix) - os.makedirs(parallel_save_path) + os.makedirs(parallel_save_path, mode=0o750, exist_ok=True) save_file_name = os.path.join(parallel_save_path, "model_optim_rng.pt") logger.info(f"Saving to {save_file_name}") @@ -847,7 +847,7 @@ class CkptConvert(object): for tp_rank in range(self.tp_size): save_prefix = self.generate_mg_weights_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank) parallel_save_path = os.path.join(save_model_path, save_prefix) - os.makedirs(parallel_save_path, exist_ok=True) + os.makedirs(parallel_save_path, mode=0o750, exist_ok=True) save_file_name = os.path.join(parallel_save_path, "model_optim_rng.pt") logger.info(f"Saving to {save_file_name}") model_dict = {"checkpoint_version": 3.0, "iteration": 1} diff --git a/examples/mcore/deepseek3/convert_ckpt_deepseek3_mcore2hf.py b/examples/mcore/deepseek3/convert_ckpt_deepseek3_mcore2hf.py index 648814fb7ed9efdaae461a9c7f4dfb24108ab335..d2b9922c661c233788703002034189a8cbe3b564 100644 --- a/examples/mcore/deepseek3/convert_ckpt_deepseek3_mcore2hf.py +++ b/examples/mcore/deepseek3/convert_ckpt_deepseek3_mcore2hf.py @@ -14,7 +14,7 @@ import tqdm import torch import torch_npu import safetensors.torch - +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger.basicConfig(format="") logger.getLogger().setLevel(logger.INFO) @@ -73,15 +73,15 @@ class MgCkptConvert(object): self.ep_size = ep_size self.vpp_stage = vpp_stage - self.mg_model_path = mg_model_path - self.hf_save_path = hf_save_path + self.mg_model_path = standardize_path(mg_model_path, check_read=True) + self.hf_save_path = standardize_path(hf_save_path, check_write=True) self.lora_model_path = lora_model_path self.iter_path = self.get_iter_path(self.mg_model_path) if self.lora_model_path is not None: self.lora_iter_path = self.get_iter_path(self.lora_model_path) if not os.path.exists(self.hf_save_path): - os.makedirs(self.hf_save_path) + os.makedirs(self.hf_save_path, mode=0o750, exist_ok=True) self.num_layers = num_layers self.noop_layers = noop_layers @@ -194,7 +194,7 @@ class MgCkptConvert(object): directory = os.path.join(ckpt_path, f'iter_{iteration:07d}') - os.makedirs(directory, exist_ok=True) + os.makedirs(directory, mode=0o750, exist_ok=True) return directory diff --git a/examples/mindspore/deepseek3/convert_ckpt_deepseek3.py 
b/examples/mindspore/deepseek3/convert_ckpt_deepseek3.py index af47701c6ea042bf1a834ef8d2f1ebeb9723061a..32cf931d048d05532a8449a240c99155a5e3997c 100644 --- a/examples/mindspore/deepseek3/convert_ckpt_deepseek3.py +++ b/examples/mindspore/deepseek3/convert_ckpt_deepseek3.py @@ -146,7 +146,7 @@ class CkptConvert(object): """megatron model path""" iter_mg_path = os.path.join(mg_path, "iter_0000001") if not os.path.exists(mg_path): - os.makedirs(mg_path, exist_ok=True) + os.makedirs(mg_path, mode=0o750, exist_ok=True) with open(os.path.join(mg_path, "latest_checkpointed_iteration.txt"), 'w') as f: f.write("1") @@ -794,7 +794,7 @@ class CkptConvert(object): for tp_rank in range(self.tp_size): save_prefix = self.generate_mg_weights_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank) parallel_save_path = os.path.join(save_model_path, save_prefix) - os.makedirs(parallel_save_path) + os.makedirs(parallel_save_path, mode=0o750) save_file_name = os.path.join(parallel_save_path, "model_optim_rng.pt") logger.info(f"Saving to {save_file_name}") @@ -853,7 +853,7 @@ class CkptConvert(object): for tp_rank in range(self.tp_size): save_prefix = self.generate_mg_weights_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank) parallel_save_path = os.path.join(save_model_path, save_prefix) - os.makedirs(parallel_save_path, exist_ok=True) + os.makedirs(parallel_save_path, mode=0o750, exist_ok=True) save_file_name = os.path.join(parallel_save_path, "model_optim_rng.pt") logger.info(f"Saving to {save_file_name}") model_dict = {"checkpoint_version": 3.0, "iteration": 1} diff --git a/examples/mindspore/deepseek3/convert_ckpt_deepseek3_mcore2hf.py b/examples/mindspore/deepseek3/convert_ckpt_deepseek3_mcore2hf.py index bdd3a326bcf4978fc93708d32443c4a912079496..814e6cac29fc5b54e309655283a11bcbef0735f5 100644 --- a/examples/mindspore/deepseek3/convert_ckpt_deepseek3_mcore2hf.py +++ b/examples/mindspore/deepseek3/convert_ckpt_deepseek3_mcore2hf.py @@ -87,7 +87,7 @@ class MgCkptConvert(object): self.lora_iter_path = self.get_iter_path(self.lora_model_path) if not os.path.exists(self.hf_save_path): - os.makedirs(self.hf_save_path) + os.makedirs(self.hf_save_path, mode=0o750) self.num_layers = num_layers self.noop_layers = noop_layers @@ -200,7 +200,7 @@ class MgCkptConvert(object): directory = os.path.join(ckpt_path, f'iter_{iteration:07d}') - os.makedirs(directory, exist_ok=True) + os.makedirs(directory, mode=0o750, exist_ok=True) return directory diff --git a/mindspeed_llm/core/datasets/gpt_dataset.py b/mindspeed_llm/core/datasets/gpt_dataset.py index 63c59dba66b912733e7fcede160f62d923e33c37..1bf76fd937934c12384b3ba5ab1c44be202d8a5f 100644 --- a/mindspeed_llm/core/datasets/gpt_dataset.py +++ b/mindspeed_llm/core/datasets/gpt_dataset.py @@ -16,6 +16,7 @@ from megatron.core.datasets.gpt_dataset import (_build_document_index, _build_shuffle_index ) from mindspeed_llm.tasks.utils.error_utils import GPTDatasetSampleIndexError +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path from .blended_megatron_dataset_builder import need_to_build_dataset logger = logging.getLogger(__name__) @@ -70,6 +71,7 @@ def _build_document_sample_shuffle_indices( path_to_cache = os.path.join( self.dataset.path_prefix, "cache", f"{type(self).__name__}_indices" ) + path_to_cache = standardize_path(path_to_cache, check_write=True) # start of megatron_adaptation, # here we change from (class)GPTDataset._build_document_sample_shuffle_indices @@ -198,8 +200,7 @@ def _build_document_sample_shuffle_indices( ) if any(sample_index[:, 
0] < 0): - _url = "https://gitee.com/ascend/MindSpeed-LLM/wikis/megatron%20data%20helpers%E5%8F%AF%E8%83%BD%E5%BC%95%E5%85%A5%E7%9A%84%E9%97%AE%E9%A2%98" - raise GPTDatasetSampleIndexError(f"Bad sample index. Visit {_url} for more information") + raise GPTDatasetSampleIndexError(f"Bad sample index.") # Build the shuffle index if separate_final_epoch: @@ -212,7 +213,7 @@ def _build_document_sample_shuffle_indices( ) if path_to_cache: - os.makedirs(path_to_cache, exist_ok=True) + os.makedirs(path_to_cache, mode=0o750, exist_ok=True) # Write the description with open(path_to_description, "wt") as writer: writer.write(self.unique_description) @@ -258,8 +259,7 @@ def _build_document_sample_shuffle_indices( sample_index = numpy.load(path_to_sample_index, allow_pickle=True, mmap_mode='r') if any(sample_index[:, 0] < 0): - _url = "https://gitee.com/ascend/MindSpeed-LLM/wikis/megatron%20data%20helpers%E5%8F%AF%E8%83%BD%E5%BC%95%E5%85%A5%E7%9A%84%E9%97%AE%E9%A2%98" - raise GPTDatasetSampleIndexError(f"Bad sample index. Visit {_url} for more information") + raise GPTDatasetSampleIndexError(f"Bad sample index.") t_end = time.time() log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds") @@ -345,10 +345,10 @@ def _get_ltor_masks_and_position_ids( i = eod_index[j] # Mask attention loss. if reset_attention_mask and attention_mask is not None: - attention_mask[0, (i + 1) :, : (i + 1)] = 0 + attention_mask[0, (i + 1):, :(i + 1)] = 0 # Reset positions. if reset_position_ids: - position_ids[(i + 1) :] -= i + 1 - prev_index + position_ids[(i + 1):] -= i + 1 - prev_index prev_index = i + 1 if attention_mask is not None: diff --git a/mindspeed_llm/core/distributed/finalize_model_grads.py b/mindspeed_llm/core/distributed/finalize_model_grads.py index 17a0c8605fdffb9c6c1c0ac2d243146e8a1e024e..4b7e41935b02b0c2e4417644021c347003a16cfa 100644 --- a/mindspeed_llm/core/distributed/finalize_model_grads.py +++ b/mindspeed_llm/core/distributed/finalize_model_grads.py @@ -36,12 +36,7 @@ def allreduce_layernorm_grads(model: List[torch.nn.Module], config: TransformerC for name, param in get_attr_wrapped_model(model_chunk, 'named_parameters')(): if not param.requires_grad: continue - if ( - param.requires_grad - and getattr(param, 'sequence_parallel', False) - or 'q_layernorm' in name - or 'k_layernorm' in name - ): + elif getattr(param, 'sequence_parallel', False) or 'q_layernorm' in name or 'k_layernorm' in name: grad = param.main_grad grads.append(grad.data) if grads: @@ -54,7 +49,7 @@ def allreduce_layernorm_grads(model: List[torch.nn.Module], config: TransformerC layer_norm_2d_grads = [] for model_chunk in model: - for name, param in get_attr_wrapped_model(model_chunk, "named_parameters")(): + for _, param in get_attr_wrapped_model(model_chunk, "named_parameters")(): if param.requires_grad and getattr(param, "2d_tp", False): layer_norm_2d_grad = param.main_grad layer_norm_2d_grads.append(layer_norm_2d_grad.data) diff --git a/mindspeed_llm/core/models/gpt/gpt_model.py b/mindspeed_llm/core/models/gpt/gpt_model.py index 070846b64f0f7e6ac77172ec464796e8b29526dd..196e7972311a4e95e1785ff56d06b09d35bab0b8 100644 --- a/mindspeed_llm/core/models/gpt/gpt_model.py +++ b/mindspeed_llm/core/models/gpt/gpt_model.py @@ -23,9 +23,8 @@ from megatron.core.utils import deprecate_inference_params from megatron.core.inference.contexts import BaseInferenceContext from megatron.training import get_args -from mindspeed_llm.core.tensor_parallel.layers import SegmentedColumnParallelLinear - from mindspeed.utils 
import get_actual_seq_len, compute_qkv_index, get_position_ids +from mindspeed_llm.core.tensor_parallel.layers import SegmentedColumnParallelLinear class GPTModel(MegatronCoreGPTModel): diff --git a/mindspeed_llm/core/parallel_state.py b/mindspeed_llm/core/parallel_state.py index 5a69b1cf913c0301728e3d217b8b092d4ee6a8c4..69d2c2c2b687a2bd16e16abecf5856bc40d6aeb3 100644 --- a/mindspeed_llm/core/parallel_state.py +++ b/mindspeed_llm/core/parallel_state.py @@ -26,6 +26,8 @@ from mindspeed.core.parallel_state import (initialize_context_parallel_group_for initialize_context_parallel_group_for_hybrid_cp, initialize_context_parallel_group_for_double_ring) +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path + _EXPERT_PARALLEL_GROUP = None _MPU_EXPERT_MODEL_PARALLEL_RANK = None _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = None @@ -58,6 +60,8 @@ def initialize_model_parallel_decorator(initialize_model_parallel): from megatron.training.utils import print_rank_0 timeout = timedelta(minutes=distributed_timeout_minutes) + nccl_communicator_config_path = standardize_path(nccl_communicator_config_path, check_read=True) + if pipeline_model_parallel_size == 2 and virtual_pipeline_model_parallel_size is not None: megatron.core.parallel_state._VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = 0 megatron.core.parallel_state._VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = virtual_pipeline_model_parallel_size diff --git a/mindspeed_llm/core/pipeline_parallel/dualpipe/adaptor.py b/mindspeed_llm/core/pipeline_parallel/dualpipe/adaptor.py index e6d9c09998520fad2036c35df27a77169b4ca35a..ad7b9f58bad7c650767a6db73b34a8378d5c8008 100644 --- a/mindspeed_llm/core/pipeline_parallel/dualpipe/adaptor.py +++ b/mindspeed_llm/core/pipeline_parallel/dualpipe/adaptor.py @@ -17,7 +17,7 @@ try: from mindspeed_llm.core.pipeline_parallel.dualpipe.gpt_model import gpt_model_forward_backward_overlaping from mindspeed_llm.core.pipeline_parallel.dualpipe.MTP_overlap import forward_overlap except ImportError: - pass + print("[warning] failed import dualpipe modules, not support dualpipe") def dualpipe_register_patches(MegatronAdaptation): diff --git a/mindspeed_llm/core/ssm/mamba_mixer.py b/mindspeed_llm/core/ssm/mamba_mixer.py index b2ab807a3baeef4e98270192efedddebfadc18d0..f710e3fa4767dcbe9472cec1ddca191a19e6a988 100644 --- a/mindspeed_llm/core/ssm/mamba_mixer.py +++ b/mindspeed_llm/core/ssm/mamba_mixer.py @@ -106,7 +106,7 @@ def mamba_mixer_forward(self, hidden_states, seqlen=None, seq_idx=None, cu_seqle # Compute short convolution if conv_state is not None: if cu_seqlens: - raise('Variable length inputs in convolution are not currently supported') + raise 'Variable length inputs in convolution are not currently supported' # If we just take x[:, :, -self.d_conv :], it will error if seqlen < self.d_conv # Instead F.pad will pad with zeros if seqlen < self.d_conv, and truncate otherwise. 
conv_state.copy_( @@ -115,7 +115,7 @@ def mamba_mixer_forward(self, hidden_states, seqlen=None, seq_idx=None, cu_seqle seqlen = xBC.size(2) if seq_idx: - raise('Variable length inputs in convolution are not currently supported') + raise 'Variable length inputs in convolution are not currently supported' xBC = self.act(self.conv1d(xBC)[..., :seqlen]) # transpose b pd l --> b l pd diff --git a/mindspeed_llm/core/transformer/moe/layers.py b/mindspeed_llm/core/transformer/moe/layers.py index 35d942a4d3e0a0bb4ef1f46d0731f69e3b3f4e0e..cefa27e79e0cb2829611ac757de98ea2a161bff2 100644 --- a/mindspeed_llm/core/transformer/moe/layers.py +++ b/mindspeed_llm/core/transformer/moe/layers.py @@ -96,7 +96,7 @@ class SEColumnParallelLinear(megatron.core.tensor_parallel.ColumnParallelLinear) ) if self.config._cpu_offloading_context is not None: - if self.config._cpu_offloading_context.inside_context == True: + if self.config._cpu_offloading_context.inside_context: assert ( self.config.cpu_offloading == False ), "CPU Offloading cannot be enabled while using non-TE modules" @@ -197,7 +197,7 @@ class SERowParallelLinear(megatron.core.tensor_parallel.RowParallelLinear): """ if self.config._cpu_offloading_context is not None: - if self.config._cpu_offloading_context.inside_context == True: + if self.config._cpu_offloading_context.inside_context: assert ( self.config.cpu_offloading == False ), "CPU Offloading cannot be enabled while using non-TE modules" diff --git a/mindspeed_llm/core/transformer/moe/router.py b/mindspeed_llm/core/transformer/moe/router.py index 29c342448aac9ff2bc5954ac533b232d2ba24d09..4a919cfd5eca18e1b29bcc3404307c706dd36959 100644 --- a/mindspeed_llm/core/transformer/moe/router.py +++ b/mindspeed_llm/core/transformer/moe/router.py @@ -399,8 +399,10 @@ def apply_seq_aux_loss(self, activation, logits, topk_idx): scores_for_aux = scores # [s*b, n_global_experts] topk_idx_for_aux_loss = topk_idx.view(args.micro_batch_size, -1) # [b, s*top_k] scores_for_seq_aux = scores_for_aux.view(args.micro_batch_size, seq_length, -1) - ce = torch.stack([torch.histc(x.to(torch.int32), bins=args.num_experts, min=0, max=args.num_experts) for x in - topk_idx_for_aux_loss]) + ce = torch.stack([ + torch.histc(x.to(torch.int32), bins=args.num_experts, min=0, max=args.num_experts) + for x in topk_idx_for_aux_loss + ]) num_sub_sequence = 1 sequence_partition_group = parallel_state.get_context_parallel_group() diff --git a/mindspeed_llm/core/transformer/multi_token_prediction.py b/mindspeed_llm/core/transformer/multi_token_prediction.py index 194ce49b9e8db4e02ae4db3fa1479f1ee8cc9342..10939db33c23b1562a22a8ea8f79f9cf8f7d14f9 100644 --- a/mindspeed_llm/core/transformer/multi_token_prediction.py +++ b/mindspeed_llm/core/transformer/multi_token_prediction.py @@ -227,7 +227,7 @@ def mtp_block_forward( embedding.word_embeddings.weight = get_shared_embedding_from_dual_chunk() hidden_states_main_model = hidden_states - for layer_number in range(len(self.layers)): + for layer_number, _ in enumerate(self.layers): # get input_data from mtp_batch_list or not input_ids, position_ids, labels, loss_mask, attention_mask = get_mtp_layer_input( (input_ids, position_ids, labels, loss_mask, attention_mask), mtp_batch_list, layer_number) diff --git a/mindspeed_llm/features_manager/arguments/deprecated_args.py b/mindspeed_llm/features_manager/arguments/deprecated_args.py index 98cf8d52bff4293281533cda8e2fe321745f1dc3..618aefc5a6d3038ad1131d966de8931da150a7a9 100644 --- a/mindspeed_llm/features_manager/arguments/deprecated_args.py +++ 
b/mindspeed_llm/features_manager/arguments/deprecated_args.py @@ -50,6 +50,10 @@ class DeprecatedArgsFeature(MindSpeedFeature): group.add_argument('--rope-scaling-beta-slow', type=int, default=None, dest='deprecated_rope_scaling_beta_slow', help='Yarn rope: rope beta slow' 'Note: this option is deprecated, please use --beta-slow instead!') + group.add_argument('--trust-remote-code', + action='store_true', + default=False, + help='enable trust_remote_code for transformers from_pretrained') def validate_args(self, args): # If deprecated argument are used instead of new argument, we assign the deprecated argument to the new argument and issue a warning @@ -58,6 +62,11 @@ class DeprecatedArgsFeature(MindSpeedFeature): """The '--use-deter-comp' argument is deprecated and will be removed in the next future version, please use '--npu-deterministic' instead!""", DeprecationWarning) args.npu_deterministic = args.deprecated_use_deter_comp + if args.trust_remote_code: + print("""The '--trust-remote-code' argument is unsafe, please use it with caution!""") + else: + print( + """The '--trust-remote-code' argument is not set, some models may fail to load from transformers.""") if args.deprecated_use_mc2 and not args.use_ascend_mc2: warnings.warn( """The '--use-mc2' argument is deprecated and will be removed in the next future version, diff --git a/mindspeed_llm/legacy/model/transformer.py b/mindspeed_llm/legacy/model/transformer.py index fcefd0b2de7296feb07047c12ee16ae802994497..55ac368a8ab6c72daa1bcb50862b3879b44535a6 100644 --- a/mindspeed_llm/legacy/model/transformer.py +++ b/mindspeed_llm/legacy/model/transformer.py @@ -487,16 +487,16 @@ class FlashSelfAttention(torch.nn.Module): if q.shape[1] == 1 and q.shape[1] != seq_length: output = torch_npu.npu_incre_flash_attention( \ q, k, v, \ - num_heads=head_num, + num_heads=head_num, input_layout="BSH", \ pse_shift=pse, \ padding_mask=None, \ - scale_value=scale, + scale_value=scale, ) else: output = torch_npu.npu_prompt_flash_attention( \ q, k, v, \ - num_heads=head_num, + num_heads=head_num, input_layout="BSH", \ pse_shift=pse, \ sparse_mode=sparse_mode, \ @@ -520,7 +520,7 @@ class FlashSelfAttention(torch.nn.Module): keep_prob=1 - self.dropout_p, \ inner_precise=0 )[0] - + return output @@ -763,7 +763,7 @@ def ParallelAttentionForward(self, hidden_states, attention_mask, # In inference, we compute one token at a time. # Select the correct positional embedding # (only the last token in the sequence) - q_pos_emb = q_pos_emb[sequence_end - 1 : sequence_end] + q_pos_emb = q_pos_emb[sequence_end - 1: sequence_end] else: # In the first forward pass of inference, # we use the entire provided prefix.
diff --git a/mindspeed_llm/tasks/checkpoint/convert.py b/mindspeed_llm/tasks/checkpoint/convert.py index 0f054757fa21e0160182c1ea69a7b35dd758c99d..1ebb44727869121c8e5b4f6611fbdcf3e3703e63 100644 --- a/mindspeed_llm/tasks/checkpoint/convert.py +++ b/mindspeed_llm/tasks/checkpoint/convert.py @@ -44,7 +44,7 @@ class Convert(abc.ABC): """megatron model path""" iter_mg_path = os.path.join(mg_path, "iter_0000001") if not os.path.exists(mg_path): - os.makedirs(mg_path, exist_ok=True) + os.makedirs(mg_path, mode=0o750, exist_ok=True) with open(os.path.join(mg_path, "latest_checkpointed_iteration.txt"), 'w') as f: f.write("1") return iter_mg_path diff --git a/mindspeed_llm/tasks/checkpoint/convert_ckpt_mamba2.py b/mindspeed_llm/tasks/checkpoint/convert_ckpt_mamba2.py index a889b86a1c3995a0de1f035bf4d91016eb048e49..7346deed43f0c362d0b92cabdb9f775ba07cbd40 100644 --- a/mindspeed_llm/tasks/checkpoint/convert_ckpt_mamba2.py +++ b/mindspeed_llm/tasks/checkpoint/convert_ckpt_mamba2.py @@ -8,6 +8,7 @@ import logging as logger import argparse import torch import safetensors.torch +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger.basicConfig(format="") logger.getLogger().setLevel(logger.INFO) @@ -67,7 +68,7 @@ class CheckpointConverter: try: if filename.endswith(".bin"): - cur_weights = torch.load(file_path, map_location=torch.device('cpu')) + cur_weights = torch.load(file_path, map_location=torch.device('cpu'), weights_only=False) model_dict.update(cur_weights) print(f"Successfully loaded: {filename}") loaded = True @@ -475,7 +476,7 @@ class CheckpointConverter: dir_name += f"_{pp_idx:03d}" save_path = os.path.join(args.save_dir, f"iter_{out_iteration:07d}", dir_name) - os.makedirs(save_path, exist_ok=True) + os.makedirs(save_path, mode=0o750, exist_ok=True) return os.path.join(save_path, filename) @@ -589,6 +590,8 @@ def run(): args, _ = parser.parse_known_args() + args.load_dir = standardize_path(args.load_dir, check_read=True) + converter = CheckpointConverter(args) converter.main() diff --git a/mindspeed_llm/tasks/checkpoint/convert_hf2mg.py b/mindspeed_llm/tasks/checkpoint/convert_hf2mg.py index b385a78d18b7e10a2dbb0f99142e9e82e4c5d290..1ee6f67d69a069d9c06e30b4bf2c521d5b217259 100644 --- a/mindspeed_llm/tasks/checkpoint/convert_hf2mg.py +++ b/mindspeed_llm/tasks/checkpoint/convert_hf2mg.py @@ -922,7 +922,7 @@ class Hf2MgConvert(Convert): continue save_prefix = self.generate_mg_weights_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank) parallel_save_path = os.path.join(self.save_dir, save_prefix) - os.makedirs(parallel_save_path, exist_ok=True) + os.makedirs(parallel_save_path, mode=0o750, exist_ok=True) save_file_name = os.path.join(parallel_save_path, "model_optim_rng.pt") logger.info(f"Saving to {save_file_name}") @@ -984,7 +984,7 @@ class Hf2MgConvert(Convert): continue save_prefix = self.generate_mg_weights_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank) parallel_save_path = os.path.join(self.save_dir, save_prefix) - os.makedirs(parallel_save_path, exist_ok=True) + os.makedirs(parallel_save_path, mode=0o750, exist_ok=True) save_file_name = os.path.join(parallel_save_path, "model_optim_rng.pt") logger.info(f"Saving to {save_file_name}") model_dict = {"args" : args, "checkpoint_version" : 3.0, "iteration" : 1} diff --git a/mindspeed_llm/tasks/checkpoint/convert_mg2hf.py b/mindspeed_llm/tasks/checkpoint/convert_mg2hf.py index 37d296aa1de7cc1c6afb80f2460f8104e09135d5..4f4e94460bb727aedd0848553f98ad3bbd16578e 100644 --- 
a/mindspeed_llm/tasks/checkpoint/convert_mg2hf.py +++ b/mindspeed_llm/tasks/checkpoint/convert_mg2hf.py @@ -46,7 +46,7 @@ class Mg2HfConvert(Convert): if not os.path.exists(self.save_dir): - os.makedirs(self.save_dir) + os.makedirs(self.save_dir, mode=0o750) self.tensor_model_parallel_size = self.load_model.tensor_model_parallel_size self.pipeline_model_parallel_size = self.load_model.pipeline_model_parallel_size @@ -126,7 +126,7 @@ class Mg2HfConvert(Convert): directory = os.path.join(ckpt_path, f'iter_{iteration:07d}') - os.makedirs(directory, exist_ok=True) + os.makedirs(directory, mode=0o750, exist_ok=True) return directory diff --git a/mindspeed_llm/tasks/checkpoint/convert_param.py b/mindspeed_llm/tasks/checkpoint/convert_param.py index c3ef16bd562e25919c360bfd9e3129a5791507fb..9158525ba46fb0540a05fd3d92cdefc9ffb6c677 100644 --- a/mindspeed_llm/tasks/checkpoint/convert_param.py +++ b/mindspeed_llm/tasks/checkpoint/convert_param.py @@ -20,6 +20,7 @@ import stat import time import torch +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path def get_json_from_file(json_file): @@ -123,10 +124,11 @@ class ConvertBase: self.mg_latest_ckpt_file_name = "latest_checkpointed_iteration.txt" # hf model index_file - self.model_index_file = os.path.join( - self.args_cmd.hf_dir, - "pytorch_model.bin.index.json") if self.args_cmd.model_index_file is None \ + index_file = os.path.join(self.args_cmd.hf_dir, "pytorch_model.bin.index.json") + self.model_index_file = index_file if self.args_cmd.model_index_file is None \ else self.args_cmd.model_index_file + self.model_index_file = standardize_path(self.model_index_file, check_read=True) + self.model_index_map = get_json_from_file(self.model_index_file) # hf model config_file self.config_file = os.path.join( @@ -550,17 +552,18 @@ class ConvertHf2Mg(ConvertBase): ep_rank=ep_rank) save_dir = self.get_mg_model_save_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=ep_rank, iteration=iteration) - os.makedirs(save_dir, exist_ok=True) + os.makedirs(save_dir, mode=0o750, exist_ok=True) torch.save(model_dict, os.path.join(save_dir, self.mg_model_file_name)) else: # Dense Model model_dict = self._set_dense_mg_model(hf_model=hf_model, tp_rank=tp_rank, pp_rank=pp_rank) save_dir = self.get_mg_model_save_dir(tp_rank=tp_rank, pp_rank=pp_rank, ep_rank=None, iteration=iteration) - os.makedirs(save_dir, exist_ok=True) + os.makedirs(save_dir, mode=0o750, exist_ok=True) torch.save(model_dict, os.path.join(save_dir, self.mg_model_file_name)) # write latest_checkpointed_iteration.txt latest_ckpt_file_path = os.path.join(self.args_cmd.mg_dir, self.mg_latest_ckpt_file_name) + latest_ckpt_file_path = standardize_path(latest_ckpt_file_path, check_write=True) modes = stat.S_IWUSR | stat.S_IRUSR | stat.S_IWGRP | stat.S_IRGRP with os.fdopen(os.open(latest_ckpt_file_path, flags=os.O_RDWR | os.O_CREAT, mode=modes), 'w') as fout: fout.write(iteration) @@ -757,7 +760,7 @@ class ConvertMg2Hf(ConvertBase): if self.get_hf_model_file_based_param_key(param_key) == model_file: exist_model[param_key] = hf_model[param_key] - os.makedirs(os.path.dirname(file_path), exist_ok=True) + os.makedirs(os.path.dirname(file_path), mode=0o750, exist_ok=True) torch.save(exist_model, file_path) def run(self): diff --git a/mindspeed_llm/tasks/checkpoint/loader_hf.py b/mindspeed_llm/tasks/checkpoint/loader_hf.py index 0d6e50fcacfb1966bfd8a95d15760e9b52bf954e..cd84e11d743bd6a43e3d666d9f839ba62a069640 100644 --- a/mindspeed_llm/tasks/checkpoint/loader_hf.py +++ 
b/mindspeed_llm/tasks/checkpoint/loader_hf.py @@ -376,7 +376,7 @@ def _load_checkpoint(model_provider, queue, args): md = build_metadata(args, margs) queue.put(md) - model_hf.get_modules_from_pretrained() + model_hf.get_modules_from_pretrained(trust_remote_code=args.trust_remote_code) model_mg.get_modules_from_config() model_mg.update_module(model_hf) diff --git a/mindspeed_llm/tasks/checkpoint/models.py b/mindspeed_llm/tasks/checkpoint/models.py index efcdc498b78c3a5e0a93b58d9dba22885292e5ba..69f0ae9d793ae4280897cafeef4c9d000370ffab 100644 --- a/mindspeed_llm/tasks/checkpoint/models.py +++ b/mindspeed_llm/tasks/checkpoint/models.py @@ -2,6 +2,7 @@ import abc import os import sys +import ast import re import json from types import SimpleNamespace @@ -23,6 +24,7 @@ from megatron.core import tensor_parallel from mindspeed_llm.training.utils import parse_args from mindspeed_llm.training.training import model_provider_func_wrapper from mindspeed_llm.training.checkpointing import load_checkpoint_wrapper +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger.basicConfig(format="") logger.getLogger().setLevel(logger.INFO) @@ -468,8 +470,10 @@ class HuggingfaceModel(ModelBase): # Read huggingface args. if self.args_cmd.save_model_type == 'hf': cfg_dir = self.args_cmd.save_dir + cfg_dir = standardize_path(self.args_cmd.save_dir, check_write=True) else: cfg_dir = self.args_cmd.load_dir + cfg_dir = standardize_path(self.args_cmd.load_dir, check_read=True) llama_args_path = os.path.join(cfg_dir, "config.json") with open(llama_args_path) as f: self.args = json.load(f) @@ -503,7 +507,7 @@ class HuggingfaceModel(ModelBase): self.args.save_lora_to_hf = self.args_cmd.save_lora_to_hf self.args.noop_layers = self.args_cmd.noop_layers - def get_modules_from_config(self, device_map="cpu", trust_remote_code=True): + def get_modules_from_config(self, device_map="cpu", trust_remote_code=False): # Load Huggingface model. if self.args_cmd.save_model_type == "hf": load_dir = self.args_cmd.save_dir @@ -515,9 +519,9 @@ class HuggingfaceModel(ModelBase): hf_model.to_empty(device=device_map) self.module = [hf_model] if hasattr(self.args, "torch_dtype") and self.args.torch_dtype in ["float16", "bfloat16"]: - self.module[0] = self.module[0].to(eval(f'torch.{self.args.torch_dtype}')) + self.module[0] = self.module[0].to(getattr(torch, self.args.torch_dtype)) - def get_modules_from_pretrained(self, device_map="cpu", trust_remote_code=True): + def get_modules_from_pretrained(self, device_map="cpu", trust_remote_code=False): # Load Huggingface model.
if self.args_cmd.save_model_type == "hf": load_dir = self.args_cmd.save_dir @@ -544,7 +548,8 @@ class HuggingfaceModel(ModelBase): ) self.module = [get_peft_model(self.module[0], lora_config)] if hasattr(self.args, "torch_dtype") and self.args.torch_dtype in ["float16", "bfloat16"]: - self.module[0] = self.module[0].to(eval(f'torch.{self.args.torch_dtype}')) + dtype = getattr(torch, self.args.torch_dtype) + self.module[0] = self.module[0].to(dtype) def get_lora_key(self, layer_name, prefix): return f"{layer_name}.{prefix}" diff --git a/mindspeed_llm/tasks/checkpoint/saver.py b/mindspeed_llm/tasks/checkpoint/saver.py index 8d23ad83c38f3a811ed53994247d448c994a7d9d..d6dbabe1354ba2807936a7abe1b8e3cda80cab85 100644 --- a/mindspeed_llm/tasks/checkpoint/saver.py +++ b/mindspeed_llm/tasks/checkpoint/saver.py @@ -542,9 +542,9 @@ def save_huggingface(args, model): from .models import get_huggingface_model model_hf = get_huggingface_model(args) if args.load_hf_from_config: - model_hf.get_modules_from_config() + model_hf.get_modules_from_config(trust_remote_code=args.trust_remote_code) else: - model_hf.get_modules_from_pretrained() + model_hf.get_modules_from_pretrained(trust_remote_code=args.trust_remote_code) args_cmd = model_hf.get_args_cmd() model_hf.update_module(model) diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/agi_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/agi_eval.py index 31beea4ae66512ac2d30205f6d2c968c24f54fe6..b75d5039f7fdb3e0b04c3449c45e59308abf5256 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/agi_eval.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/agi_eval.py @@ -34,7 +34,7 @@ from mindspeed_llm.tasks.evaluation.eval_utils.agi_utils import ( get_default_instruction, get_pred_postprocess_func ) - +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger = logging.getLogger(__name__) @@ -42,7 +42,7 @@ class AGIEvalExam(DatasetEval): def __init__(self, test_dir, eval_args, instruction_template="{fewshot_template} {question}\n{question_template}\n{options}" "\n{answer_template}"): - self.test_dir = test_dir + self.test_dir = standardize_path(test_dir, check_read=True) self.instruction_template = instruction_template self.batch_size = eval_args.evaluation_batch_size self.rank = dist.get_rank() diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/bbh_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/bbh_eval.py index 6f5cc3e3546ca6577f07516b92b70e6a60d89969..fe9d32aa264a2531a8a7be2298d5e754cdcbb025 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/bbh_eval.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/bbh_eval.py @@ -32,7 +32,7 @@ from mindspeed_llm.tasks.evaluation.eval_api.dataset_eval import DatasetEval from mindspeed_llm.tasks.evaluation.eval_impl.template import BBH_TEMPLATE_DIR, BBH_COT_TEMPLATE_DIR, get_eval_template from mindspeed_llm.tasks.evaluation.eval_utils.bbh_utils import bbh_mcq_postprocess, bbh_freeform_postprocess, bbh_true_or_false_questions from mindspeed_llm.tasks.evaluation.utils import get_final_list_dataset - +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger = logging.getLogger(__name__) @@ -75,7 +75,7 @@ bbh_free_form_sets = [ class BBHEval(DatasetEval): def __init__(self, test_dir, eval_args, instruction_template="{fewshot_template}Q: {question}\nA:"): - self.test_dir = test_dir + self.test_dir = standardize_path(test_dir, check_read=True) self.instruction_template = instruction_template self.batch_size = eval_args.evaluation_batch_size self.rank = dist.get_rank() @@ -169,7 
+169,7 @@ class BBHEval(DatasetEval): result_mapping = {value.strip(): key for key, value in re.findall(r'\(([A-Z])\)\s*([^\(\)]+)', instruction[-1][:answer_idx])} elif args.chain_of_thought: instruction = bbh_template.get(subject_name) - target_question = "Q: " + item['input'] + target_question = "Q: " + item['input'] # item['input'] is not path-info instruction += target_question instruction += "\nA: Let's think step by step." instructions.append(instruction) diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/ceval_exam.py b/mindspeed_llm/tasks/evaluation/eval_impl/ceval_exam.py index 847f7b3b19768aede95b50ed4328b5e2f1fad329..44fa435f7e8e50da747018b9bbdc7c266d7958c3 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/ceval_exam.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/ceval_exam.py @@ -31,7 +31,7 @@ from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero from mindspeed_llm.tasks.evaluation.eval_utils.ceval_utils import format_ceval_templates, first_capital_postprocess from mindspeed_llm.tasks.evaluation.utils import get_final_dataset from mindspeed_llm.tasks.evaluation.eval_impl.template import CEVAL_TEMPLATE_DIR, get_eval_template - +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger = logging.getLogger(__name__) @@ -39,7 +39,7 @@ logger = logging.getLogger(__name__) class CEvalExam(DatasetEval): def __init__(self, test_dir, eval_args, instruction_template="{fewshot_template}\n\n问:{question}\n答:"): - self.test_dir = test_dir + self.test_dir = standardize_path(test_dir, check_read=True) self.instruction_template = instruction_template self.batch_size = eval_args.evaluation_batch_size self.rank = dist.get_rank() diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/cmmlu_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/cmmlu_eval.py index 480fc1e3ea6b6303edc802ac34c992b5861bea13..542e124974692e894fbe4e1a3ab4254d8eaa256d 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/cmmlu_eval.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/cmmlu_eval.py @@ -31,6 +31,7 @@ from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero from mindspeed_llm.tasks.evaluation.eval_utils.cmmlu_utils import cmmlu_subject_mapping, first_option_postprocess, cmmlu_format_example from mindspeed_llm.tasks.evaluation.utils import get_final_dataset +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path from .template import CMMLU_TEMPLATE_DIR, get_eval_template @@ -43,7 +44,7 @@ class CmmluEval(DatasetEval): "{question}\n答案: ", output_template1=r".*(?P<答案>[A|B|C|D])\..*", output_template2=r"(?P<答案>[A|B|C|D])"): - self.test_dir = test_dir + self.test_dir = standardize_path(test_dir, check_read=True) self.instruction_template = instruction_template self.output_template = [output_template1, output_template2] self.batch_size = eval_args.evaluation_batch_size diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/gsm8k_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/gsm8k_eval.py index 6417dffd14324489c7f1c0962e7fae45f64d5f85..0277bf37e01463eaadcc2030b2aa695bb5cbe1d8 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/gsm8k_eval.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/gsm8k_eval.py @@ -30,6 +30,7 @@ from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero from mindspeed_llm.tasks.evaluation.eval_utils.gsm8k_utils import four_shots_prompt, gsm8k_postprocess from mindspeed_llm.tasks.evaluation.utils import get_final_list_dataset from 
mindspeed_llm.tasks.evaluation.eval_impl.template import GSM8K_TEMPLATE_DIR +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger = logging.getLogger(__name__) @@ -38,7 +39,7 @@ class Gsm8kEval(DatasetEval): def __init__(self, test_dir, eval_args, instruction_template="{fewshot_template}\n\n{question}", output_template=r'The answer is (.*?) '): - self.test_dir = test_dir + self.test_dir = standardize_path(test_dir, check_read=True) self.instruction_template = instruction_template self.output_template = output_template self.batch_size = eval_args.evaluation_batch_size diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/hellaswag_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/hellaswag_eval.py index 6dff8f53a208652e6bb8da791529310bdc7f0a84..e01fa0e8f43ed7ae01b5f51946dbf4672b84a038 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/hellaswag_eval.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/hellaswag_eval.py @@ -18,7 +18,7 @@ from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero from mindspeed_llm.tasks.evaluation.eval_utils.mmlu_utils import postprocess from mindspeed_llm.tasks.evaluation.utils import get_final_list_dataset - +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger = logging.getLogger(__name__) @@ -27,7 +27,7 @@ class HellaswagEval(DatasetEval): def __init__(self, test_dir, eval_args, output_template1=r".*(?P[A|B|C|D])\..*", output_template2=r"(?P[A|B|C|D])"): - self.test_dir = test_dir + self.test_dir = standardize_path(test_dir, check_read=True) self.output_template = [output_template1, output_template2] self.instruction_template = ('{ctx}\nQuestion: Which ending makes the most sense?\n' 'A. {A}\nB. {B}\nC. {C}\nD. 
{D}\n' diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/human_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/human_eval.py index ef5a20397262862072570115be5e84e605fd08e3..0f6bf2044e617a9eac6e66d7f299243b2344c44f 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/human_eval.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/human_eval.py @@ -18,6 +18,7 @@ import os import logging import re import sys +import ast import subprocess from typing import Iterable, Dict import pandas as pd @@ -33,16 +34,55 @@ from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero from mindspeed_llm.training.utils import WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES from mindspeed_llm.tasks.evaluation.eval_utils.human_utils import humaneval_postprocess, get_score - +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path logger = logging.getLogger(__name__) +def is_code_dangerous(code: str, dangerous_patterns) -> bool: + """Detect privilege escalation, outbound connections and file tampering via regex and AST checks.""" + + # Regex screening (fast filter) + for pattern in dangerous_patterns: + if re.search(pattern, code): + return True + + # AST analysis (harder to bypass) + try: + tree = ast.parse(code) + for node in ast.walk(tree): + if isinstance(node, ast.Call): + if isinstance(node.func, ast.Name): + if node.func.id in ("exec", "eval", "open", "os", "subprocess"): + return True + elif isinstance(node, (ast.Import, ast.ImportFrom)): + for alias in node.names: + if alias.name in ("os", "sys", "subprocess"): + return True + + # Detect calls such as os.system("sudo ...") + if isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute) and node.func.attr == "system": + return True + if isinstance(node, ast.Call) and any(isinstance(arg, ast.Constant) and isinstance(arg.value, str) and ("sudo" in arg.value or "curl" in arg.value) for arg in node.args): + return True + # Detect dynamic imports such as __import__("os").system(...) + if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "__import__": + return True + + return False + except SyntaxError: + return True # treat unparseable code as dangerous + + def extract_answer_code(answer, task: dict): """ :param answer: :param task: :return: """ + + with open("configs/dangerous_shell.json", "r", encoding="utf-8") as f: + dangerous_patterns = json.load(f) + if is_code_dangerous(answer, dangerous_patterns) or is_code_dangerous(task["test"], dangerous_patterns): + raise ValueError("Unsafe code detected") + task_id = task['task_id'] target_func = task['entry_point'] test_case = task['test'] @@ -51,7 +91,7 @@ def extract_answer_code(answer, task: dict): code_lines = code.split("\n") target_func_flag = False if not os.path.exists(CODE_TEST_LOG_DIR): - os.makedirs(CODE_TEST_LOG_DIR) + os.makedirs(CODE_TEST_LOG_DIR, mode=0o750, exist_ok=True) test_code_path = "{}/{}".format(CODE_TEST_LOG_DIR, save_file) with os.fdopen(os.open(test_code_path, WRITE_FILE_DEFAULT_FLAGS, WRITE_FILE_DEFAULT_MODES), 'w') as f: f.write("from typing import List\n") @@ -85,7 +125,7 @@ def extract_answer_code(answer, task: dict): class HumanEval(DatasetEval): def __init__(self, test_dir, eval_args): - self.test_dir = test_dir + self.test_dir = standardize_path(test_dir, check_read=True) instruction_template = eval_args.instruction_template if instruction_template: self.instruction_template = instruction_template @@ -96,6 +136,10 @@ class HumanEval(DatasetEval): self.file_pbar = None self.task_pbar = None self.prompt = 'Complete the following python code:\n{prompt}' + with open("configs/dangerous_shell.json", "r", encoding="utf-8") as f: + self.dangerous_patterns = json.load(f) def read_problems(self) -> Dict[str, Dict]: return
{task["task_id"]: task for task in self.stream_jsonl(self.test_dir)} diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/mmlu_eval.py b/mindspeed_llm/tasks/evaluation/eval_impl/mmlu_eval.py index 05867319d42814918f40d34f332a0dfb746710a8..4013a7957c0f53726dd2c1f8e8ccb9c944886fd1 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/mmlu_eval.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/mmlu_eval.py @@ -30,6 +30,7 @@ from mindspeed_llm.tasks.evaluation.eval_api.chat import Chat from mindspeed_llm.tasks.utils.error_utils import check_divisible_by_zero from mindspeed_llm.tasks.evaluation.eval_utils.mmlu_utils import _format_example, postprocess from mindspeed_llm.tasks.evaluation.utils import get_final_dataset +from mindspeed_llm.tasks.evaluation.file_utils import standardize_path from .template import MMLU_TEMPLATE_DIR, get_eval_template @@ -42,7 +43,7 @@ class MmluEval(DatasetEval): "{question}\nAnswer:", output_template1=r".*(?P[A|B|C|D])\..*", output_template2=r"(?P[A|B|C|D])"): - self.test_dir = test_dir + self.test_dir = standardize_path(test_dir, check_read=True) self.instruction_template = instruction_template self.output_template = [output_template1, output_template2] self.batch_size = eval_args.evaluation_batch_size diff --git a/mindspeed_llm/tasks/evaluation/eval_impl/needlebench_single.py b/mindspeed_llm/tasks/evaluation/eval_impl/needlebench_single.py index 22a923c79eca547b4048039ad348429d62290dce..22baf1d08c2c2a91948ad18a4a0c2b527c9e1a97 100644 --- a/mindspeed_llm/tasks/evaluation/eval_impl/needlebench_single.py +++ b/mindspeed_llm/tasks/evaluation/eval_impl/needlebench_single.py @@ -157,7 +157,7 @@ def load_datasets(config: NeedleBenchConfig): dataset = [] needle_file_path = os.path.join(config.dataset_dir, "needles.jsonl") cache_dir = os.path.join(tempfile.gettempdir(), "data-gym-cache") - os.makedirs(cache_dir, exist_ok=True) + os.makedirs(cache_dir, mode=0o750, exist_ok=True) cache_file = "9b5ad71b2ce5302211f9c61530b329a4922fc6a4" dest_filepath = os.path.join(cache_dir, cache_file) # 参考docs中的needlebench-evaluation.md下载cl100k_base.tiktoken,将其放置到该数据集文件夹 diff --git a/mindspeed_llm/tasks/evaluation/eval_utils/human_utils.py b/mindspeed_llm/tasks/evaluation/eval_utils/human_utils.py index 4b29bd2c73e942bc06de06273e69ea6044c15a98..3872dac41453fa9500f816c836fa6796519d89db 100644 --- a/mindspeed_llm/tasks/evaluation/eval_utils/human_utils.py +++ b/mindspeed_llm/tasks/evaluation/eval_utils/human_utils.py @@ -70,7 +70,7 @@ def check_correctness(problem: Dict, completion: str, timeout: float, rmtree = shutil.rmtree rmdir = os.rmdir - chdir = os.chdir + os_chdir = os.chdir reliability_guard() @@ -80,8 +80,8 @@ def check_correctness(problem: Dict, completion: str, timeout: float, f"check({problem['entry_point']})" ) + exec_globals = {} try: - exec_globals = {} with swallow_io(): with time_limit(timeout): exec(check_program, exec_globals) @@ -93,7 +93,7 @@ def check_correctness(problem: Dict, completion: str, timeout: float, shutil.rmtree = rmtree os.rmdir = rmdir - os.chdir = chdir + os.chdir = os_chdir manager = multiprocessing.Manager() result = manager.list() @@ -211,7 +211,7 @@ def get_score(predictions, references, test_set, problem_set): def evaluate_functional_correctness( sample_file: str, - problem_file: dict, + problem_file: dict = None, k: List[int] = [1, 10, 100], n_workers: int = 4, timeout: float = 3.0, @@ -220,6 +220,8 @@ def evaluate_functional_correctness( Evaluates the functional correctness of generated samples, and writes results to 
f"{sample_file}_results.jsonl.gz" """ + if problem_file is None: + problem_file = {} problems = problem_file @@ -259,7 +261,8 @@ def evaluate_functional_correctness( ks = k pass_at_k = { f"pass@{k}": estimate_pass_at_k(total, correct, k).mean() - for k in ks if (total >= k).all() + for k in ks + if (total >= k).all() } # Finally, save the results in one file: diff --git a/mindspeed_llm/tasks/evaluation/file_utils.py b/mindspeed_llm/tasks/evaluation/file_utils.py index 6e52342fddc42a7fec706bf174105e7ff1c2909a..6f7032c2a7ceccff175f84fbe9d578b2472d78bb 100644 --- a/mindspeed_llm/tasks/evaluation/file_utils.py +++ b/mindspeed_llm/tasks/evaluation/file_utils.py @@ -2,6 +2,7 @@ from functools import reduce import os import stat +import re MAX_PATH_LENGTH = 4096 MAX_FILE_SIZE = 100 * 1024 * 1024 @@ -52,17 +53,36 @@ def safe_open(file_path: str, mode='r', encoding=None, permission_mode=0o600, is mode, encoding=encoding) -def standardize_path(path: str, max_path_length=MAX_PATH_LENGTH, check_link=True): +def standardize_path(path: str, max_path_length=MAX_PATH_LENGTH, check_link=True, check_read=True, check_write=True): """ check path param: path return: data real path after check """ check_path_is_none(path) + if path: + path = os.path.realpath(path) + else: + return None + + if os.path.exists(path): + if check_read and not os.access(path, os.R_OK): + raise RuntimeError(f"File {path} not readable") + + if check_write and not os.access(path, os.W_OK): + raise RuntimeError(f"File {path} not writable") + else: + print(f"Path: {path} not exists") + check_path_length_lt(path, max_path_length) if check_link: check_path_is_link(path) path = os.path.realpath(path) + + pattern = r'(\.|/|_|-|\s|[~0-9a-zA-Z]|[\u4e00-\u9fa5])+' + if not re.fullmatch(pattern, path): + raise RuntimeError(f"Invalid input path: {path}") + return path diff --git a/mindspeed_llm/tasks/models/spec/qwen3_spec.py b/mindspeed_llm/tasks/models/spec/qwen3_spec.py index 925eb72b3493e5ab92e5b0b6ecadf87fa0fd3a5e..60568873ccfd014834b3e980f615d1e35e5998f2 100644 --- a/mindspeed_llm/tasks/models/spec/qwen3_spec.py +++ b/mindspeed_llm/tasks/models/spec/qwen3_spec.py @@ -7,9 +7,10 @@ from megatron.core.transformer.identity_op import IdentityOp from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.models.gpt.gpt_layer_specs import _get_mlp_module_spec from megatron.training import get_args +from megatron.core.transformer import ModuleSpec, TransformerLayer, TransformerLayerSubmodules from mindspeed_llm.core.transformer.custom_layers.transformer_engine import PTNorm -from megatron.core.transformer import ModuleSpec, TransformerLayer, TransformerLayerSubmodules + args = get_args() num_experts, moe_grouped_gemm, qk_layernorm = args.num_experts, args.moe_grouped_gemm, args.qk_layernorm diff --git a/mindspeed_llm/tasks/posttrain/rejection_sampling/rejection_sampling.py b/mindspeed_llm/tasks/posttrain/rejection_sampling/rejection_sampling.py index 8f07e6edda8859d174cda383f491f12d9cf79f31..0343f857fcf782ad9f4a6306a64c20ea97bf2dcc 100644 --- a/mindspeed_llm/tasks/posttrain/rejection_sampling/rejection_sampling.py +++ b/mindspeed_llm/tasks/posttrain/rejection_sampling/rejection_sampling.py @@ -2,7 +2,7 @@ import argparse import gc import json import re - +import os import jsonlines import pandas as pd import torch @@ -13,6 +13,7 @@ from vllm.distributed.parallel_state import (destroy_distributed_environment, de from utils import blending_datasets, PromptGtAnswerDataset, apply_GenRM_template, rejection_sampling_processor from 
mindspeed_llm.tasks.posttrain.verifier.rule_verifier import preprocess_box_response_for_qwen_prompt
+from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
 
 
 def clean_up():
@@ -36,13 +37,13 @@ def batch_generate_vllm(args):
     dummy_strategy.args = args
 
     # configure tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(args.pretrain, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(args.pretrain, trust_remote_code=args.trust_remote_code)
 
     # configure model
     llm = LLM(
         model=args.pretrain,
         tensor_parallel_size=args.tp_size,
-        trust_remote_code=True,
+        trust_remote_code=args.trust_remote_code,
         seed=args.seed,
         max_num_seqs=args.max_num_seqs,
         enable_prefix_caching=args.enable_prefix_caching,
@@ -107,7 +108,7 @@ def batch_GenRM_rejection_sampling(args):
     llm = LLM(
         model=args.pretrain,
         tensor_parallel_size=args.tp_size,
-        trust_remote_code=True,
+        trust_remote_code=args.trust_remote_code,
         seed=args.seed,
         max_num_seqs=args.max_num_seqs,
         enable_prefix_caching=args.enable_prefix_caching,
@@ -215,9 +216,15 @@ if __name__ == "__main__":
     parser.add_argument("--iter", type=int, default=None,
                         help="Used to slice the datasets in range iter * rollout_batch_size: (iter + 1) * rollout_batch_size",
     )
     parser.add_argument("--rollout-batch-size", type=int, default=2048, help="Number of samples to generate")
+    parser.add_argument('--trust-remote-code',
+                        action='store_true',
+                        default=False,
+                        help='enable trust-remote-code for transformer to load model')
     args = parser.parse_args()
 
+    args.output_path = standardize_path(args.output_path, check_write=True)
+
     if args.task and args.task == "generate_vllm":
         batch_generate_vllm(args)
     elif args.task and args.task == "rejection_sampling":
diff --git a/mindspeed_llm/tasks/posttrain/rejection_sampling/utils.py b/mindspeed_llm/tasks/posttrain/rejection_sampling/utils.py
index b5d10c4dd8a23dbcbcbe347f0e4e1eb2d18c9dd6..e397fe44520b127994e91ccf3f4c5010ef46bcab 100644
--- a/mindspeed_llm/tasks/posttrain/rejection_sampling/utils.py
+++ b/mindspeed_llm/tasks/posttrain/rejection_sampling/utils.py
@@ -31,7 +31,7 @@ def blending_datasets(
         ext = os.path.splitext(dataset)[-1]
         # local python script
         if ext == ".py" or (os.path.isdir(dataset) and os.path.exists(os.path.join(dataset, f"{dataset_basename}.py"))):
-            data = load_dataset(dataset, trust_remote_code=True)
+            data = load_dataset(dataset, trust_remote_code=False)
             strategy.print(f"loaded {dataset} with python script")
         # local text file
         elif ext in [".json", ".jsonl", ".csv"]:
diff --git a/mindspeed_llm/tasks/posttrain/sft/sft_trainer.py b/mindspeed_llm/tasks/posttrain/sft/sft_trainer.py
index 9bd1fb937d8c9edbba9bd6923f1ad4c3eeaa4756..493cf4bb3312fd099eec3ac5ceee46fe96839132 100644
--- a/mindspeed_llm/tasks/posttrain/sft/sft_trainer.py
+++ b/mindspeed_llm/tasks/posttrain/sft/sft_trainer.py
@@ -13,7 +13,7 @@ from megatron.training import get_timers
 try:
     from mindspeed.core.pipeline_parallel.dualpipev.dualpipev_schedules import set_post_process_flag
 except ImportError:
-    pass
+    print("[warning] failed to import dualpipe modules; dualpipe is not supported")
 from mindspeed_llm.training.utils import get_tune_attention_mask, get_finetune_data_on_this_tp_rank, generate_actual_seq_len
 from mindspeed_llm.tasks.posttrain.base import BaseTrainer
 from mindspeed_llm.training.utils import generate_actual_seq_len, set_mtp_batch_list, get_mtp_batch_list
diff --git a/mindspeed_llm/tasks/posttrain/trl_ppo/utils.py b/mindspeed_llm/tasks/posttrain/trl_ppo/utils.py
index 39da439f6a2b743ef34f1de0fd096e4708fa25e8..69e22b65124dd9af84723bdcd7ed1e9579683862 100644
--- a/mindspeed_llm/tasks/posttrain/trl_ppo/utils.py
+++ b/mindspeed_llm/tasks/posttrain/trl_ppo/utils.py
@@ -1,5 +1,5 @@
 from typing import Union
-
+import os
 import torch
 from megatron.core import mpu, dist_checkpointing
@@ -19,6 +19,7 @@ from megatron.training.training import compute_throughputs_and_append_to_progress_log
 from megatron.training.utils import unwrap_model, print_rank_0, append_to_progress_log
 from megatron.training.yaml_arguments import core_transformer_config_from_yaml
 from mindspeed_llm.tasks.posttrain.orm.orm_model import GPTRewardModel
+from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
 
 
 def model_provider(is_reward_model=False, pre_process=True, post_process=True) -> Union[GPTModel]:
@@ -137,6 +138,8 @@ def save_checkpoint(iteration, model, optimizer, opt_param_scheduler,
     if save_model_type:
         save_path = args.save + '/' + save_model_type
 
+    save_path = standardize_path(save_path, check_write=True)
+
     ckpt_format = args.ckpt_format if args.use_dist_ckpt else 'torch'
     print_rank_0('saving checkpoint at iteration {:7d} to {} in {} format'.format(
         iteration, save_path, ckpt_format))
diff --git a/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/grader.py b/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/grader.py
index 1f435401190d2933edfe94566117310b7d7acaae..47cf553a35566bd6477dae7a37fd614908f909f9 100644
--- a/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/grader.py
+++ b/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/grader.py
@@ -38,7 +38,7 @@ def parse_digits(num):
         try:
             return float(num) / 100
         except:
-            pass
+            return None
     return None
diff --git a/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/parser.py b/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/parser.py
index e2805f7443e1ef263a9c09f90a7cd8239f239102..1cd251d4fea9df296c9b775dd19fbf90a9e9f012 100644
--- a/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/parser.py
+++ b/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/parser.py
@@ -1,5 +1,6 @@
 import random
 import re
+import ast
 from typing import TypeVar, Iterable, List, Union, Any, Dict
 import regex
@@ -75,7 +76,7 @@ def convert_word_number(text: str) -> str:
     try:
         text = str(w2n.word_to_num(text))
     except ValueError:
-        pass
+        return text
     return text
@@ -468,7 +469,7 @@ def extract_theoremqa_answer(pred: str, answer_flag: bool = True):
         pred = clean_units(pred)
         try:
             tmp = str(latex2sympy(pred))
-            pred = str(eval(tmp))
+            pred = str(ast.literal_eval(tmp))
         except Exception:
             if re.match(r"-?[\d\.]+\s\D+$", pred):
                 pred = pred.split(" ")[0]
diff --git a/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/utils.py b/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/utils.py
index d84b3fbd82a69279b10c803cdbf16d69a0e7e624..b19ea603b29576ddfc209e861ed742ff535b2dba 100644
--- a/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/utils.py
+++ b/mindspeed_llm/tasks/posttrain/verifier/math_eval_toolkit/utils.py
@@ -21,16 +21,14 @@ def load_jsonl(file: Union[str, Path]) -> Iterable[Any]:
                 yield json.loads(line)
             except json.JSONDecodeError as e:
                 print("Error in loading JSON:", line, "Error:", e)
-                pass
             except Exception as e:
                 print("Unexpected error in loading:", line, "Error:", e)
-                pass
 
 
 def save_jsonl(samples, save_path):
     # ensure path
     folder = os.path.dirname(save_path)
-    os.makedirs(folder, exist_ok=True)
+    os.makedirs(folder, mode=0o750, exist_ok=True)
 
     with open(save_path, "w", encoding="utf-8") as f:
         for sample in samples:
diff --git a/mindspeed_llm/tasks/preprocess/data_handler.py b/mindspeed_llm/tasks/preprocess/data_handler.py
index 8d4d6cf325b9518c072e04a827aa672827e591eb..2ec2fb879a554b11a9ecf5ee5558df84b5920d28 100644
--- a/mindspeed_llm/tasks/preprocess/data_handler.py
+++ b/mindspeed_llm/tasks/preprocess/data_handler.py
@@ -30,6 +30,7 @@ from datasets import load_dataset
 from megatron.core.datasets import indexed_dataset
 
 from mindspeed_llm.tasks.preprocess.templates import Prompter, AlpacaTemplate, get_model_template
+from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
 from mindspeed_llm.tasks.posttrain.utils import convert_token_to_id
 from .decoder_packed_mtf_dataset import _infer_seqlen
@@ -626,7 +627,7 @@ class AlpacaStyleProcessRewardHandler(BaseDatasetHandler):
 
         concatenated_ids = {
             "input_ids": [input_token],
-            "attention_mask":[attention_mask],
+            "attention_mask": [attention_mask],
             "labels": [label_token]
         }
 
@@ -1067,6 +1068,7 @@ def build_dataset(args):
         # for MOSS, streaming is needed.
         args.streaming = True
     if args.hf_datasets_params:
+        args.hf_datasets_params = standardize_path(args.hf_datasets_params, check_read=True)
         with open(args.hf_datasets_params, 'r') as fin:
             param_dict = json.load(fin)
         return load_dataset(**param_dict)
diff --git a/mindspeed_llm/tasks/preprocess/formatter.py b/mindspeed_llm/tasks/preprocess/formatter.py
index 489b28abba585e0abf47913e2236930d9a4fe93f..02af7c1f3774e1d775e6d06ce192a52902b4cdf6 100644
--- a/mindspeed_llm/tasks/preprocess/formatter.py
+++ b/mindspeed_llm/tasks/preprocess/formatter.py
@@ -191,6 +191,7 @@ class ToolFormatter(Formatter):
         if self.tool_format is None:
             raise ValueError("Tool format was not found.")
 
+
     def apply(self, **kwargs) -> SLOTS:
         content = kwargs.pop("content")
         try:
@@ -202,7 +203,8 @@ class ToolFormatter(Formatter):
                 return [default_tool_formatter(tools)]
             else:
                 raise NotImplementedError
-        except Exception:
+        except Exception as e:
+            print(f"[warning] Unexpected error processing content: {content}. Error: {e}")
             return [""]
 
     def extract(self, content: str) -> Union[str, Tuple[str, str]]:
diff --git a/mindspeed_llm/tasks/preprocess/utils.py b/mindspeed_llm/tasks/preprocess/utils.py
index 7f9d2d5523188f960c732afcd8643c7245e72840..90448c54f6bd950f7d161d1d7d3551ab2b715753 100644
--- a/mindspeed_llm/tasks/preprocess/utils.py
+++ b/mindspeed_llm/tasks/preprocess/utils.py
@@ -23,7 +23,7 @@ from datasets import load_dataset, concatenate_datasets, interleave_datasets
 
 from mindspeed_llm.tasks.preprocess.templates import Role
 from mindspeed_llm.tasks.preprocess.parser import InstructionDatasetAttr
-
+from mindspeed_llm.tasks.evaluation.file_utils import standardize_path
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -103,6 +103,7 @@ def get_dataset_list(data_args) -> List["InstructionDatasetAttr"]:
     else:
         dataset_names = []
 
+    data_args.dataset_dir = standardize_path(data_args.dataset_dir, check_read=True)
     try:
         with open(os.path.join(data_args.dataset_dir, DATA_CONFIG), "r") as f:
             dataset_info = json.load(f)
diff --git a/mindspeed_llm/training/tokenizer/tokenizer.py b/mindspeed_llm/training/tokenizer/tokenizer.py
index b82b094270224f378691286472a308d25c2e46d3..5b0e48cc3fdbce41a94544397361c20f4ea6aed8 100644
--- a/mindspeed_llm/training/tokenizer/tokenizer.py
+++ b/mindspeed_llm/training/tokenizer/tokenizer.py
@@ -49,6 +49,7 @@ def build_tokenizer(args):
             model_max_length=args.seq_length,
             use_fast=args.tokenizer_not_use_fast,
             prompt_type=args.prompt_type,
+            trust_remote_code=args.trust_remote_code,
             **hf_tokenizer_kwargs
         )
 
@@ -108,7 +109,7 @@ class TokenizerAdaptor:
 class _AutoTokenizer(MegatronTokenizer):
     """AutoTokenizer for Hf Pretrained model loading."""
 
-    def __init__(self, tokenizer_name_or_path, vocab_extra_ids, model_max_length, use_fast, prompt_type=None, **kwargs):
+    def __init__(self, tokenizer_name_or_path, vocab_extra_ids, model_max_length, use_fast, prompt_type=None, trust_remote_code=False, **kwargs):
         name = tokenizer_name_or_path
         super().__init__(name)
         hf_tokenizer_kwargs = kwargs
@@ -117,7 +118,7 @@ class _AutoTokenizer(MegatronTokenizer):
         hf_tokenizer_kwargs["model_max_length"] = model_max_length
         hf_tokenizer_kwargs["use_fast"] = use_fast
-        hf_tokenizer_kwargs["trust_remote_code"] = True
+        hf_tokenizer_kwargs["trust_remote_code"] = trust_remote_code
         self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, **hf_tokenizer_kwargs, local_files_only=True)
         if (prompt_type is None) and (self.tokenizer.pad_token_id is None):
             self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
diff --git a/mindspeed_llm/training/training.py b/mindspeed_llm/training/training.py
index 1b6c05d85865a40cede4e00b6005ff28fd13644f..d6ca28e66188dd7b4a2c6e959e196e92315af752 100644
--- a/mindspeed_llm/training/training.py
+++ b/mindspeed_llm/training/training.py
@@ -18,7 +18,6 @@ import os
 import gc
 import sys
 import json
-from datetime import datetime
 from functools import wraps
 import logging
@@ -27,7 +26,6 @@ import time
 
 import torch
 import torch_npu
-from megatron.core.transformer.moe.moe_utils import track_moe_metrics
 from megatron.training import get_args
 from megatron.training import get_timers
 from megatron.training import get_signal_handler
diff --git a/mindspeed_llm/training/utils.py b/mindspeed_llm/training/utils.py
index 8a9a8238d94680a24f59e26a33c9756b80bc445d..38a5abe6221c9252b4eb8aecf61216766669831d 100644
--- a/mindspeed_llm/training/utils.py
+++ b/mindspeed_llm/training/utils.py
@@ -18,6 +18,7 @@ import os
 import stat
 import random
 import warnings
+import logging
 from functools import wraps
 from typing import Optional, Union, List
 from itertools import takewhile
@@ -45,14 +46,16 @@ from mindspeed_llm.tasks.dataset.shared_memory_manager import SharedMemoryManager
 
 try:
     from mindspeed.core.pipeline_parallel.dualpipev.dualpipev_schedules import get_post_process_flag
-except Exception:
-    pass
+except Exception as warn_get_post_process_flag:
+    logging.error(f"Failed to import get_post_process_flag: {warn_get_post_process_flag}")
 
 try:
     _torch_version = PkgVersion(torch.__version__)
-except Exception:
-    # This is a WAR for building docs, where torch is not actually imported
+except Exception as warn_torch_ver:
+    logging.error(f"Failed to get torch version: {warn_torch_ver}")
+    # This is a special case for building docs, where torch has not been imported
     _torch_version = PkgVersion("0.0.0")
+    logging.warning("Using default torch version '0.0.0' for documentation build.")
 
 WRITE_FILE_DEFAULT_FLAGS = os.O_WRONLY | os.O_CREAT
diff --git a/preprocess_data.py b/preprocess_data.py
index 8d56d4ddf927b0e362235142e51cdb626a171316..f5e390f4679dbe17110351ca8c14f0c11b4a90d9 100644
--- a/preprocess_data.py
+++ b/preprocess_data.py
@@ -198,6 +198,10 @@ def add_tokenizer_args(parser):
         default=[],
         help="The labels represent the correctness of each reasoning step in the entire reasoning process.",
     )
+    parser.add_argument('--trust-remote-code',
+                        action='store_true',
+                        default=False,
+                        help='enable trust-remote-code for transformer to load model')
 
 
 def add_output_args(parser):
diff --git a/pretrain_mamba.py b/pretrain_mamba.py
index fa99b879e845811a873baa0f7eb9749b0b43617d..419e01cf1d3608709ea15a47cc1505e07ae21e4d 100644
--- a/pretrain_mamba.py
+++ b/pretrain_mamba.py
@@ -6,6 +6,7 @@ from functools import partial
 from typing import List, Optional
 
 import torch
+
 from mindspeed_llm import megatron_adaptor
 from megatron.training import get_args
 from megatron.training import print_rank_0
@@ -64,7 +65,7 @@ def model_provider(pre_process=True, post_process=True) -> MambaModel:
     if args.spec is not None:
         mamba_stack_spec = import_module(args.spec)
     else:
-        raise("You must provide a valid Mamba layer spec!")
+        raise ValueError("You must provide a valid Mamba layer spec!")
 
     model = MambaModel(
         config=config,
@@ -103,13 +104,8 @@ def get_batch(data_iterator):
     # get batches based on the TP rank you are on
     batch, actual_seq_len = get_batch_on_this_tp_rank(data_iterator)
     args = get_args()
-    if args.return_document_ids and all(
-            rank == 0 for rank in (
-                mpu.get_context_parallel_rank(),
-                mpu.get_tensor_model_parallel_rank(),
-                mpu.get_pipeline_model_parallel_rank()
-            )
-    ):
+    is_rank_0 = (mpu.get_context_parallel_rank() == 0 and mpu.get_tensor_model_parallel_rank() == 0 and mpu.get_pipeline_model_parallel_rank() == 0)
+    if args.return_document_ids and is_rank_0:
         print("current idx: {}, current rank: {}, data_parallel_rank: {}, document_ids: {}".format(batch['idx'], torch.distributed.get_rank(), mpu.get_data_parallel_rank(), batch['document_ids']))
     batch.pop('document_ids', None)
     batch.pop('idx', None)
diff --git a/tests/st/shell_scripts/chatglm3_gqa_cp4.sh b/tests/st/shell_scripts/chatglm3_gqa_cp4.sh
index 3d0551af485cc74b89be9120b64cd2864621098b..3d793ba29287c907bf5d2e531ba59f7cde2de7cc 100644
--- a/tests/st/shell_scripts/chatglm3_gqa_cp4.sh
+++ b/tests/st/shell_scripts/chatglm3_gqa_cp4.sh
@@ -31,6 +31,7 @@ DISTRIBUTED_ARGS="
 "
 
 GPT_ARGS="
+    --trust-remote-code \
     --use-mcore-models \
     --manual-gc \
     --manual-gc-interval 50 \
diff --git a/tests/st/shell_scripts/deepseek_500b_tp1_pp2_ep2_cp2_overlap.sh b/tests/st/shell_scripts/deepseek_500b_tp1_pp2_ep2_cp2_overlap.sh
index 7bd4b9f2d8f9df65daa1f811657859b85ec12f73..6815a33636eddf518edb3aa8d7f621c3e2798411 100644
--- a/tests/st/shell_scripts/deepseek_500b_tp1_pp2_ep2_cp2_overlap.sh
+++ b/tests/st/shell_scripts/deepseek_500b_tp1_pp2_ep2_cp2_overlap.sh
@@ -46,6 +46,7 @@ MOE_ARGS="
 "
 
 GPT_ARGS="
+    --trust-remote-code \
     --use-mcore-models \
     --manual-gc \
     --manual-gc-interval 50 \
diff --git a/tests/st/shell_scripts/deepseek_v3_mcore_tp1_pp2_ep4.sh b/tests/st/shell_scripts/deepseek_v3_mcore_tp1_pp2_ep4.sh
index 1e10e94d4adcb8ebcdf1e32cf1ecb5c56c80e063..96af75d795500a3a31800533cee74fed9848a92d 100644
--- a/tests/st/shell_scripts/deepseek_v3_mcore_tp1_pp2_ep4.sh
+++ b/tests/st/shell_scripts/deepseek_v3_mcore_tp1_pp2_ep4.sh
@@ -83,6 +83,7 @@ ROPE_ARGS="
 
 GPT_ARGS="
     --finetune \
+    --trust-remote-code \
     --spec mindspeed_llm.tasks.models.spec.deepseek_spec layer_spec \
     --noop-layers 2,3 \
     --recompute-granularity full \
diff --git a/tests/st/shell_scripts/deepseek_v3_mcore_tp2_pp2_ep2_dualpipev_fb.sh b/tests/st/shell_scripts/deepseek_v3_mcore_tp2_pp2_ep2_dualpipev_fb.sh
index 687825d4798fcc3788ede42fd8e43409d19d9d9b..ea6ebf30d05897a371088f01a693ce7327948139 100644
--- a/tests/st/shell_scripts/deepseek_v3_mcore_tp2_pp2_ep2_dualpipev_fb.sh
+++ b/tests/st/shell_scripts/deepseek_v3_mcore_tp2_pp2_ep2_dualpipev_fb.sh
@@ -102,6 +102,7 @@ ROPE_ARGS="
 "
 
 GPT_ARGS="
+    --trust-remote-code \
     --transformer-impl local \
     --spec mindspeed_llm.tasks.models.spec.deepseek_spec layer_spec \
     --reset-position-ids \
diff --git a/tests/st/shell_scripts/mixtral_mcore_tp4_cp2_ep2_ptd.sh b/tests/st/shell_scripts/mixtral_mcore_tp4_cp2_ep2_ptd.sh
index 593e68abbf3e28c03a74ea1204a681be8da0d977..b286515ced252c2c50c7bd178a2bc6aa7fb39e76 100644
--- a/tests/st/shell_scripts/mixtral_mcore_tp4_cp2_ep2_ptd.sh
+++ b/tests/st/shell_scripts/mixtral_mcore_tp4_cp2_ep2_ptd.sh
@@ -44,6 +44,7 @@ MOE_ARGS=(
 )
 
 GPT_ARGS=(
+    --trust-remote-code
     --use-mcore-models
     --manual-gc
     --manual-gc-interval 50
diff --git a/tests/ut/checkpoint/test_checkpoint.json b/tests/ut/checkpoint/test_checkpoint.json
index e18c38493f0a5ab4c40a5753f06bcd261915cdcb..7a67649870e57aa7dc60b0bb98e4f4696dac3fbc 100644
--- a/tests/ut/checkpoint/test_checkpoint.json
+++ b/tests/ut/checkpoint/test_checkpoint.json
@@ -2,6 +2,7 @@
     "test_deepseek2_hf2mcore_tp1pp4ep8": [
         {
             "param": {
+                "trust-remote-code":null,
                 "model-type":"GPT",
                 "load-model-type":"hf",
                 "save-model-type":"mg",
@@ -23,6 +24,7 @@
     "test_deepseek2_mcore2hf_tp1pp4ep8": [
        {
            "param": {
+                "trust-remote-code":null,
                "model-type":"GPT",
                "load-model-type":"mg",
                "save-model-type": "hf",
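
Note on the path-handling hunks above: they all call a `standardize_path` helper exported from `mindspeed_llm/tasks/evaluation/file_utils.py`, whose implementation is not part of this patch. The following is only a minimal sketch of the contract those call sites appear to rely on; the keyword names `check_read`/`check_write` come from the call sites, while the body is an assumption rather than the project's actual code.

```python
import os


def standardize_path(path: str, check_read: bool = False, check_write: bool = False) -> str:
    """Hypothetical sketch: normalize a user-supplied path and run basic permission checks."""
    if not path:
        raise ValueError("path must be a non-empty string")
    # Resolve '~', symlinks and relative components so later open()/os.makedirs() calls
    # operate on an absolute path rather than attacker-controlled '..' segments.
    real_path = os.path.realpath(os.path.expanduser(path))
    if check_read and os.path.exists(real_path) and not os.access(real_path, os.R_OK):
        raise PermissionError(f"no read permission for {real_path}")
    if check_write:
        # For a not-yet-created target, probe the nearest existing parent directory.
        probe = real_path if os.path.isdir(real_path) else os.path.dirname(real_path)
        if os.path.exists(probe) and not os.access(probe, os.W_OK):
            raise PermissionError(f"no write permission for {probe}")
    return real_path
```

Under these assumptions, wrapping inputs such as `args.output_path` or `hf_model_path` before any `os.makedirs(..., mode=0o750)` or `open()` call gives a single place to reject unreadable or unwritable locations.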