From 67b2931a1910e207fbde524a59487d6bd1f22eaf Mon Sep 17 00:00:00 2001
From: zxq <342239412@qq.com>
Date: Thu, 18 Sep 2025 19:16:20 +0800
Subject: [PATCH] [feature][master] Remove the cpu_offload_weights feature
 from weight loading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the predict-mode cpu_offloading_weights switch end to end: drop the
config plumbing (auto_factory, build_config, base_trainer, GeneralConfig,
ModelParallelConfig and the transformer config mapping), delete the
cpu_offload_weights_params() helper from weights_utils.py, and remove its
call sites in the tensor-parallel linear, grouped-linear and embedding
weight loaders. The tensorboard UT text mapping is updated accordingly.

---
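Note for reviewers (git am ignores text between the --- marker and the
diffstat): configs that still set the YAML key cpu_offloading_weights should
drop it after this patch. For reference, the deleted helper boiled down to
the sketch below; the wrapper name is hypothetical, and Parameter._offload()
is the private MindSpore interface the old code relied on, so treat this as
an illustration of the removed behaviour, not a supported replacement.

    # Sketch of the behaviour removed from weights_utils.py (see deletion below).
    from mindspore import Parameter

    # pylint: disable=W0212
    def offload_weight_to_cpu(param: Parameter) -> None:
        """Offload a loaded parameter's data to CPU memory, as the old helper did."""
        param._offload()
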
 mindformers/models/auto/auto_factory.py       |  2 --
 mindformers/models/build_config.py            |  2 --
 .../tensor_parallel/grouped_layers.py         |  5 +----
 .../inference/tensor_parallel/layers.py       | 11 +----------
 .../parallel_core/inference/weights_utils.py  | 19 -------------------
 .../parallel_core/model_parallel_config.py    |  3 ---
 .../parallel_core/transformer_config_utils.py |  1 -
 mindformers/tools/register/template.py        |  1 -
 mindformers/trainer/base_trainer.py           |  2 --
 .../test_tensorboard/test_tensorboard.py      |  4 ++--
 10 files changed, 4 insertions(+), 46 deletions(-)

diff --git a/mindformers/models/auto/auto_factory.py b/mindformers/models/auto/auto_factory.py
index b9b0559a4..2bdb3c038 100644
--- a/mindformers/models/auto/auto_factory.py
+++ b/mindformers/models/auto/auto_factory.py
@@ -369,8 +369,6 @@ class _BaseAutoModelClass:
             config_args.parallel_config = TransformerOpParallelConfig(
                 **config_args.parallel_config)
             config_args.model.model_config.parallel_config = config_args.parallel_config
-        if config_args.get("cpu_offloading_weights", False):
-            config_args.model.cpu_offloading_weights = config_args.cpu_offloading_weights
         model = build_network(config_args.model)
         logger.info("model built successfully!")
         return model
diff --git a/mindformers/models/build_config.py b/mindformers/models/build_config.py
index 47fbaf609..841c0fe3a 100644
--- a/mindformers/models/build_config.py
+++ b/mindformers/models/build_config.py
@@ -97,7 +97,6 @@ def get_model_config(
     config = copy.deepcopy(config)
     use_model_config = config.get_value("model_config", None)
     use_pretrained_model_dir = config.get_value("pretrained_model_dir", None)
-    cpu_offloading_weights = config.get_value("cpu_offloading_weights", False)
     if not use_pretrained_model_dir and not use_model_config:
         return None
     if use_pretrained_model_dir:
@@ -118,7 +117,6 @@ def get_model_config(
             model_config['quantization'] = quant_config.get("quant_method")
         else:
             model_config = config.model_config
-        model_config.update({'cpu_offloading_weights': cpu_offloading_weights})
         return MindFormerRegister.get_instance_from_cfg(
             model_config, MindFormerModuleType.CONFIG, default_args=default_args)
     return MindFormerRegister.get_instance(module_type, class_name, **kwargs)
diff --git a/mindformers/parallel_core/inference/tensor_parallel/grouped_layers.py b/mindformers/parallel_core/inference/tensor_parallel/grouped_layers.py
index 51de4a20b..fc1a9ba6a 100644
--- a/mindformers/parallel_core/inference/tensor_parallel/grouped_layers.py
+++ b/mindformers/parallel_core/inference/tensor_parallel/grouped_layers.py
@@ -39,8 +39,7 @@ from mindformers.parallel_core.inference.tensor_parallel.mappings import (
     scatter_to_model_parallel_region
 )
 from mindformers.parallel_core.inference.parallel_state import ProcessGroup, default_pgs
-from mindformers.parallel_core.inference.weights_utils import (split_loaded_weight, cpu_offload_weights_params,
-                                                               deal_training_moe_weight)
+from mindformers.parallel_core.inference.weights_utils import split_loaded_weight, deal_training_moe_weight
 
 
 class GroupedLinearMethodBase(QuantizeMethodBase):
@@ -390,7 +389,6 @@ class ColumnParallelGroupedLinear(GroupedLinearBase):
                              f" but got the shape of param is {(param.shape[1], param.data[expert_id].shape)} and "
                              f"the shape of weight is{loaded_weight.shape}")
         param[expert_id] = ms.from_numpy(loaded_weight)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class RowParallelGroupedLinear(GroupedLinearBase):
@@ -619,4 +617,3 @@ class RowParallelGroupedLinear(GroupedLinearBase):
                              f" but got the shape of param is {param.data[expert_id].shape} and "
                              f"the shape of weight is{loaded_weight.shape}")
         param[expert_id] = ms.from_numpy(loaded_weight)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
diff --git a/mindformers/parallel_core/inference/tensor_parallel/layers.py b/mindformers/parallel_core/inference/tensor_parallel/layers.py
index 5e96a7c6b..cbc9a5a60 100644
--- a/mindformers/parallel_core/inference/tensor_parallel/layers.py
+++ b/mindformers/parallel_core/inference/tensor_parallel/layers.py
@@ -39,8 +39,7 @@ from mindformers.parallel_core.inference.parallel_state import ProcessGroup, def
 from mindformers.parallel_core.inference.weights_utils import (set_weight_attrs, split_loaded_weight,
                                                                deal_linear_q_up_weight, deal_linear_kv_up_weight,
                                                                deal_linear_kv_down_weight, split_fusion_loaded_weight,
-                                                               cpu_offload_weights_params, deal_training_ffn_weight,
-                                                               deal_training_qkv_weight)
+                                                               deal_training_ffn_weight, deal_training_qkv_weight)
 from mindformers.parallel_core.inference.quantization.base_config import (QuantizeMethodBase, QuantizationConfig)
 from mindformers.version_control import is_310p
 
@@ -408,7 +407,6 @@ class ColumnParallelLinear(LinearBase):
         param.set_data(ms.from_numpy(loaded_weight))
         if is_310p() and param.name.endswith("weight"):
             self.format_to_nz(param)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class MergedColumnParallelLinear(ColumnParallelLinear):
@@ -482,7 +480,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
                     f"'{param.name}.shape' should be equal to 'loaded_weight.shape',"
                     f" but got the shape of param is {(param.shape)} and "
                     f"the shape of weight is{loaded_weight.shape}")
-            cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
             return
 
         tp_rank = self.tp_group.rank
@@ -537,7 +534,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
             loaded_shard_num = 2  # gating/hidden
             if is_310p() and param.name.endswith("weight"):
                 self.format_to_nz(param, loaded_shard_num)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class QKVParallelLinear(ColumnParallelLinear):
@@ -632,7 +628,6 @@ class QKVParallelLinear(ColumnParallelLinear):
                     f"'{param.name}.shape' should be equal to 'loaded_weight.shape',"
                     f" but got the shape of param is {(param.shape)} and "
                     f"the shape of weight is{loaded_weight.shape}")
-            cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
             return
 
         tp_rank = self.tp_group.rank
@@ -687,7 +682,6 @@ class QKVParallelLinear(ColumnParallelLinear):
             # format cast after load q,k,v
             loaded_shard_num = 3
             self.format_to_nz(param, loaded_shard_num)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class RowParallelLinear(LinearBase):
@@ -889,7 +883,6 @@ class RowParallelLinear(LinearBase):
         param.set_data(ms.from_numpy(loaded_weight))
         if is_310p() and param.name.endswith("weight"):
             self.format_to_nz(param)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class ReplicatedLinear(LinearBase):
@@ -1072,7 +1065,6 @@ class ReplicatedLinear(LinearBase):
                              f"'{param.name}.shape' should be equal to 'loaded_weight.shape',"
                              f" but got the shape of param is {param.shape} "
                             f"and the shape of weight is{loaded_weight.shape}")
         param.set_data(ms.from_numpy(loaded_weight))
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 class VocabParallelEmbedding(nn.Cell):
@@ -1219,7 +1211,6 @@ class VocabParallelEmbedding(nn.Cell):
         loaded_weight = ms.from_numpy(loaded_weight).astype(ms.float32).asnumpy()
         param.asnumpy()[:loaded_weight.shape[0]] = loaded_weight
         param.asnumpy()[loaded_weight.shape[0]:] = 0
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class UnquantizedEmbeddingMethod(QuantizeMethodBase):
diff --git a/mindformers/parallel_core/inference/weights_utils.py b/mindformers/parallel_core/inference/weights_utils.py
index 1d4806018..1f546eb50 100644
--- a/mindformers/parallel_core/inference/weights_utils.py
+++ b/mindformers/parallel_core/inference/weights_utils.py
@@ -22,7 +22,6 @@ from mindspore import Parameter
 from mindformers.parallel_core.inference.parallel_state import (get_tensor_model_parallel_world_size,
                                                                 get_tensor_model_parallel_rank)
 from mindformers.version_control import is_310p
-from mindformers.tools.logger import logger
 
 
 def set_weight_attrs(
@@ -399,21 +398,3 @@ def split_fusion_loaded_weight(loaded_weight, start_idxs, shard_sizes):
         loaded_weight_parts.append(loaded_weight[start_idx:start_idx + shard_size])
     perrank_ffn_weight = np.concatenate(loaded_weight_parts, axis=0)
     return perrank_ffn_weight
-
-
-# pylint: disable=W0212
-def cpu_offload_weights_params(param: Parameter = None, cpu_offloading_weights: bool = False):
-
-    """
-    Offload parameter weights to CPU memory.
-
-    Args:
-        param: Model parameter object that needs to support _offload() method
-        cpu_offloading_weights: Boolean value controlling whether to enable CPU offloading functionality
-
-    Returns:
-        None
-    """
-    if cpu_offloading_weights:
-        param._offload()
-        logger.debug(f'Offload {param.name} to CPU memory.')
diff --git a/mindformers/parallel_core/model_parallel_config.py b/mindformers/parallel_core/model_parallel_config.py
index d01dd35af..43e1c0361 100644
--- a/mindformers/parallel_core/model_parallel_config.py
+++ b/mindformers/parallel_core/model_parallel_config.py
@@ -155,9 +155,6 @@ class ModelParallelConfig:
         Default: None.
     """
 
-    cpu_offloading_weights: bool = False
-    """Enable offload of the weights or not. Default: False. Only used for predict mode."""
-
     op_swap: Optional[Union[list, dict]] = None
     """
     Configuration for operator swapping.
diff --git a/mindformers/parallel_core/transformer_config_utils.py b/mindformers/parallel_core/transformer_config_utils.py
index 47c962ad4..7d6e1dcc2 100644
--- a/mindformers/parallel_core/transformer_config_utils.py
+++ b/mindformers/parallel_core/transformer_config_utils.py
@@ -241,7 +241,6 @@ COMMON_CONFIG_MAPPING = {
     # not changes
     "op_swap": "op_swap",
     "default_prefetch": "default_prefetch",
-    "cpu_offloading_weights": "cpu_offloading_weights",
 
     # TransformerConfig
     # Model Architecture
diff --git a/mindformers/tools/register/template.py b/mindformers/tools/register/template.py
index 5659aac9c..f96a6f628 100644
--- a/mindformers/tools/register/template.py
+++ b/mindformers/tools/register/template.py
@@ -252,7 +252,6 @@ class GeneralConfig(Config):
     use_legacy = True
     ckpt_use_legacy_format = True
     pretrained_model_dir = ""
-    cpu_offloading_weights = False
     balanced_load = False
 
     # eval while training
diff --git a/mindformers/trainer/base_trainer.py b/mindformers/trainer/base_trainer.py
index 23995e6aa..91d7c02df 100644
--- a/mindformers/trainer/base_trainer.py
+++ b/mindformers/trainer/base_trainer.py
@@ -467,8 +467,6 @@ class BaseTrainer:
         logger.info(".........Build Network From Config..........")
         if self.config.get("pretrained_model_dir", None):
             self.config.model.pretrained_model_dir = self.config.pretrained_model_dir
-        if self.config.get("cpu_offloading_weights", False):
-            self.config.model.cpu_offloading_weights = self.config.cpu_offloading_weights
         if self.config.get("generation_config", None):
             self.config.model.generation_config = self.config.generation_config
         network = build_network(self.config.model, default_args=default_args)
diff --git a/tests/st/test_ut/test_utils/test_tensorboard/test_tensorboard.py b/tests/st/test_ut/test_utils/test_tensorboard/test_tensorboard.py
index 8a260eba8..28aefeaba 100644
--- a/tests/st/test_ut/test_utils/test_tensorboard/test_tensorboard.py
+++ b/tests/st/test_ut/test_utils/test_tensorboard/test_tensorboard.py
@@ -48,8 +48,8 @@ _CHECK_TEXT_MAPPING = {
     'eval_epoch_interval', 'eval_dataset', 'eval_dataset_task', 'lr_schedule', 'metric', 'model', 'moe_config',
     'optimizer', 'parallel_config', 'parallel', 'recompute_config', 'remove_redundancy', 'runner_config',
     'runner_wrapper', 'monitor_config', 'tensorboard', 'train_dataset_task', 'train_dataset', 'trainer',
-    'swap_config', 'use_legacy', 'pretrained_model_dir', 'print_separate_loss', 'cpu_offloading_weights',
-    'ckpt_use_legacy_format', 'balanced_load'
+    'swap_config', 'use_legacy', 'pretrained_model_dir', 'print_separate_loss', 'ckpt_use_legacy_format',
+    'balanced_load'
 }
 
 def generator_train():
-- 
Gitee