From 67b2931a1910e207fbde524a59487d6bd1f22eaf Mon Sep 17 00:00:00 2001
From: zxq <342239412@qq.com>
Date: Thu, 18 Sep 2025 19:16:20 +0800
Subject: [PATCH] [feature][master] Remove the cpu_offload_weights feature
 from weight loading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the predict-mode cpu_offloading_weights switch end to end: drop the
config plumbing (auto_factory, build_config, base_trainer, GeneralConfig,
ModelParallelConfig and the transformer config mapping), delete the
cpu_offload_weights_params() helper from weights_utils.py, and remove its
call sites in the tensor-parallel linear, grouped-linear and embedding
weight loaders. The tensorboard UT text mapping is updated accordingly.

---
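Note for reviewers (git am ignores text between the --- marker and the
diffstat): configs that still set the YAML key cpu_offloading_weights should
drop it after this patch. For reference, the deleted helper boiled down to
the sketch below; the wrapper name is hypothetical, and Parameter._offload()
is the private MindSpore interface the old code relied on, so treat this as
an illustration of the removed behaviour, not a supported replacement.

    # Sketch of the behaviour removed from weights_utils.py (see deletion below).
    from mindspore import Parameter

    # pylint: disable=W0212
    def offload_weight_to_cpu(param: Parameter) -> None:
        """Offload a loaded parameter's data to CPU memory, as the old helper did."""
        param._offload()
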
 mindformers/models/auto/auto_factory.py       |  2 --
 mindformers/models/build_config.py            |  2 --
 .../tensor_parallel/grouped_layers.py         |  5 +----
 .../inference/tensor_parallel/layers.py       | 11 +----------
 .../parallel_core/inference/weights_utils.py  | 19 -------------------
 .../parallel_core/model_parallel_config.py    |  3 ---
 .../parallel_core/transformer_config_utils.py |  1 -
 mindformers/tools/register/template.py        |  1 -
 mindformers/trainer/base_trainer.py           |  2 --
 .../test_tensorboard/test_tensorboard.py      |  4 ++--
 10 files changed, 4 insertions(+), 46 deletions(-)

diff --git a/mindformers/models/auto/auto_factory.py b/mindformers/models/auto/auto_factory.py
index b9b0559a4..2bdb3c038 100644
--- a/mindformers/models/auto/auto_factory.py
+++ b/mindformers/models/auto/auto_factory.py
@@ -369,8 +369,6 @@ class _BaseAutoModelClass:
             config_args.parallel_config = TransformerOpParallelConfig(
                 **config_args.parallel_config)
             config_args.model.model_config.parallel_config = config_args.parallel_config
-        if config_args.get("cpu_offloading_weights", False):
-            config_args.model.cpu_offloading_weights = config_args.cpu_offloading_weights
         model = build_network(config_args.model)
         logger.info("model built successfully!")
         return model
diff --git a/mindformers/models/build_config.py b/mindformers/models/build_config.py
index 47fbaf609..841c0fe3a 100644
--- a/mindformers/models/build_config.py
+++ b/mindformers/models/build_config.py
@@ -97,7 +97,6 @@ def get_model_config(
     config = copy.deepcopy(config)
     use_model_config = config.get_value("model_config", None)
     use_pretrained_model_dir = config.get_value("pretrained_model_dir", None)
-    cpu_offloading_weights = config.get_value("cpu_offloading_weights", False)
     if not use_pretrained_model_dir and not use_model_config:
         return None
     if use_pretrained_model_dir:
@@ -118,7 +117,6 @@ def get_model_config(
             model_config['quantization'] = quant_config.get("quant_method")
         else:
             model_config = config.model_config
-        model_config.update({'cpu_offloading_weights': cpu_offloading_weights})
         return MindFormerRegister.get_instance_from_cfg(
             model_config, MindFormerModuleType.CONFIG, default_args=default_args)
     return MindFormerRegister.get_instance(module_type, class_name, **kwargs)
diff --git a/mindformers/parallel_core/inference/tensor_parallel/grouped_layers.py b/mindformers/parallel_core/inference/tensor_parallel/grouped_layers.py
index 51de4a20b..fc1a9ba6a 100644
--- a/mindformers/parallel_core/inference/tensor_parallel/grouped_layers.py
+++ b/mindformers/parallel_core/inference/tensor_parallel/grouped_layers.py
@@ -39,8 +39,7 @@ from mindformers.parallel_core.inference.tensor_parallel.mappings import (
     scatter_to_model_parallel_region
 )
 from mindformers.parallel_core.inference.parallel_state import ProcessGroup, default_pgs
-from mindformers.parallel_core.inference.weights_utils import (split_loaded_weight, cpu_offload_weights_params,
-                                                               deal_training_moe_weight)
+from mindformers.parallel_core.inference.weights_utils import split_loaded_weight, deal_training_moe_weight
 
 
 class GroupedLinearMethodBase(QuantizeMethodBase):
@@ -390,7 +389,6 @@ class ColumnParallelGroupedLinear(GroupedLinearBase):
                              f" but got the shape of param is {(param.shape[1], param.data[expert_id].shape)} and "
                              f"the shape of weight is{loaded_weight.shape}")
         param[expert_id] = ms.from_numpy(loaded_weight)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class RowParallelGroupedLinear(GroupedLinearBase):
@@ -619,4 +617,3 @@ class RowParallelGroupedLinear(GroupedLinearBase):
                              f" but got the shape of param is {param.data[expert_id].shape} and "
                              f"the shape of weight is{loaded_weight.shape}")
         param[expert_id] = ms.from_numpy(loaded_weight)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
diff --git a/mindformers/parallel_core/inference/tensor_parallel/layers.py b/mindformers/parallel_core/inference/tensor_parallel/layers.py
index 5e96a7c6b..cbc9a5a60 100644
--- a/mindformers/parallel_core/inference/tensor_parallel/layers.py
+++ b/mindformers/parallel_core/inference/tensor_parallel/layers.py
@@ -39,8 +39,7 @@ from mindformers.parallel_core.inference.parallel_state import ProcessGroup, def
 from mindformers.parallel_core.inference.weights_utils import (set_weight_attrs, split_loaded_weight,
                                                                deal_linear_q_up_weight, deal_linear_kv_up_weight,
                                                                deal_linear_kv_down_weight, split_fusion_loaded_weight,
-                                                               cpu_offload_weights_params, deal_training_ffn_weight,
-                                                               deal_training_qkv_weight)
+                                                               deal_training_ffn_weight, deal_training_qkv_weight)
 from mindformers.parallel_core.inference.quantization.base_config import (QuantizeMethodBase, QuantizationConfig)
 from mindformers.version_control import is_310p
 
@@ -408,7 +407,6 @@ class ColumnParallelLinear(LinearBase):
         param.set_data(ms.from_numpy(loaded_weight))
         if is_310p() and param.name.endswith("weight"):
             self.format_to_nz(param)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class MergedColumnParallelLinear(ColumnParallelLinear):
@@ -482,7 +480,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
                     f"'{param.name}.shape' should be equal to 'loaded_weight.shape',"
                     f" but got the shape of param is {(param.shape)} and "
                     f"the shape of weight is{loaded_weight.shape}")
-            cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
             return
 
         tp_rank = self.tp_group.rank
@@ -537,7 +534,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
             loaded_shard_num = 2  # gating/hidden
             if is_310p() and param.name.endswith("weight"):
                 self.format_to_nz(param, loaded_shard_num)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class QKVParallelLinear(ColumnParallelLinear):
@@ -632,7 +628,6 @@ class QKVParallelLinear(ColumnParallelLinear):
                     f"'{param.name}.shape' should be equal to 'loaded_weight.shape',"
                     f" but got the shape of param is {(param.shape)} and "
                     f"the shape of weight is{loaded_weight.shape}")
-            cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
             return
 
         tp_rank = self.tp_group.rank
@@ -687,7 +682,6 @@ class QKVParallelLinear(ColumnParallelLinear):
             # format cast after load q,k,v
             loaded_shard_num = 3
             self.format_to_nz(param, loaded_shard_num)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class RowParallelLinear(LinearBase):
@@ -889,7 +883,6 @@ class RowParallelLinear(LinearBase):
         param.set_data(ms.from_numpy(loaded_weight))
         if is_310p() and param.name.endswith("weight"):
             self.format_to_nz(param)
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class ReplicatedLinear(LinearBase):
@@ -1072,7 +1065,6 @@ class ReplicatedLinear(LinearBase):
                              f"'{param.name}.shape' should be equal to 'loaded_weight.shape',"
                              f" but got the shape of param is {param.shape} "
                             f"and the shape of weight is{loaded_weight.shape}")
         param.set_data(ms.from_numpy(loaded_weight))
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 class VocabParallelEmbedding(nn.Cell):
@@ -1219,7 +1211,6 @@ class VocabParallelEmbedding(nn.Cell):
         loaded_weight = ms.from_numpy(loaded_weight).astype(ms.float32).asnumpy()
         param.asnumpy()[:loaded_weight.shape[0]] = loaded_weight
         param.asnumpy()[loaded_weight.shape[0]:] = 0
-        cpu_offload_weights_params(param, self.config.cpu_offloading_weights)
 
 
 class UnquantizedEmbeddingMethod(QuantizeMethodBase):
diff --git a/mindformers/parallel_core/inference/weights_utils.py b/mindformers/parallel_core/inference/weights_utils.py
index 1d4806018..1f546eb50 100644
--- a/mindformers/parallel_core/inference/weights_utils.py
+++ b/mindformers/parallel_core/inference/weights_utils.py
@@ -22,7 +22,6 @@ from mindspore import Parameter
 from mindformers.parallel_core.inference.parallel_state import (get_tensor_model_parallel_world_size,
                                                                 get_tensor_model_parallel_rank)
 from mindformers.version_control import is_310p
-from mindformers.tools.logger import logger
 
 
 def set_weight_attrs(
@@ -399,21 +398,3 @@ def split_fusion_loaded_weight(loaded_weight, start_idxs, shard_sizes):
         loaded_weight_parts.append(loaded_weight[start_idx:start_idx + shard_size])
     perrank_ffn_weight = np.concatenate(loaded_weight_parts, axis=0)
     return perrank_ffn_weight
-
-
-# pylint: disable=W0212
-def cpu_offload_weights_params(param: Parameter = None, cpu_offloading_weights: bool = False):
-
-    """
-    Offload parameter weights to CPU memory.
-
-    Args:
-        param: Model parameter object that needs to support _offload() method
-        cpu_offloading_weights: Boolean value controlling whether to enable CPU offloading functionality
-
-    Returns:
-        None
-    """
-    if cpu_offloading_weights:
-        param._offload()
-        logger.debug(f'Offload {param.name} to CPU memory.')
diff --git a/mindformers/parallel_core/model_parallel_config.py b/mindformers/parallel_core/model_parallel_config.py
index d01dd35af..43e1c0361 100644
--- a/mindformers/parallel_core/model_parallel_config.py
+++ b/mindformers/parallel_core/model_parallel_config.py
@@ -155,9 +155,6 @@ class ModelParallelConfig:
         Default: None.
     """
 
-    cpu_offloading_weights: bool = False
-    """Enable offload of the weights or not. Default: False. Only used for predict mode."""
-
     op_swap: Optional[Union[list, dict]] = None
     """
     Configuration for operator swapping.
diff --git a/mindformers/parallel_core/transformer_config_utils.py b/mindformers/parallel_core/transformer_config_utils.py
index 47c962ad4..7d6e1dcc2 100644
--- a/mindformers/parallel_core/transformer_config_utils.py
+++ b/mindformers/parallel_core/transformer_config_utils.py
@@ -241,7 +241,6 @@ COMMON_CONFIG_MAPPING = {
     # not changes
     "op_swap": "op_swap",
     "default_prefetch": "default_prefetch",
-    "cpu_offloading_weights": "cpu_offloading_weights",
 
     # TransformerConfig
     # Model Architecture
diff --git a/mindformers/tools/register/template.py b/mindformers/tools/register/template.py
index 5659aac9c..f96a6f628 100644
--- a/mindformers/tools/register/template.py
+++ b/mindformers/tools/register/template.py
@@ -252,7 +252,6 @@ class GeneralConfig(Config):
     use_legacy = True
     ckpt_use_legacy_format = True
     pretrained_model_dir = ""
-    cpu_offloading_weights = False
     balanced_load = False
 
     # eval while training
diff --git a/mindformers/trainer/base_trainer.py b/mindformers/trainer/base_trainer.py
index 23995e6aa..91d7c02df 100644
--- a/mindformers/trainer/base_trainer.py
+++ b/mindformers/trainer/base_trainer.py
@@ -467,8 +467,6 @@ class BaseTrainer:
         logger.info(".........Build Network From Config..........")
         if self.config.get("pretrained_model_dir", None):
             self.config.model.pretrained_model_dir = self.config.pretrained_model_dir
-        if self.config.get("cpu_offloading_weights", False):
-            self.config.model.cpu_offloading_weights = self.config.cpu_offloading_weights
         if self.config.get("generation_config", None):
             self.config.model.generation_config = self.config.generation_config
         network = build_network(self.config.model, default_args=default_args)
diff --git a/tests/st/test_ut/test_utils/test_tensorboard/test_tensorboard.py b/tests/st/test_ut/test_utils/test_tensorboard/test_tensorboard.py
index 8a260eba8..28aefeaba 100644
--- a/tests/st/test_ut/test_utils/test_tensorboard/test_tensorboard.py
+++ b/tests/st/test_ut/test_utils/test_tensorboard/test_tensorboard.py
@@ -48,8 +48,8 @@ _CHECK_TEXT_MAPPING = {
     'eval_epoch_interval', 'eval_dataset', 'eval_dataset_task', 'lr_schedule', 'metric', 'model', 'moe_config',
     'optimizer', 'parallel_config', 'parallel', 'recompute_config', 'remove_redundancy', 'runner_config',
     'runner_wrapper', 'monitor_config', 'tensorboard', 'train_dataset_task', 'train_dataset', 'trainer',
-    'swap_config', 'use_legacy', 'pretrained_model_dir', 'print_separate_loss', 'cpu_offloading_weights',
-    'ckpt_use_legacy_format', 'balanced_load'
+    'swap_config', 'use_legacy', 'pretrained_model_dir', 'print_separate_loss', 'ckpt_use_legacy_format',
+    'balanced_load'
 }
 
 def generator_train():
-- 
Gitee