From b1a1387721b1f7b65ed3c84bf6a9f7137677889c Mon Sep 17 00:00:00 2001
From: ivanshan_8170
Date: Tue, 26 Aug 2025 21:13:59 +0800
Subject: [PATCH] lccl2hccl

---
 include/atb/infer_op_params.h             |  2 +-
 .../all_gather/all_gather_operation.cpp   | 39 ++++-----
 .../all_reduce/all_reduce_operation.cpp   | 13 +--
 .../all_to_all/all_to_all_lccl_runner.cpp |  2 +-
 .../all_to_all/all_to_all_operation.cpp   | 86 +++++++++++--------
 5 files changed, 75 insertions(+), 67 deletions(-)

diff --git a/include/atb/infer_op_params.h b/include/atb/infer_op_params.h
index 7896da47..82f65ae0 100644
--- a/include/atb/infer_op_params.h
+++ b/include/atb/infer_op_params.h
@@ -2409,7 +2409,7 @@ struct AllToAllParam {
     //! 多通信域并行功能使用结束后,"LCCL_PARALLEL"需要设置为0或者false,否则会导致基础场景性能下降。
     std::string commDomain;
     //! \brief 通信结果对输入进行转置。
-    //! 仅当backend为"lccl"时生效
+    //! 为true时使用lccl
     bool transpose = false;
     //!
     //! \brief 预留参数
diff --git a/src/ops_infer/all_gather/all_gather_operation.cpp b/src/ops_infer/all_gather/all_gather_operation.cpp
index 70615ce1..e4a0e0f6 100644
--- a/src/ops_infer/all_gather/all_gather_operation.cpp
+++ b/src/ops_infer/all_gather/all_gather_operation.cpp
@@ -35,9 +35,9 @@ template <> Status CreateOperation(const infer::AllGatherParam &opParam, Operati
         ATB_LOG(ERROR) << "backend is " << opParam.backend << "backend must either be hccl or lccl";
         return ERROR_INVALID_PARAM;
     }
-    if (opParam.backend == "lccl" && GetSingleton<Config>().Is310P()) {
-        ATB_LOG(ERROR) << "AllGather lccl is not support in Atlas inference products";
-        return ERROR_INVALID_PARAM;
+    if (opParam.backend == "lccl") {
+        ATB_LOG(WARN)
+            << "DEPRECATED: backend lccl is no longer supported and will be removed soon. Please use hccl instead";
     }
     if (OperationUtil::DistributedInitCheck(opParam) != NO_ERROR) {
         ATB_LOG(ERROR) << "AllGatherOperation DistributedInitCheck failed";
@@ -84,8 +84,8 @@ Status AllGatherOperation::InferShapeImpl(const SVector &inTensorDes
 Status AllGatherOperation::InferShapeCheckImpl(const SVector<TensorDesc> &inTensorDescs) const
 {
     if (inTensorDescs.at(0).shape.dimNum >= MAX_DIM) {
-        ATB_LOG(ERROR) << "inTensor(0) dimNum should < MAX_DIM(8)";
-        return ERROR_INVALID_TENSOR_DIM;
+        ATB_LOG(ERROR) << "inTensor(0) dimNum should < MAX_DIM(8), but got " << inTensorDescs.at(0).shape.dimNum;
+        return ERROR_INVALID_TENSOR_DIM_NUM;
     }
     return NO_ERROR;
 }
@@ -93,15 +93,17 @@ Status AllGatherOperation::InferShapeCheckImpl(const SVector &inTens
 Status AllGatherOperation::SetupCheckImpl(const SVector<Tensor> &inTensors, const SVector<Tensor> &outTensors) const
 {
     if (inTensors.at(0).desc.shape.dimNum >= MAX_DIM) {
-        ATB_LOG(ERROR) << "inTensor(0) dimNum should < MAX_DIM(8)";
-        return ERROR_INVALID_TENSOR_DIM;
+        ATB_LOG(ERROR) << "inTensor(0) dimNum should < MAX_DIM(8), but got " << inTensors.at(0).desc.shape.dimNum;
+        return ERROR_INVALID_TENSOR_DIM_NUM;
     }
     if (outTensors.at(0).desc.shape.dimNum != (inTensors.at(0).desc.shape.dimNum + 1)) {
-        ATB_LOG(ERROR) << "outTensor dim should be one larger than inTensor dim";
-        return ERROR_INVALID_TENSOR_DIM;
+        ATB_LOG(ERROR) << "outTensor dimNum[" << outTensors.at(0).desc.shape.dimNum
+                       << "] should be one larger than inTensor dimNum[" << inTensors.at(0).desc.shape.dimNum << "]";
+        return ERROR_INVALID_TENSOR_DIM_NUM;
     }
     if (outTensors.at(0).desc.shape.dims[0] != param_.rankSize) {
-        ATB_LOG(ERROR) << "outTensor first dimension does not match rankSize";
+        ATB_LOG(ERROR) << "outTensor first dimension[" << outTensors.at(0).desc.shape.dims[0]
+                       << "] does not match rankSize[" << param_.rankSize << "]";
         return ERROR_INVALID_TENSOR_DIM;
     }
     return NO_ERROR;
@@ -109,18 +111,13 @@ Status AllGatherOperation::SetupCheckImpl(const SVector &inTensors, cons
 
 std::shared_ptr<Runner> AllGatherOperation::CreateRunner(Context &context) const
 {
-    (void)context;
-    if (param_.backend == "hccl") {
-        if (param_.hcclComm == nullptr) {
-            return std::make_shared<AllGatherHcclRunner>(param_, !param_.rankTableFile.empty());
-        } else {
-            return std::make_shared<AllGatherHcclRunner>(param_, param_.hcclComm);
-        }
-    } else if (param_.backend == "lccl") {
-        return std::make_shared<AllGatherLcclRunner>(param_, context);
+    if (param_.commMode == infer::CommMode::COMM_MULTI_THREAD) {
+        return std::make_shared<AllGatherLcclRunner>(param_, context);
+    }
+    if (param_.hcclComm == nullptr) {
+        return std::make_shared<AllGatherHcclRunner>(param_, !param_.rankTableFile.empty());
     }
-    ATB_LOG(FATAL) << "AllGatherOperation::AllGatherOperation backend " << param_.backend << "is not exist.";
-    return std::shared_ptr<Runner>();
+    return std::make_shared<AllGatherHcclRunner>(param_, param_.hcclComm);
 }
 
 nlohmann::json AllGatherOperation::GetParamJson() const
diff --git a/src/ops_infer/all_reduce/all_reduce_operation.cpp b/src/ops_infer/all_reduce/all_reduce_operation.cpp
index 6e27695c..93c9840f 100644
--- a/src/ops_infer/all_reduce/all_reduce_operation.cpp
+++ b/src/ops_infer/all_reduce/all_reduce_operation.cpp
@@ -233,15 +233,10 @@ Status AllReduceOperation::QuantShapeCheck(const TensorDesc &scale, const Tensor
 
 std::shared_ptr<Runner> AllReduceOperation::CreateRunner(Context &context) const
 {
-    (void)context;
-    if (param_.backend == "hccl") {
-        if (param_.hcclComm == nullptr) {
-            return std::make_shared<AllReduceHcclRunner>(param_, !param_.rankTableFile.empty());
-        } else {
-            return std::make_shared<AllReduceHcclRunner>(param_, param_.hcclComm);
-        }
-    } else if (param_.backend == "lccl") {
-        return std::make_shared<AllReduceLcclRunner>(param_, context);
+    if (param_.hcclComm == nullptr) {
+        return std::make_shared<AllReduceHcclRunner>(param_, !param_.rankTableFile.empty());
+    } else {
+        return std::make_shared<AllReduceHcclRunner>(param_, param_.hcclComm);
     }
     return std::shared_ptr<Runner>();
 }
diff --git a/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp b/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
index 8faf35f0..d6c7cd03 100644
--- a/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
+++ b/src/ops_infer/all_to_all/all_to_all_lccl_runner.cpp
@@ -49,7 +49,7 @@ Status AllToAllLcclRunner::ExecuteImpl(RunnerVariantPack &runnerVariantPack)
                                 GetExecuteStream(runnerVariantPack.context));
     }
     if (ret == Lcal::LCAL_ERROR_PARA_CHECK_FAIL) {
-        ATB_LOG(ERROR) << "ret: " << ret << " LCCL_PARALLEL should be 0 or fasle";
+        ATB_LOG(ERROR) << "ret: " << ret << " LCCL_PARALLEL should be 0 or false";
         return ERROR_INVALID_SINGLE_OPERATION_PARAM;
     }
     if (ret != 0) {
diff --git a/src/ops_infer/all_to_all/all_to_all_operation.cpp b/src/ops_infer/all_to_all/all_to_all_operation.cpp
index a0b635d0..21c3cdfd 100644
--- a/src/ops_infer/all_to_all/all_to_all_operation.cpp
+++ b/src/ops_infer/all_to_all/all_to_all_operation.cpp
@@ -35,10 +35,15 @@ template <> Status CreateOperation(const infer::AllToAllParam &opParam, Operatio
         return ERROR_INVALID_PARAM;
     }
     OP_PARAM_RSV_CHECK(opParam);
+    ATB_LOG(INFO) << "AllToAll rank:" << opParam.rank;
     if (opParam.backend != "hccl" && opParam.backend != "lccl") {
         ATB_LOG(ERROR) << "backend is " << opParam.backend << "backend must be hccl or lccl";
         return ERROR_INVALID_PARAM;
     }
+    if (opParam.backend == "lccl") {
+        ATB_LOG(WARN)
+            << "DEPRECATED: backend lccl is no longer supported and will be removed soon. Please use hccl instead";
+    }
     const char *socName = aclrtGetSocName();
     if (!socName) {
         ATB_LOG(ERROR) << "aclrtGetSocName failed!";
@@ -54,19 +59,15 @@ template <> Status CreateOperation(const infer::AllToAllParam &opParam, Operatio
             ATB_LOG(ERROR) << "AllToAll hccl only supports Atlas 800I A2/A3 or Atlas 900 A3 Superpod";
             return ERROR_INVALID_PARAM;
         }
-        if (opParam.transpose) {
-            ATB_LOG(ERROR) << "AllToAll hccl doesn't support transpose";
-            return ERROR_INVALID_PARAM;
-        }
-    }
-    if (opParam.backend == "lccl" && opParam.rankSize % 2 != 0) { // 2 : Even ranksize
-        ATB_LOG(ERROR) << "AllToAll lccl only supports even ranksize";
-        return ERROR_INVALID_PARAM;
     }
     if (OperationUtil::DistributedInitCheck(opParam) != NO_ERROR) {
         ATB_LOG(ERROR) << "AllToAllOperation DistributedInitCheck failed";
         return ERROR_INVALID_PARAM;
     }
+    if (opParam.backend == "lccl" && opParam.rankSize % 2 != 0) { // 2 : Even ranksize
+        ATB_LOG(ERROR) << "AllToAll lccl only supports even ranksize";
+        return ERROR_INVALID_PARAM;
+    }
     *operation = new (std::nothrow) AllToAllOperation(opParam);
     if (*operation == nullptr) {
         ATB_LOG(ERROR) << "failed to new AllToAllOperation";
@@ -103,24 +104,33 @@ Status AllToAllOperation::InferShapeCheckImpl(const SVector &inTenso
         return NO_ERROR;
     }
     if (inTensorDescs.at(0).shape.dimNum != TRANSPOSE_IN_TENSOR_DIM_NUM) { // 2: transpose only support dimNum
-        ATB_LOG(ERROR) << "inTensor[0] dimNum should be " << TRANSPOSE_IN_TENSOR_DIM_NUM
-                       << ", but got: " << inTensorDescs.at(0).shape.dimNum;
+        ATB_LOG(ERROR) << GetLogPrefix() << "AllToAll with transpose: inTensor[0] dimNum should be "
+                       << TRANSPOSE_IN_TENSOR_DIM_NUM << ", but got: " << inTensorDescs.at(0).shape.dimNum;
         return ERROR_INVALID_TENSOR_DIM_NUM;
     }
     if (inTensorDescs.at(0).shape.dims[1] % param_.rankSize != 0) {
-        ATB_LOG(ERROR) << "intensors[0].dims[0] must be an integer multiple of ranksize but got dims[0]: "
-                       << inTensorDescs.at(0).shape.dims[1] << ", rankSize: " << param_.rankSize;
+        ATB_LOG(ERROR)
+            << GetLogPrefix()
+            << "AllToAll with transpose: intensors[0].dims[1] must be an integer multiple of rankSize, but got dims[1]: "
+            << inTensorDescs.at(0).shape.dims[1] << ", rankSize: " << param_.rankSize;
         return ERROR_INVALID_TENSOR_DIM;
     }
     int64_t wSize = inTensorDescs.at(0).shape.dims[TRANSPOSE_IN_TENSOR_DIM_NUM - 1] *
                     static_cast<int64_t>(sizeof(inTensorDescs.at(0).dtype));
     if (wSize / param_.rankSize >= MAX_W_SIZE) {
-        ATB_LOG(ERROR) << "intensors[0].dims[1] / rankSize must be no greater than 90K, but got bytes: " << wSize;
+        ATB_LOG(ERROR)
+            << GetLogPrefix()
+            << "AllToAll with transpose: intensors[0].dims[1] / rankSize must be no greater than 90K, but got bytes: "
+            << wSize
+            << ", rankSize: " << param_.rankSize;
         return ERROR_INVALID_TENSOR_DIM;
     }
     uint64_t tensorSize = Utils::GetTensorSize(inTensorDescs.at(0));
     if (tensorSize > MAX_TENSOR_SIZE) {
-        ATB_LOG(ERROR) << "intensors[0] total tensor size must be no greater than 190MB, but got bytes: " << tensorSize;
+        ATB_LOG(ERROR)
+            << GetLogPrefix()
+            << "AllToAll with transpose: intensors[0] total tensor size must be no greater than 190MB, but got bytes: "
+            << tensorSize;
         return ERROR_INVALID_TENSOR_DIM;
     }
     return NO_ERROR;
@@ -130,7 +140,7 @@ Status AllToAllOperation::InferShapeImpl(const SVector &inTensorDesc
                                          SVector<TensorDesc> &outTensorDescs) const
 {
     outTensorDescs.at(0) = inTensorDescs.at(0);
-    if (param_.backend == "lccl" && param_.transpose) {
+    if (param_.transpose) { // lccl
         outTensorDescs.at(0).shape.dims[0] = inTensorDescs.at(0).shape.dims[0] * param_.rankSize;
         outTensorDescs.at(0).shape.dims[1] = inTensorDescs.at(0).shape.dims[1] / param_.rankSize;
     }
@@ -146,30 +156,39 @@ Status AllToAllOperation::SetupCheckImpl(const SVector &inTensors, const
         return st;
     }
     if (!param_.transpose && !TensorUtil::TensorDescEqual(inTensors.at(0).desc, outTensors.at(0).desc)) {
-        ATB_LOG(ERROR) << GetLogPrefix() << "intensor desc and outtensor desc should be same";
+        ATB_LOG(ERROR) << GetLogPrefix()
+                       << "AllToAll without transpose: intensor desc and outtensor desc should be same";
         return ERROR_INVALID_TENSOR_DIM;
     }
     if (param_.transpose) {
         if (inTensors.at(0).desc.shape.dimNum != TRANSPOSE_IN_TENSOR_DIM_NUM) {
-            ATB_LOG(ERROR) << "invalid inTensor dimNum, should be 2, but got inTensors[0] dimNum: "
-                           << inTensors.at(0).desc.shape.dimNum;
+            ATB_LOG(ERROR)
+                << GetLogPrefix()
+                << "AllToAll with transpose: invalid inTensor dimNum, should be 2, but got inTensors[0] dimNum: "
+                << inTensors.at(0).desc.shape.dimNum;
             return ERROR_INVALID_TENSOR_DIM_NUM;
         }
         if (outTensors.at(0).desc.shape.dimNum != TRANSPOSE_IN_TENSOR_DIM_NUM) {
-            ATB_LOG(ERROR) << "invalid outTensor dimNum, should be 2, but got outTensors[0] dimNum: "
-                           << outTensors.at(0).desc.shape.dimNum;
+            ATB_LOG(ERROR)
+                << GetLogPrefix()
+                << "AllToAll with transpose: invalid outTensor dimNum, should be 2, but got outTensors[0] dimNum: "
+                << outTensors.at(0).desc.shape.dimNum;
             return ERROR_INVALID_TENSOR_DIM_NUM;
         }
         if (outTensors.at(0).desc.shape.dims[0] != inTensors.at(0).desc.shape.dims[0] * param_.rankSize) {
-            ATB_LOG(ERROR) << "invalid outTensor dims[0] should be intensors[0].dims[0], * rankSize, i.e. "
-                           << inTensors.at(0).desc.shape.dims[0] << " * " << param_.rankSize << ", but got "
-                           << outTensors.at(0).desc.shape.dims[0];
+            ATB_LOG(ERROR)
+                << GetLogPrefix()
+                << "AllToAll with transpose: invalid outTensor dims[0], should be intensors[0].dims[0] * rankSize, i.e. "
+                << inTensors.at(0).desc.shape.dims[0] << " * " << param_.rankSize << ", but got "
+                << outTensors.at(0).desc.shape.dims[0];
             return ERROR_INVALID_TENSOR_DIM;
         }
         if (outTensors.at(0).desc.shape.dims[1] * param_.rankSize != inTensors.at(0).desc.shape.dims[1]) {
-            ATB_LOG(ERROR) << "invalid outTensor dims[1], should be intensors[0].dims[1]/rankSize, i.e. "
-                           << inTensors.at(0).desc.shape.dims[1] << " / " << param_.rankSize << ", but got "
-                           << outTensors.at(0).desc.shape.dims[1];
+            ATB_LOG(ERROR)
+                << GetLogPrefix()
+                << "AllToAll with transpose: invalid outTensor dims[1], should be intensors[0].dims[1] / rankSize, i.e. "
+                << inTensors.at(0).desc.shape.dims[1] << " / " << param_.rankSize << ", but got "
+                << outTensors.at(0).desc.shape.dims[1];
             return ERROR_INVALID_TENSOR_DIM;
         }
     }
@@ -178,17 +197,14 @@ Status AllToAllOperation::SetupCheckImpl(const SVector &inTensors, const
 
 std::shared_ptr<Runner> AllToAllOperation::CreateRunner(Context &context) const
 {
-    (void)context;
-    if (param_.backend == "hccl") {
-        if (param_.hcclComm == nullptr) {
-            return std::make_shared<AllToAllHcclRunner>(param_, !param_.rankTableFile.empty());
-        } else {
-            return std::make_shared<AllToAllHcclRunner>(param_, param_.hcclComm);
-        }
-    } else if (param_.backend == "lccl") {
+    // only transpose uses lccl
+    if (param_.transpose || param_.commMode == infer::CommMode::COMM_MULTI_THREAD) {
         return std::make_shared<AllToAllLcclRunner>(param_, context);
     }
-    return std::shared_ptr<Runner>();
+    if (param_.hcclComm == nullptr) {
+        return std::make_shared<AllToAllHcclRunner>(param_, !param_.rankTableFile.empty());
+    }
+    return std::make_shared<AllToAllHcclRunner>(param_, param_.hcclComm);
 }
 
 nlohmann::json AllToAllOperation::GetParamJson() const
-- 
Gitee
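For readers skimming the change, the sketch below summarizes the runner-selection policy that the three CreateRunner bodies converge on after this patch. It is an illustrative, standalone approximation rather than code from the repository: the Param, Runner, Context, HcclRunner, and LcclRunner stubs stand in for the operation-specific types (e.g. AllToAllHcclRunner) and are assumptions, not the library's actual declarations.

```cpp
// Hedged sketch of the post-patch dispatch: lccl is kept only for the
// transpose path (AllToAll) and for COMM_MULTI_THREAD; everything else
// goes to hccl, with the constructor chosen by whether an external HCCL
// communicator was supplied. Stub types keep the sketch self-contained.
#include <memory>
#include <string>

struct Context {};
struct Runner { virtual ~Runner() = default; };
struct HcclRunner : Runner {};  // stands in for AllGather/AllReduce/AllToAll HcclRunner
struct LcclRunner : Runner {};  // stands in for the matching LcclRunner

enum class CommMode { COMM_MULTI_PROCESS, COMM_MULTI_THREAD };

struct Param {
    CommMode commMode = CommMode::COMM_MULTI_PROCESS;
    bool transpose = false;    // AllToAll only: the transpose result still runs on lccl
    void *hcclComm = nullptr;  // externally created HCCL communicator, if any
    std::string rankTableFile; // rank table used when no communicator is passed in
};

std::shared_ptr<Runner> CreateRunner(const Param &param, Context &context)
{
    (void)context;  // the real runners take the context; the stubs do not
    if (param.transpose || param.commMode == CommMode::COMM_MULTI_THREAD) {
        return std::make_shared<LcclRunner>();  // lccl-backed runner
    }
    if (param.hcclComm == nullptr) {
        // initialise hccl from the rank table (or default single-node init)
        return std::make_shared<HcclRunner>();
    }
    return std::make_shared<HcclRunner>();      // reuse the external communicator
}
```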