From 0dfc1f3036c8e0e5fe8864fd59c8f842042e77f7 Mon Sep 17 00:00:00 2001 From: "maxiaohui (D)" <2732300406@qq.com> Date: Thu, 25 Sep 2025 17:11:30 +0800 Subject: [PATCH 1/4] test(sglang):add UT cases for sglang kernel(_fwd_kernel_ep_scatter_1) --- .../v0.4.8/test__fwd_kernel_ep_scatter_1.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py diff --git a/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py b/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py new file mode 100644 index 0000000..a2e4589 --- /dev/null +++ b/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py @@ -0,0 +1,57 @@ +import pytest +import triton +import torch +import triton.language as tl + +import sys +sys.path.append("..") +import test_common + + +#source /sglang/srt/layers/moe/ep_moe/kernels.py +@triton.jit +def _fwd_kernel_ep_scatter_1( + num_recv_tokens_per_expert, + expert_start_loc, + m_indices, + num_experts: tl.constexpr, + BLOCK_E: tl.constexpr, + BLOCK_EXPERT_NUM: tl.constexpr, +): + cur_expert = tl.program_id(0) + + offset_cumsum = tl.arange(0, BLOCK_EXPERT_NUM) + tokens_per_expert = tl.load( + num_recv_tokens_per_expert + offset_cumsum, + mask=offset_cumsum < num_experts, + other=0, + ) + cumsum = tl.cumsum(tokens_per_expert) - tokens_per_expert + tl.store(expert_start_loc + offset_cumsum, cumsum, mask=offset_cumsum < num_experts) + + cur_expert_start = tl.load(expert_start_loc + cur_expert) + cur_expert_token_num = tl.load(num_recv_tokens_per_expert + cur_expert) + + m_indices_start_ptr = m_indices + cur_expert_start + off_expert = tl.arange(0, BLOCK_E) + + for start_m in tl.range(0, cur_expert_token_num, BLOCK_E, num_stages=4): + tl.store( + m_indices_start_ptr + start_m + off_expert, + cur_expert, + ) + +def test_context_fwd_kernel(ptfile_path): + try: + data = torch.load(ptfile_path, map_location=torch.device('cpu'), weights_only=False) + except Exception as e: + pytest.fail(f"load file {ptfile_path} failed: {str(e)}") + + + input_data = test_common.convert_tensor_with_device_type(data["input_data"], device_type='npu') + _fwd_kernel_ep_scatter_1[data['grid']](**input_data) + + try: + test_common.compare_data_precision(data["gpu_output"], input_data, device_type='cpu') + except ValueError as e: + pytest.fail(f"The testcase failed") \ No newline at end of file -- Gitee From 79069ef34ef7a01208147e0814971dd2d22a4fdc Mon Sep 17 00:00:00 2001 From: "maxiaohui (D)" <2732300406@qq.com> Date: Thu, 25 Sep 2025 17:26:25 +0800 Subject: [PATCH 2/4] test(sglang): add UT cases for sglang kernel(_fwd_kernel_ep_scatter_1) --- ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py b/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py index a2e4589..6f25a53 100644 --- a/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py +++ b/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py @@ -8,7 +8,7 @@ sys.path.append("..") import test_common -#source /sglang/srt/layers/moe/ep_moe/kernels.py +#source /sglang/srt/layers/moe/ep_moe/kernels.py @triton.jit def _fwd_kernel_ep_scatter_1( num_recv_tokens_per_expert, -- Gitee From 7b23cc3afdf35faf7764176ed1f4915b2908e5cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E6=99=93=E8=BE=89?= <2732300406@qq.com> Date: Thu, 25 Sep 2025 09:52:51 +0000 Subject: [PATCH 3/4] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20asce?= =?UTF-8?q?nd/test/sglang/v0.4.8/test=5F=5Ffwd=5Fkernel=5Fep=5Fscatter=5F1?= =?UTF-8?q?.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../v0.4.8/test__fwd_kernel_ep_scatter_1.py | 57 ------------------- 1 file changed, 57 deletions(-) delete mode 100644 ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py diff --git a/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py b/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py deleted file mode 100644 index 6f25a53..0000000 --- a/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py +++ /dev/null @@ -1,57 +0,0 @@ -import pytest -import triton -import torch -import triton.language as tl - -import sys -sys.path.append("..") -import test_common - - -#source /sglang/srt/layers/moe/ep_moe/kernels.py -@triton.jit -def _fwd_kernel_ep_scatter_1( - num_recv_tokens_per_expert, - expert_start_loc, - m_indices, - num_experts: tl.constexpr, - BLOCK_E: tl.constexpr, - BLOCK_EXPERT_NUM: tl.constexpr, -): - cur_expert = tl.program_id(0) - - offset_cumsum = tl.arange(0, BLOCK_EXPERT_NUM) - tokens_per_expert = tl.load( - num_recv_tokens_per_expert + offset_cumsum, - mask=offset_cumsum < num_experts, - other=0, - ) - cumsum = tl.cumsum(tokens_per_expert) - tokens_per_expert - tl.store(expert_start_loc + offset_cumsum, cumsum, mask=offset_cumsum < num_experts) - - cur_expert_start = tl.load(expert_start_loc + cur_expert) - cur_expert_token_num = tl.load(num_recv_tokens_per_expert + cur_expert) - - m_indices_start_ptr = m_indices + cur_expert_start - off_expert = tl.arange(0, BLOCK_E) - - for start_m in tl.range(0, cur_expert_token_num, BLOCK_E, num_stages=4): - tl.store( - m_indices_start_ptr + start_m + off_expert, - cur_expert, - ) - -def test_context_fwd_kernel(ptfile_path): - try: - data = torch.load(ptfile_path, map_location=torch.device('cpu'), weights_only=False) - except Exception as e: - pytest.fail(f"load file {ptfile_path} failed: {str(e)}") - - - input_data = test_common.convert_tensor_with_device_type(data["input_data"], device_type='npu') - _fwd_kernel_ep_scatter_1[data['grid']](**input_data) - - try: - test_common.compare_data_precision(data["gpu_output"], input_data, device_type='cpu') - except ValueError as e: - pytest.fail(f"The testcase failed") \ No newline at end of file -- Gitee From fde736b9893c740ce8232c435941459f3cf49527 Mon Sep 17 00:00:00 2001 From: "maxiaohui (D)" <2732300406@qq.com> Date: Thu, 25 Sep 2025 18:01:34 +0800 Subject: [PATCH 4/4] test(sglang): add UT cases for sglang kernel(_fwd_kernel_ep_scatter_1) --- .../v0.4.8/test__fwd_kernel_ep_scatter_1.py | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py diff --git a/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py b/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py new file mode 100644 index 0000000..ecdfab6 --- /dev/null +++ b/ascend/test/sglang/v0.4.8/test__fwd_kernel_ep_scatter_1.py @@ -0,0 +1,60 @@ +import sys +import pytest +import torch + +import triton +import triton.language as tl + + +sys.path.append("..") +import test_common + + +#source python\sglang\srt\layers\moe\ep_moe\kernels.py +@triton.jit +def _fwd_kernel_ep_scatter_1( + num_recv_tokens_per_expert, + expert_start_loc, + m_indices, + num_experts: tl.constexpr, + BLOCK_E: tl.constexpr, + BLOCK_EXPERT_NUM: tl.constexpr, +): + cur_expert = tl.program_id(0) + + offset_cumsum = tl.arange(0, BLOCK_EXPERT_NUM) + tokens_per_expert = tl.load( + num_recv_tokens_per_expert + offset_cumsum, + mask=offset_cumsum < num_experts, + other=0, + ) + cumsum = tl.cumsum(tokens_per_expert) - tokens_per_expert + tl.store(expert_start_loc + offset_cumsum, cumsum, mask=offset_cumsum < num_experts) + + cur_expert_start = tl.load(expert_start_loc + cur_expert) + cur_expert_token_num = tl.load(num_recv_tokens_per_expert + cur_expert) + + m_indices_start_ptr = m_indices + cur_expert_start + off_expert = tl.arange(0, BLOCK_E) + + for start_m in tl.range(0, cur_expert_token_num, BLOCK_E, num_stages=4): + tl.store( + m_indices_start_ptr + start_m + off_expert, + cur_expert, + ) + + +def test_context_fwd_kernel(ptfile_path): + try: + data = torch.load(ptfile_path, map_location=torch.device('cpu'), weights_only=False) + except Exception as e: + pytest.fail(f"load file {ptfile_path} failed: {str(e)}") + + + input_data = test_common.convert_tensor_with_device_type(data["input_data"], device_type='npu') + _fwd_kernel_ep_scatter_1[data['grid']](**input_data) + + try: + test_common.compare_data_precision(data["gpu_output"], input_data, device_type='cpu') + except ValueError as e: + pytest.fail(f"The testcase failed") \ No newline at end of file -- Gitee