diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_addcdiv.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_addcdiv.py new file mode 100644 index 0000000000000000000000000000000000000000..befacc90ac5264d452e5061b8197a323d9b75b6b --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_addcdiv.py @@ -0,0 +1,51 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self, value=1.0): + super(Model, self).__init__() + self.value = value + + def forward(self, input_tensor, tensor1, tensor2): + # torch.addcdiv(input, tensor1, tensor2, *, value=1, out=None) + # Performs the element-wise division of tensor1 by tensor2, multiplies the result by value, and adds it to input. + # This operation is commonly used in neural networks for: + # - Implementing specific mathematical formulas + # - Normalization operations + return torch.addcdiv(input_tensor, tensor1, tensor2, value=self.value) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input_tensor1 = torch.randn(256, 512, dtype=torch.float32) + tensor1_1 = torch.randn(256, 512, dtype=torch.float32) + tensor2_1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input_tensor2 = torch.randn(1024, 4096, dtype=torch.float32) + tensor1_2 = torch.randn(1024, 4096, dtype=torch.float32) + tensor2_2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input_tensor3 = torch.randn(2048, 4096, dtype=torch.float32) + tensor1_3 = torch.randn(2048, 4096, dtype=torch.float32) + tensor2_3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input_tensor4 = torch.randn(768, 2688, dtype=torch.float32) + tensor1_4 = torch.randn(768, 2688, dtype=torch.float32) + tensor2_4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input_tensor1, tensor1_1, tensor2_1], + [input_tensor2, tensor1_2, tensor2_2], + [input_tensor3, tensor1_3, tensor2_3], + [input_tensor4, tensor1_4, tensor2_4] + ] + + +def get_init_inputs(): + # Parameters for addcdiv + value = 1.0 # Scale factor + return [value] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_bitwise_and.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_bitwise_and.py new file mode 100644 index 0000000000000000000000000000000000000000..2665c12e7310d1c3e9a69cab9fed144e47d8fa12 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_bitwise_and.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input1, input2): + # torch.bitwise_and(input, other, *, out=None) + # Computes the element-wise bitwise AND of the given input tensors. + # Zeros are treated as False and nonzeros are treated as True. 
+ # This operation is commonly used in neural networks for: + # - Implementing bit manipulation operations + # - Creating bit masks + # - Low-level data processing + return torch.bitwise_and(input1, input2) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input1_1 = torch.randint(0, 256, (256, 512), dtype=torch.int32) + input1_2 = torch.randint(0, 256, (256, 512), dtype=torch.int32) + + # Case 2: Middle (batch=1024, hidden=4096) + input2_1 = torch.randint(0, 256, (1024, 4096), dtype=torch.int32) + input2_2 = torch.randint(0, 256, (1024, 4096), dtype=torch.int32) + + # Case 3: Large (batch=2048, hidden=4096) + input3_1 = torch.randint(0, 256, (2048, 4096), dtype=torch.int32) + input3_2 = torch.randint(0, 256, (2048, 4096), dtype=torch.int32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input4_1 = torch.randint(0, 256, (768, 2688), dtype=torch.int32) + input4_2 = torch.randint(0, 256, (768, 2688), dtype=torch.int32) + + return [ + [input1_1, input1_2], + [input2_1, input2_2], + [input3_1, input3_2], + [input4_1, input4_2] + ] + + +def get_init_inputs(): + # No parameters needed for bitwise_and + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_broadcast.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_broadcast.py new file mode 100644 index 0000000000000000000000000000000000000000..72a5b7fb6b745f88cd3d0b55fc30a3c3d43c50d5 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_broadcast.py @@ -0,0 +1,30 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self, shape=None): + super(Model, self).__init__() + self.shape = shape + + def forward(self, input_tensor): + # torch.broadcast_to(input, shape) + # Broadcasts input to the shape shape. + # Broadcasting is the process of making tensors with different shapes have compatible shapes for element-wise operations. + # This operation is commonly used in neural networks for: + # - Expanding tensors to match dimensions for broadcasting + # - Creating patterned matrices + # - Implementing certain attention mechanisms + return torch.broadcast_to(input_tensor, self.shape) + + +def get_inputs_dyn_list(): + input_tensor_1 = torch.randn(1, 4096, dtype=torch.float32) + input_tensor_2 = torch.randn(1024, 1, dtype=torch.float32) + return [[input_tensor_1], [input_tensor_2]] + +def get_init_inputs(): + # Specific shape for broadcasting + # Broadcast to (1024, 4096) + shape = (1024, 4096) + return [shape] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_cdiv.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_cdiv.py new file mode 100644 index 0000000000000000000000000000000000000000..dfc3088f225ace17dc1c6ddc1eeeda11025eda8f --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_cdiv.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, dividend, divisor): + # torch.div(input, other, *, rounding_mode='trunc', out=None) + # Performs division with truncation towards zero. + # This is equivalent to C-style integer division. 
+ # This operation is commonly used in neural networks for: + # - Implementing ceiling division operations + # - Calculating grid dimensions in CUDA kernels + # - Mathematical transformations that require integer division + return torch.div(dividend, divisor, rounding_mode='trunc') + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + dividend1 = torch.randn(256, 512, dtype=torch.float32) + divisor1 = torch.randn(256, 512, dtype=torch.float32) + 1e-6 + + # Case 2: Middle (batch=1024, hidden=4096) + dividend2 = torch.randn(1024, 4096, dtype=torch.float32) + divisor2 = torch.randn(1024, 4096, dtype=torch.float32) + 1e-6 + + # Case 3: Large (batch=2048, hidden=4096) + dividend3 = torch.randn(2048, 4096, dtype=torch.float32) + divisor3 = torch.randn(2048, 4096, dtype=torch.float32) + 1e-6 + + # Case 4: Non-aligned (batch=768, hidden=2688) + dividend4 = torch.randn(768, 2688, dtype=torch.float32) + divisor4 = torch.randn(768, 2688, dtype=torch.float32) + 1e-6 + + return [ + [dividend1, divisor1], + [dividend2, divisor2], + [dividend3, divisor3], + [dividend4, divisor4] + ] + + +def get_init_inputs(): + # No parameters needed for cdiv + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_cos.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_cos.py new file mode 100644 index 0000000000000000000000000000000000000000..f53cece3593757b38d74590875e0130e455d8136 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_cos.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_tensor): + # torch.cos(input, *, out=None) + # Computes the element-wise cosine of the input tensor. 
+ # This operation is commonly used in neural networks for: + # - Implementing periodic activation functions + # - Positional encoding in transformers + # - Signal processing operations + return torch.cos(input_tensor) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input_tensor1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input_tensor2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input_tensor3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input_tensor4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input_tensor1], + [input_tensor2], + [input_tensor3], + [input_tensor4] + ] + + +def get_init_inputs(): + # No parameters needed for cos + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_equal.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_equal.py new file mode 100644 index 0000000000000000000000000000000000000000..73b65f49f4dd99cff5e1d7476fb5b635b15bbbd5 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_equal.py @@ -0,0 +1,46 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input1, input2): + # torch.eq(input, other, *, out=None) + # Computes element-wise equality comparison + # This operation is commonly used in neural networks for: + # - Implementing comparison operations + # - Creating masks for conditional operations + # - Checking tensor equality in tests or validation + return torch.eq(input1, input2) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input1_1 = torch.randn(256, 512, dtype=torch.float32) + input2_1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input1_2 = torch.randn(1024, 4096, dtype=torch.float32) + input2_2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input1_3 = torch.randn(2048, 4096, dtype=torch.float32) + input2_3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input1_4 = torch.randn(768, 2688, dtype=torch.float32) + input2_4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input1_1, input2_1], + [input1_2, input2_2], + [input1_3, input2_3], + [input1_4, input2_4] + ] + + +def get_init_inputs(): + # No parameters needed for equal + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_exp_001.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_exp_001.py new file mode 100644 index 0000000000000000000000000000000000000000..d9658f9c9c1f8c693cf093db50fb0eceeef56705 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_exp_001.py @@ -0,0 +1,45 @@ +import torch +import torch.nn as nn + +class Model(nn.Module): + """ + Exponential activation function operation. 
+ This operation is commonly used in neural networks for: + - Activation function that computes element-wise exponential + - Used in some probabilistic models and attention mechanisms + - Maps input values to positive outputs + + Formula: output = exp(input) + """ + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_tensor): + # Exponential activation function applied to input_tensor + result = torch.exp(input_tensor) + return result + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=16, seq=64, hidden=512) + input_tensor1 = torch.randn(16, 64, 512, dtype=torch.float32) * 0.1 + + # Case 2: Middle (batch=32, seq=512, hidden=1024) + input_tensor2 = torch.randn(32, 512, 1024, dtype=torch.float32) * 0.1 + + # Case 3: Large (batch=64, seq=2048, hidden=4096) + input_tensor3 = torch.randn(64, 2048, 4096, dtype=torch.float32) * 0.1 + + # Case 4: Non-aligned (batch=48, seq=256, hidden=2688) + input_tensor4 = torch.randn(48, 256, 2688, dtype=torch.float32) * 0.1 + + return [ + [input_tensor1], + [input_tensor2], + [input_tensor3], + [input_tensor4] + ] + +def get_init_inputs(): + # No parameters for Exponential activation operation + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_greater.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_greater.py new file mode 100644 index 0000000000000000000000000000000000000000..da8cd884221500d2284047364217cecf1ba4d24c --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_greater.py @@ -0,0 +1,46 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input1, input2): + # torch.gt(input, other, *, out=None) + # Computes input > other elementwise. + # This operation is commonly used in neural networks for: + # - Implementing comparison operations + # - Creating masks for conditional operations + # - Thresholding operations + return torch.gt(input1, input2) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input1_1 = torch.randn(256, 512, dtype=torch.float32) + input2_1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input1_2 = torch.randn(1024, 4096, dtype=torch.float32) + input2_2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input1_3 = torch.randn(2048, 4096, dtype=torch.float32) + input2_3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input1_4 = torch.randn(768, 2688, dtype=torch.float32) + input2_4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input1_1, input2_1], + [input1_2, input2_2], + [input1_3, input2_3], + [input1_4, input2_4] + ] + + +def get_init_inputs(): + # No parameters needed for greater + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_greater_equal.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_greater_equal.py new file mode 100644 index 0000000000000000000000000000000000000000..53f7f3efad53850cfa4224ee4d5afcf07e2915da --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_greater_equal.py @@ -0,0 +1,46 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input1, input2): + # torch.ge(input, other, *, out=None) + # Computes input >= other elementwise. 
+ # This operation is commonly used in neural networks for: + # - Implementing comparison operations + # - Creating masks for conditional operations + # - Thresholding operations + return torch.ge(input1, input2) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input1_1 = torch.randn(256, 512, dtype=torch.float32) + input2_1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input1_2 = torch.randn(1024, 4096, dtype=torch.float32) + input2_2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input1_3 = torch.randn(2048, 4096, dtype=torch.float32) + input2_3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input1_4 = torch.randn(768, 2688, dtype=torch.float32) + input2_4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input1_1, input2_1], + [input1_2, input2_2], + [input1_3, input2_3], + [input1_4, input2_4] + ] + + +def get_init_inputs(): + # No parameters needed for greater_equal + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_less.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_less.py new file mode 100644 index 0000000000000000000000000000000000000000..6a1de357178f897d05788e7509e7e939059da0e7 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_less.py @@ -0,0 +1,46 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input1, input2): + # torch.lt(input, other, *, out=None) + # Computes input < other elementwise. + # This operation is commonly used in neural networks for: + # - Implementing comparison operations + # - Creating masks for conditional operations + # - Implementing certain activation functions or gating mechanisms + return torch.lt(input1, input2) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input1_1 = torch.randn(256, 512, dtype=torch.float32) + input1_2 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input2_1 = torch.randn(1024, 4096, dtype=torch.float32) + input2_2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input3_1 = torch.randn(2048, 4096, dtype=torch.float32) + input3_2 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input4_1 = torch.randn(768, 2688, dtype=torch.float32) + input4_2 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input1_1, input1_2], + [input2_1, input2_2], + [input3_1, input3_2], + [input4_1, input4_2] + ] + + +def get_init_inputs(): + # No parameters needed for less + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_linspace.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_linspace.py new file mode 100644 index 0000000000000000000000000000000000000000..8e5c9d65f50dfbddf63ec866a835045d8fdb6823 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_linspace.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_tensor): + # torch.linspace(start, end, steps, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) + # Creates a one-dimensional tensor of size steps whose 
values are evenly spaced from start to end, inclusive.
+        # This operation is commonly used in neural networks for:
+        # - Creating evenly spaced values for interpolation
+        # - Generating sequences with specific spacing
+        # - Implementing certain mathematical transformations
+        return torch.linspace(0.0, 1.0, 4096, dtype=torch.float32)
+
+
+def get_inputs_dyn_list():
+    # Case 1: Small (batch=256, hidden=512)
+    input_tensor1 = torch.randn(256, 512, dtype=torch.float32)
+
+    # Case 2: Middle (batch=1024, hidden=4096)
+    input_tensor2 = torch.randn(1024, 4096, dtype=torch.float32)
+
+    # Case 3: Large (batch=2048, hidden=4096)
+    input_tensor3 = torch.randn(2048, 4096, dtype=torch.float32)
+
+    # Case 4: Non-aligned (batch=761, hidden=1344)
+    input_tensor4 = torch.randn(761, 1344, dtype=torch.float32)
+
+    return [
+        [input_tensor1],
+        [input_tensor2],
+        [input_tensor3],
+        [input_tensor4]
+    ]
+
+
+def get_init_inputs():
+    # No parameters needed for linspace
+    return []
\ No newline at end of file
diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_logical_not.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_logical_not.py
new file mode 100644
index 0000000000000000000000000000000000000000..c955ba3ba1f261adc674d78ea57f959c98bae931
--- /dev/null
+++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_logical_not.py
@@ -0,0 +1,43 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, input_tensor):
+        # torch.logical_not(input, *, out=None)
+        # Computes the element-wise logical NOT of the given input tensor.
+        # Zeros are treated as False and nonzeros are treated as True.
+        # This operation is commonly used in neural networks for:
+        # - Inverting boolean masks
+        # - Implementing negation of conditions
+        # - Creating complementary masks
+        return torch.logical_not(input_tensor)
+
+
+def get_inputs_dyn_list():
+    # Case 1: Small (batch=256, hidden=512)
+    input_tensor1 = torch.randint(0, 2, (256, 512), dtype=torch.bool)
+
+    # Case 2: Middle (batch=1024, hidden=4096)
+    input_tensor2 = torch.randint(0, 2, (1024, 4096), dtype=torch.bool)
+
+    # Case 3: Large (batch=2048, hidden=4096)
+    input_tensor3 = torch.randint(0, 2, (2048, 4096), dtype=torch.bool)
+
+    # Case 4: Non-aligned (batch=768, hidden=2688)
+    input_tensor4 = torch.randint(0, 2, (768, 2688), dtype=torch.bool)
+
+    return [
+        [input_tensor1],
+        [input_tensor2],
+        [input_tensor3],
+        [input_tensor4]
+    ]
+
+
+def get_init_inputs():
+    # No parameters needed for logical_not
+    return []
\ No newline at end of file
diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_maximum.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_maximum.py
new file mode 100644
index 0000000000000000000000000000000000000000..76381595ce0f17f4c2c9434ca7b8f50a692d74be
--- /dev/null
+++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_maximum.py
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, input1, input2):
+        # torch.maximum(input, other, *, out=None)
+        # Computes the element-wise maximum of input and other.
+ # This operation is commonly used in neural networks for: + # - Implementing ReLU activation functions + # - Clamping values to a minimum threshold + # - Combining feature maps with element-wise maximum + return torch.maximum(input1, input2) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input1_1 = torch.randn(256, 512, dtype=torch.float32) + input2_1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input1_2 = torch.randn(1024, 4096, dtype=torch.float32) + input2_2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input1_3 = torch.randn(2048, 4096, dtype=torch.float32) + input2_3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input1_4 = torch.randn(768, 2688, dtype=torch.float32) + input2_4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input1_1, input2_1], + [input1_2, input2_2], + [input1_3, input2_3], + [input1_4, input2_4] + ] + + +def get_init_inputs(): + # No parameters needed for maximum + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_minimum.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_minimum.py new file mode 100644 index 0000000000000000000000000000000000000000..f897924fbdb9d62e7113e3c496ba12ff85401425 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_minimum.py @@ -0,0 +1,46 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input1, input2): + # torch.minimum(input, other, *, out=None) + # Computes the element-wise minimum of input and other. + # This operation is commonly used in neural networks for: + # - Clamping values to a maximum threshold + # - Implementing certain activation functions + # - Combining feature maps with element-wise minimum + return torch.minimum(input1, input2) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input1_1 = torch.randn(256, 512, dtype=torch.float32) + input2_1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input1_2 = torch.randn(1024, 4096, dtype=torch.float32) + input2_2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input1_3 = torch.randn(2048, 4096, dtype=torch.float32) + input2_3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input1_4 = torch.randn(768, 2688, dtype=torch.float32) + input2_4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input1_1, input2_1], + [input1_2, input2_2], + [input1_3, input2_3], + [input1_4, input2_4] + ] + + +def get_init_inputs(): + # No parameters needed for minimum + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_neg.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_neg.py new file mode 100644 index 0000000000000000000000000000000000000000..dcf339c75a6eac9d1d3ad2575a8f7c82a6cf537e --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_neg.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_tensor): + # torch.neg(input, *, out=None) + # Returns a new tensor with the negative of the elements of input. 
+ # This operation is commonly used in neural networks for: + # - Implementing mathematical transformations + # - Computing differences or residuals + # - Implementing certain activation functions + return torch.neg(input_tensor) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input_tensor1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input_tensor2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input_tensor3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input_tensor4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input_tensor1], + [input_tensor2], + [input_tensor3], + [input_tensor4] + ] + + +def get_init_inputs(): + # No parameters needed for neg + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_pow_001.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_pow_001.py new file mode 100644 index 0000000000000000000000000000000000000000..eee6030ba24368c06a00c88485ebb8449ed8d99a --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_pow_001.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_tensor, exponent): + # torch.pow(input, exponent, *, out=None) + # Takes the power of each element in input with exponent and returns a tensor with the result. + # This is a power operation with exponent=2.0 (squaring). + # Power operations are commonly used in neural networks for: + # - Implementing polynomial activation functions + # - Computing distance metrics + # - Mathematical transformations in specialized layers + return torch.pow(input_tensor, exponent) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input_tensor1 = torch.randn(256, 512, dtype=torch.float32) + exponent1 = torch.full((256, 512), 2.0, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input_tensor2 = torch.randn(1024, 4096, dtype=torch.float32) + exponent2 = torch.full((1024, 4096), 2.0, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input_tensor3 = torch.randn(2048, 4096, dtype=torch.float32) + exponent3 = torch.full((2048, 4096), 2.0, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input_tensor4 = torch.randn(768, 2688, dtype=torch.float32) + exponent4 = torch.full((768, 2688), 2.0, dtype=torch.float32) + + return [ + [input_tensor1, exponent1], + [input_tensor2, exponent2], + [input_tensor3, exponent3], + [input_tensor4, exponent4] + ] + + +def get_init_inputs(): + # No parameters needed for pow + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_rsqrt.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_rsqrt.py new file mode 100644 index 0000000000000000000000000000000000000000..7caa23c89b61cb30b57b42fdb96748b0fbba725e --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_rsqrt.py @@ -0,0 +1,43 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_tensor): + # torch.rsqrt(input, *, out=None) + # Returns a new tensor with the reciprocal square root of the elements of input. 
+ # rsqrt(input) = 1 / sqrt(input) + # This operation is commonly used in neural networks for: + # - Normalization operations (e.g., RMS normalization, layer normalization) + # - Mathematical transformations in specialized layers + # - Implementing certain activation functions + return torch.rsqrt(input_tensor) + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=256, hidden=512) + input_tensor1 = torch.randn(256, 512, dtype=torch.float32) + + # Case 2: Middle (batch=1024, hidden=4096) + input_tensor2 = torch.randn(1024, 4096, dtype=torch.float32) + + # Case 3: Large (batch=2048, hidden=4096) + input_tensor3 = torch.randn(2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=768, hidden=2688) + input_tensor4 = torch.randn(768, 2688, dtype=torch.float32) + + return [ + [input_tensor1], + [input_tensor2], + [input_tensor3], + [input_tensor4] + ] + + +def get_init_inputs(): + # No parameters needed for rsqrt + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_silu_001.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_silu_001.py new file mode 100644 index 0000000000000000000000000000000000000000..ceb03e0f9fc9cef29141eced8af36f8bd67f75c7 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_silu_001.py @@ -0,0 +1,45 @@ +import torch +import torch.nn as nn + +class Model(nn.Module): + """ + SiLU (Sigmoid Linear Unit) activation function operation, also known as Swish. + This operation is commonly used in neural networks for: + - Activation function in EfficientNet and other modern architectures + - Used in various transformer models + - Provides smooth, non-monotonic activation + + Formula: output = input * sigmoid(input) + """ + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_tensor): + # SiLU (Swish) activation function applied to input_tensor + result = torch.nn.functional.silu(input_tensor) + return result + + +def get_inputs_dyn_list(): + # Case 1: Small (batch=16, seq=64, hidden=512) + input_tensor1 = torch.randn(16, 64, 512, dtype=torch.float32) + + # Case 2: Middle (batch=32, seq=512, hidden=1024) + input_tensor2 = torch.randn(32, 512, 1024, dtype=torch.float32) + + # Case 3: Large (batch=64, seq=2048, hidden=4096) + input_tensor3 = torch.randn(64, 2048, 4096, dtype=torch.float32) + + # Case 4: Non-aligned (batch=48, seq=256, hidden=2688) + input_tensor4 = torch.randn(48, 256, 2688, dtype=torch.float32) + + return [ + [input_tensor1], + [input_tensor2], + [input_tensor3], + [input_tensor4] + ] + +def get_init_inputs(): + # No parameters for SiLU activation operation + return [] \ No newline at end of file diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_sin.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_sin.py new file mode 100644 index 0000000000000000000000000000000000000000..d09a035de30a3a82b928eaeb0669b3a4f8df6861 --- /dev/null +++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_sin.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn + + +class Model(nn.Module): + def __init__(self): + super(Model, self).__init__() + + def forward(self, input_tensor): + # torch.sin(input, *, out=None) + # Computes the element-wise sine of the input tensor. 
+        # This operation is commonly used in neural networks for:
+        # - Implementing periodic activation functions
+        # - Positional encoding in transformers
+        # - Signal processing operations
+        return torch.sin(input_tensor)
+
+
+def get_inputs_dyn_list():
+    # Case 1: Small (batch=256, hidden=512)
+    input_tensor1 = torch.randn(256, 512, dtype=torch.float32)
+
+    # Case 2: Middle (batch=1024, hidden=4096)
+    input_tensor2 = torch.randn(1024, 4096, dtype=torch.float32)
+
+    # Case 3: Large (batch=2048, hidden=4096)
+    input_tensor3 = torch.randn(2048, 4096, dtype=torch.float32)
+
+    # Case 4: Non-aligned (batch=768, hidden=2688)
+    input_tensor4 = torch.randn(768, 2688, dtype=torch.float32)
+
+    return [
+        [input_tensor1],
+        [input_tensor2],
+        [input_tensor3],
+        [input_tensor4]
+    ]
+
+
+def get_init_inputs():
+    # No parameters needed for sin
+    return []
\ No newline at end of file
diff --git a/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_tan.py b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_tan.py
new file mode 100644
index 0000000000000000000000000000000000000000..476b6c4d21e6b24c616cffb3c8078c5673687d0c
--- /dev/null
+++ b/aikg/benchmark/aikgbench/dynamic_shape/elemwise/elemwise_tan.py
@@ -0,0 +1,42 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+
+    def forward(self, input_tensor):
+        # torch.tan(input, *, out=None)
+        # Returns a new tensor with the tangent of the elements of input.
+        # This operation is commonly used in neural networks for:
+        # - Implementing certain activation functions
+        # - Mathematical transformations in specialized layers
+        # - Periodic function approximations
+        return torch.tan(input_tensor)
+
+
+def get_inputs_dyn_list():
+    # Case 1: Small (batch=256, hidden=512)
+    input_tensor1 = torch.randn(256, 512, dtype=torch.float32)
+
+    # Case 2: Middle (batch=1024, hidden=4096)
+    input_tensor2 = torch.randn(1024, 4096, dtype=torch.float32)
+
+    # Case 3: Large (batch=2048, hidden=4096)
+    input_tensor3 = torch.randn(2048, 4096, dtype=torch.float32)
+
+    # Case 4: Non-aligned (batch=768, hidden=2688)
+    input_tensor4 = torch.randn(768, 2688, dtype=torch.float32)
+
+    return [
+        [input_tensor1],
+        [input_tensor2],
+        [input_tensor3],
+        [input_tensor4]
+    ]
+
+
+def get_init_inputs():
+    # No parameters needed for tan
+    return []
\ No newline at end of file
diff --git a/aikg/python/ai_kernel_generator/resources/prompts/coder/codegen.j2 b/aikg/python/ai_kernel_generator/resources/prompts/coder/codegen.j2
index 208ab6e36056b158b6f1ffa0a4bfddac07683f46..3aea6e9cf13def8bdd6c802217c93be41044b7b2 100644
--- a/aikg/python/ai_kernel_generator/resources/prompts/coder/codegen.j2
+++ b/aikg/python/ai_kernel_generator/resources/prompts/coder/codegen.j2
@@ -105,6 +105,22 @@ inputs = get_inputs()
 
 {% if "triton" in dsl %}
 # 运行Triton实现
+## 卷积类算子生成
+请注意！！如果检测到给出的任务示例是卷积类的算子任务，为了保证Triton的卷积核权重与当前任务代码的卷积核权重共享，需要在Triton的host侧生成对应的weight，例如：
+```
+    import torch
+    import torch.nn as nn
+    import triton
+    import triton.language as tl
+    # Triton内核
+    @triton.jit
+    def triton_kernel():
+        pass
+    def triton_host():
+        args = ...
+        weight = nn.ConvXXX(**args).weight.to(device)
+```
+我们会在调用Triton代码之前固定随机种子以保证Triton的卷积核权重与任务代码的卷积核权重一致，请务必保证nn中调用的module与任务torch代码中调用的module一致，使用的参数一致，device设置请参考不同的硬件后端("cuda", "npu")。
 output = {{ func_name }}(*inputs)
 {% elif dsl == "swft" %}
 # 运行SWFT实现
diff --git a/aikg/python/ai_kernel_generator/resources/templates/kernel_verify_template.j2 b/aikg/python/ai_kernel_generator/resources/templates/kernel_verify_template.j2
index 758ee709e7294839dac4edb55da9bb72cb51f109..49ce369e317d424f586f00ac235e5af9eba97578 100644
--- a/aikg/python/ai_kernel_generator/resources/templates/kernel_verify_template.j2
+++ b/aikg/python/ai_kernel_generator/resources/templates/kernel_verify_template.j2
@@ -288,7 +288,7 @@ def verify_implementations():
 {% elif framework == "mindspore" %}
     inputs = get_inputs()
 {% endif %}
-
+    torch.manual_seed(0)
     # 运行框架实现
     framework_output = framework_model(*inputs)
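
Usage sketch (not part of the diff above): every benchmark file added here follows the same convention — a `Model` whose constructor takes the values returned by `get_init_inputs()`, and a `get_inputs_dyn_list()` that returns one positional-argument list per dynamic-shape case. The minimal driver below only illustrates that contract; the module name `elemwise_cos` and the harness itself are assumptions for illustration, not code defined in this patch.

```
# Hypothetical driver for the Model / get_inputs_dyn_list / get_init_inputs convention.
import importlib

import torch

task = importlib.import_module("elemwise_cos")  # any of the elemwise_*.py modules above

torch.manual_seed(0)  # mirrors the seed fixing added to kernel_verify_template.j2
model = task.Model(*task.get_init_inputs())  # e.g. [] for cos, [1.0] for addcdiv
model.eval()

# Each entry of get_inputs_dyn_list() is one dynamic-shape case:
# a list of positional arguments for Model.forward().
for case_id, inputs in enumerate(task.get_inputs_dyn_list(), start=1):
    with torch.no_grad():
        output = model(*inputs)
    print(f"case {case_id}: in={[tuple(t.shape) for t in inputs]} "
          f"out={tuple(output.shape)} dtype={output.dtype}")
```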