From 76989aea8b816c82268c22e99dd53be40ab68f49 Mon Sep 17 00:00:00 2001 From: jiangli Date: Tue, 4 Jun 2024 16:50:18 +0800 Subject: [PATCH 01/11] add tf op --- attention_fusion/build_ops.sh | 23 +++++++ attention_fusion/test/test.py | 93 ++++++++++++++++++++++++++ attention_fusion/tf_ops/CMakeLists.txt | 34 ++++++++++ attention_fusion/tf_ops/atten_ops.cpp | 81 ++++++++++++++++++++++ attention_fusion/tf_ops/atten_ops.h | 19 ++++++ 5 files changed, 250 insertions(+) create mode 100644 attention_fusion/build_ops.sh create mode 100644 attention_fusion/test/test.py create mode 100644 attention_fusion/tf_ops/CMakeLists.txt create mode 100644 attention_fusion/tf_ops/atten_ops.cpp create mode 100644 attention_fusion/tf_ops/atten_ops.h diff --git a/attention_fusion/build_ops.sh b/attention_fusion/build_ops.sh new file mode 100644 index 0000000..d175969 --- /dev/null +++ b/attention_fusion/build_ops.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +mkdir build +cd build +cmake ../tf_ops +make -j4 + +cd test +python3 test.py diff --git a/attention_fusion/test/test.py b/attention_fusion/test/test.py new file mode 100644 index 0000000..e24e49f --- /dev/null +++ b/attention_fusion/test/test.py @@ -0,0 +1,93 @@ +from mpi4py import MPI +import os +import numpy as np +from math import sqrt + +import tensorflow as tf +tf.compat.v1.disable_eager_execution() +tfOpLib = tf.load_op_library("../build/libattention_ops.so") + +import npu_device +from npu_device.compat.v1.npu_init import * + +loss = 1e-3 +minimum = 10e-10 + +def softmax(src): + #基于last轴进行rowmax(按行取最大值)处理 + max = np.max(src, axis=-1, keepdims=True) + sub = src - max + exp = np.exp(sub) + #基于last轴进行rowsum(按行求和)处理 + sum = np.sum(exp, axis=-1, keepdims=True) + dst = exp / sum + return dst + +def gloden_atten_fusion(query, key, value, atten_mask): + qk = np.matmul(query, key.transpose(0, 2, 1)) + + # @jiangli + print("attn_dim: ", query.shape[2]) + attnDimSqrt = 1 / sqrt(query.shape[2]) + attnWeight = np.multiply(qk, attnDimSqrt) + addMask = np.add(attnWeight, atten_mask) + qk_div = softmax(addMask) + + out = np.matmul(qk_div, value) + return out, qk_div + +def verify_result(real_result, golden): + print(real_result[:32]) + print(golden[:32]) + result = np.abs(real_result - golden) + deno = np.maximum(np.abs(real_result), np.abs(golden)) + result_atol = np.less_equal(result, loss) + result_rtol = np.less_equal(result / np.add(deno, minimum), loss) + if not result_rtol.all() and not result_atol.all(): + if np.sum(result_rtol == False) > real_result.size * loss and np.sum(result_atol == False) > real_result.size * loss: + print("[ERROR] result error") + return False + print("test pass") + return True + +npu_device.compat.enable_v1() +npu_init = npu_ops.initialize_system() +npu_shutdown = npu_ops.shutdown_system() +config = tf.compat.v1.ConfigProto() +custom_op = config.graph_options.rewrite_options.custom_optmizers.add() +custom_op.name = "NpuOptimizer" +config.graph_options.rewrite_options.remapping = RewriterConfig.OFF +config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF + +#测试用例 +dim0 = 1024 +dim1 = 1000 +dim2 = 80 +dim3 = 50 + +query = tf.random_uniform([dim0, dim1, dim2], maxval = 10, dtype=tf.float32) +key = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) +value = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) +mask = tf.random_uniform([dim0, dim1, dim3], maxval = 1, dtype=tf.float32) + +golden_atten_score, gold_softmax_out = gloden_atten_fusion(query, key, value, mask) + +ret = tfOpLib.attention_fusion(query=query, key=key, + value=value, attnMask=mask) +init = tf.compat.v1.global_varibles_initializer() + +with tf.compat.v1.Session(config=config) as sess: + sess.run(init) + attenScore, softmaxOut = sess.run(ret) + print("============ attention fusion =============") + print("attenScore: ") + verify_result(attenScore, golden_atten_score) + print("softmaxOut: ") + verify_result(softmaxOut, gold_softmax_out) + + + + + + + diff --git a/attention_fusion/tf_ops/CMakeLists.txt b/attention_fusion/tf_ops/CMakeLists.txt new file mode 100644 index 0000000..291b68a --- /dev/null +++ b/attention_fusion/tf_ops/CMakeLists.txt @@ -0,0 +1,34 @@ +# Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +cmake_minimum_required(VERSION 3.20) +set(CMAKE_CXX_STANDARD 14) + +include_directories(/usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/inlcude) +link_directories(/usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/) + +file(GLOB_RECURSE TF_OPS ./*.cpp) +add_library(attention_ops SHARED ${TF_OPS}) + +message("TF VERSION" ${TF_VERSION}) +if(${TF_VERSION} EQUAL 1) + target_link_libraries(attention_ops /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow_core/libtensorflow_framework.so.1) + target_include_directories(attention_ops PUBLIC /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow_core/inlcude) +else() + target_link_libraries(attention_ops /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/libtensorflow_framework.so.2) + target_include_directories(attention_ops PUBLIC /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/inlcude) +endif() + +install(TARGET attention_ops LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}) \ No newline at end of file diff --git a/attention_fusion/tf_ops/atten_ops.cpp b/attention_fusion/tf_ops/atten_ops.cpp new file mode 100644 index 0000000..99a9513 --- /dev/null +++ b/attention_fusion/tf_ops/atten_ops.cpp @@ -0,0 +1,81 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" + +using namespace tensorflow; +using shape_inference::inferenceContext; +using shape_inference::ShapeHandle; + +using namespace std; +using namespace chrono; + +using OpKernelConstructionPtr = OpKernelConstructionPtr*; +using OpKernelContextPtr = OpKernelContextPtr*; +using inferenceContextPtr = ::tensorflow::shape_inference::inferenceContext*; + +namespace { + class CustOps : public OpKernel { + public: + explicit CustOps(OpKernelConstructionPtr context) : OpKernel(context) + { + } + + void Compute(OpKernelContextPtr context) override + { + std::cout << "Cust Ops not installed!!" << std::endl; + } + + ~CustOps() override = default; + } +} + +namespace tensorflow { + REGISTER_OP("attention_fusion") + .Input("query: float") + .Input("key: float") + .Input("value: float") + .Input("attnMask: float") + .Output("attenScore: float") + .Output("softmaxOut: float") + .SetIsStateful() + .SetShapeFn([](::tensorflow::shape_inference::inferenceContext *c)) { + ShapeHandle query_shape; + ShapeHandle key_shape; + ShapeHandle value_shape; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &query_shape)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &key_shape)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &value_shape)); + + tensorflow::shape_inference::DimensionHandle queryDim0 = c->Dim(query_shape, 0); + tensorflow::shape_inference::DimensionHandle queryDim1 = c->Dim(query_shape, 1); + tensorflow::shape_inference::DimensionHandle keyDim1 = c->Dim(key_shape, 1); + tensorflow::shape_inference::DimensionHandle valueDim2 = c->Dim(value_shape, 2); + int64_t shape0 = c->Value(queryDim0); + int64_t shape1 = c->Value(queryDim1); + int64_t shape2 = c->Value(keyDim1); + int64_t shape3 = c->Value(valueDim2); + + c->set_output(0, c->MakeShape({shape0, shape1, shape3})); + c->set_output(1, c->MakeShape({shape0, shape1, shape2})); + return Status::OK(); + } + REGISTER_KERNEL_BUILDER(Name("FusedAttention").Device(Device_CPU), CustOps) +} \ No newline at end of file diff --git a/attention_fusion/tf_ops/atten_ops.h b/attention_fusion/tf_ops/atten_ops.h new file mode 100644 index 0000000..6d022f6 --- /dev/null +++ b/attention_fusion/tf_ops/atten_ops.h @@ -0,0 +1,19 @@ +/* Copyright 2024. Huawei Technologies Co.,Ltd. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#ifndef ATTEN_OPS_H +#define ATTEN_OPS_H + +#endif // ATTEN_OPS_H \ No newline at end of file -- Gitee From 3c62ba869ecdef81a1404894101eb036d99d7245 Mon Sep 17 00:00:00 2001 From: jiangli Date: Wed, 5 Jun 2024 18:05:12 +0800 Subject: [PATCH 02/11] update --- attention_fusion/CMakeLists.txt | 2 ++ attention_fusion/build_ops.sh | 8 ++++++-- attention_fusion/test/test.py | 19 +++++++++++-------- attention_fusion/tf_ops/CMakeLists.txt | 20 +++++++++++--------- attention_fusion/tf_ops/atten_ops.cpp | 26 +++++++++++++------------- 5 files changed, 43 insertions(+), 32 deletions(-) create mode 100644 attention_fusion/CMakeLists.txt diff --git a/attention_fusion/CMakeLists.txt b/attention_fusion/CMakeLists.txt new file mode 100644 index 0000000..f9184b8 --- /dev/null +++ b/attention_fusion/CMakeLists.txt @@ -0,0 +1,2 @@ +cmake_minimum_required(VERSION 3.20) +add_subdirectory(tf_ops) \ No newline at end of file diff --git a/attention_fusion/build_ops.sh b/attention_fusion/build_ops.sh index d175969..f658ea3 100644 --- a/attention_fusion/build_ops.sh +++ b/attention_fusion/build_ops.sh @@ -14,10 +14,14 @@ # limitations under the License. # ============================================================================== +if [ -d build ]; then + rm -rf build +fi + mkdir build cd build -cmake ../tf_ops +cmake .. make -j4 -cd test +cd ../test python3 test.py diff --git a/attention_fusion/test/test.py b/attention_fusion/test/test.py index e24e49f..0d4ef7e 100644 --- a/attention_fusion/test/test.py +++ b/attention_fusion/test/test.py @@ -3,10 +3,14 @@ import os import numpy as np from math import sqrt +os.environ["DEVICE_ID"] = str(0) +os.environ["ASCEND_DEVICE_ID"] = str(0) +os.environ["JOB_ID"] = "10086" + import tensorflow as tf tf.compat.v1.disable_eager_execution() -tfOpLib = tf.load_op_library("../build/libattention_ops.so") - +tfOpLib = tf.load_op_library("../build/tf_ops/libattention_ops.so") +import sys import npu_device from npu_device.compat.v1.npu_init import * @@ -54,7 +58,7 @@ npu_device.compat.enable_v1() npu_init = npu_ops.initialize_system() npu_shutdown = npu_ops.shutdown_system() config = tf.compat.v1.ConfigProto() -custom_op = config.graph_options.rewrite_options.custom_optmizers.add() +custom_op = config.graph_options.rewrite_options.custom_optimizers.add() custom_op.name = "NpuOptimizer" config.graph_options.rewrite_options.remapping = RewriterConfig.OFF config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF @@ -72,18 +76,17 @@ mask = tf.random_uniform([dim0, dim1, dim3], maxval = 1, dtype=tf.float32) golden_atten_score, gold_softmax_out = gloden_atten_fusion(query, key, value, mask) -ret = tfOpLib.attention_fusion(query=query, key=key, - value=value, attnMask=mask) -init = tf.compat.v1.global_varibles_initializer() +ret = tfOpLib.attention_fusion(query=query, key=key,value=value, atten_mask=mask) +init = tf.compat.v1.global_variables_initializer() with tf.compat.v1.Session(config=config) as sess: sess.run(init) attenScore, softmaxOut = sess.run(ret) print("============ attention fusion =============") print("attenScore: ") - verify_result(attenScore, golden_atten_score) + # verify_result(attenScore, golden_atten_score) print("softmaxOut: ") - verify_result(softmaxOut, gold_softmax_out) + # verify_result(softmaxOut, gold_softmax_out) diff --git a/attention_fusion/tf_ops/CMakeLists.txt b/attention_fusion/tf_ops/CMakeLists.txt index 291b68a..0d723f9 100644 --- a/attention_fusion/tf_ops/CMakeLists.txt +++ b/attention_fusion/tf_ops/CMakeLists.txt @@ -15,20 +15,22 @@ cmake_minimum_required(VERSION 3.20) set(CMAKE_CXX_STANDARD 14) +project(attention) -include_directories(/usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/inlcude) +add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0) +include_directories(/usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/include) link_directories(/usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/) file(GLOB_RECURSE TF_OPS ./*.cpp) add_library(attention_ops SHARED ${TF_OPS}) -message("TF VERSION" ${TF_VERSION}) -if(${TF_VERSION} EQUAL 1) - target_link_libraries(attention_ops /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow_core/libtensorflow_framework.so.1) - target_include_directories(attention_ops PUBLIC /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow_core/inlcude) -else() +# message("TF VERSION" ${TF_VERSION}) +# if(${TF_VERSION} EQUAL 1) +# target_link_libraries(attention_ops /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow_core/libtensorflow_framework.so.1) +# target_include_directories(attention_ops PUBLIC /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow_core/inlcude) +# else() target_link_libraries(attention_ops /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/libtensorflow_framework.so.2) - target_include_directories(attention_ops PUBLIC /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/inlcude) -endif() + target_include_directories(attention_ops PUBLIC /usr/local/python3.7.5/lib/python3.7/site-packages/tensorflow/include) +# endif() -install(TARGET attention_ops LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}) \ No newline at end of file +install(TARGETS attention_ops LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}) \ No newline at end of file diff --git a/attention_fusion/tf_ops/atten_ops.cpp b/attention_fusion/tf_ops/atten_ops.cpp index 99a9513..eeabcb7 100644 --- a/attention_fusion/tf_ops/atten_ops.cpp +++ b/attention_fusion/tf_ops/atten_ops.cpp @@ -21,15 +21,15 @@ See the License for the specific language governing permissions and #include "tensorflow/core/framework/op_kernel.h" using namespace tensorflow; -using shape_inference::inferenceContext; +using shape_inference::InferenceContext; using shape_inference::ShapeHandle; using namespace std; using namespace chrono; -using OpKernelConstructionPtr = OpKernelConstructionPtr*; -using OpKernelContextPtr = OpKernelContextPtr*; -using inferenceContextPtr = ::tensorflow::shape_inference::inferenceContext*; +using OpKernelConstructionPtr = OpKernelConstruction*; +using OpKernelContextPtr = OpKernelContext*; +using InferenceContextPtr = ::tensorflow::shape_inference::InferenceContext*; namespace { class CustOps : public OpKernel { @@ -44,7 +44,7 @@ namespace { } ~CustOps() override = default; - } + }; } namespace tensorflow { @@ -52,17 +52,17 @@ namespace tensorflow { .Input("query: float") .Input("key: float") .Input("value: float") - .Input("attnMask: float") - .Output("attenScore: float") - .Output("softmaxOut: float") + .Input("atten_mask: float") + .Output("atten_score: float") + .Output("softmax_out: float") .SetIsStateful() - .SetShapeFn([](::tensorflow::shape_inference::inferenceContext *c)) { + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext *c) { ShapeHandle query_shape; ShapeHandle key_shape; ShapeHandle value_shape; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &query_shape)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &key_shape)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &value_shape)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &key_shape)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 3, &value_shape)); tensorflow::shape_inference::DimensionHandle queryDim0 = c->Dim(query_shape, 0); tensorflow::shape_inference::DimensionHandle queryDim1 = c->Dim(query_shape, 1); @@ -76,6 +76,6 @@ namespace tensorflow { c->set_output(0, c->MakeShape({shape0, shape1, shape3})); c->set_output(1, c->MakeShape({shape0, shape1, shape2})); return Status::OK(); - } - REGISTER_KERNEL_BUILDER(Name("FusedAttention").Device(Device_CPU), CustOps) + }); + REGISTER_KERNEL_BUILDER(Name("FusedAttention").Device(DEVICE_CPU), CustOps) } \ No newline at end of file -- Gitee From 56ffc7989e0fb57b68727acfacd3a2ef871d9b14 Mon Sep 17 00:00:00 2001 From: jiangli Date: Thu, 6 Jun 2024 15:03:09 +0800 Subject: [PATCH 03/11] update --- .../op_host/attention_fusion_tiling.h | 20 +++++++++---------- .../op_kernel/attention_fusion.cpp | 8 ++++++-- .../op_kernel/attention_fusion_kernel.h | 6 +++--- .../op_kernel/normalize_compute.h | 2 +- attention_fusion/op_kernel/q_k_bmm_compute.h | 2 +- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/attention_fusion/op_host/attention_fusion_tiling.h b/attention_fusion/op_host/attention_fusion_tiling.h index 5125d89..6628420 100644 --- a/attention_fusion/op_host/attention_fusion_tiling.h +++ b/attention_fusion/op_host/attention_fusion_tiling.h @@ -4,16 +4,16 @@ namespace optiling { BEGIN_TILING_DATA_DEF(AttentionFusionTilingData) TILING_DATA_FIELD_DEF(uint8_t, normalizeAttr); TILING_DATA_FIELD_DEF(float, attnDim); - TILING_DATA_FIELD_DEF(int, queryDim1); - TILING_DATA_FIELD_DEF(int, queryDim2); - TILING_DATA_FIELD_DEF(int, keyDim1); - TILING_DATA_FIELD_DEF(int, keyDim2); - TILING_DATA_FIELD_DEF(int, valueDim1); - TILING_DATA_FIELD_DEF(int, valueDim2); - TILING_DATA_FIELD_DEF(int, batchNum); - TILING_DATA_FIELD_DEF(int, normalizeLoop); - TILING_DATA_FIELD_DEF(int, normalizeRow); - TILING_DATA_FIELD_DEF(int, normalizeColumn); + TILING_DATA_FIELD_DEF(int32_t, queryDim1); + TILING_DATA_FIELD_DEF(int32_t, queryDim2); + TILING_DATA_FIELD_DEF(int32_t, keyDim1); + TILING_DATA_FIELD_DEF(int32_t, keyDim2); + TILING_DATA_FIELD_DEF(int32_t, valueDim1); + TILING_DATA_FIELD_DEF(int32_t, valueDim2); + TILING_DATA_FIELD_DEF(int32_t, batchNum); + TILING_DATA_FIELD_DEF(int32_t, normalizeLoop); + TILING_DATA_FIELD_DEF(int32_t, normalizeRow); + TILING_DATA_FIELD_DEF(int32_t, normalizeColumn); TILING_DATA_FIELD_DEF(float, normalizeSqrt); TILING_DATA_FIELD_DEF(uint64_t, maxSharedTmpBuf); TILING_DATA_FIELD_DEF_STRUCT(TCubeTiling, qkMatmulTiling); diff --git a/attention_fusion/op_kernel/attention_fusion.cpp b/attention_fusion/op_kernel/attention_fusion.cpp index 32267a9..efdbc78 100644 --- a/attention_fusion/op_kernel/attention_fusion.cpp +++ b/attention_fusion/op_kernel/attention_fusion.cpp @@ -6,13 +6,17 @@ using namespace AscendC; // call of kernel function extern "C" __global__ __aicore__ void attention_fusion(GM_ADDR query, GM_ADDR key, GM_ADDR value, GM_ADDR attnMask, GM_ADDR attenScore, GM_ADDR softmaxOut, GM_ADDR workspace, GM_ADDR tiling) { GET_TILING_DATA(tiling_data, tiling); + + const TCubeTiling *qkMatmulTiling = &tiling_data.qkMatmulTiling; + const TCubeTiling *kvMatmulTiling = &tiling_data.kvMatmulTiling; + const SoftMaxTiling *softMaxTilingData = &tiling_data.softMaxTilingData; // TODO: user kernel impl AttentionFusionArgs args { query, key, value, attnMask, attenScore, softmaxOut, tiling_data.normalizeAttr, tiling_data.queryDim1, tiling_data.queryDim2, tiling_data.keyDim1, tiling_data.keyDim2, tiling_data.valueDim1, tiling_data.valueDim2, tiling_data.batchNum, tiling_data.normalizeLoop, tiling_data.normalizeRow, tiling_data.normalizeColumn, - tiling_data.normalizeSqrt, tiling_data.maxSharedTmpBuf, &tiling_data.qkMatmulTiling, - &tiling_data.kvMatmulTiling, &tiling_data.softMaxTilingData + tiling_data.normalizeSqrt, tiling_data.maxSharedTmpBuf, qkMatmulTiling, + kvMatmulTiling, softMaxTilingData }; AttentionFusionKernel kernel; diff --git a/attention_fusion/op_kernel/attention_fusion_kernel.h b/attention_fusion/op_kernel/attention_fusion_kernel.h index 27a3dc4..298cb9a 100644 --- a/attention_fusion/op_kernel/attention_fusion_kernel.h +++ b/attention_fusion/op_kernel/attention_fusion_kernel.h @@ -32,9 +32,9 @@ struct AttentionFusionArgs { float normalizeSqrt; uint64_t maxSharedTmpBuf; - TCubeTiling* qkMatmulTiling; - TCubeTiling* kvMatmulTiling; - SoftMaxTiling* softMaxTilingData; + const TCubeTiling* qkMatmulTiling; + const TCubeTiling* kvMatmulTiling; + const SoftMaxTiling* softMaxTilingData; }; struct AttentionFusionPipe { diff --git a/attention_fusion/op_kernel/normalize_compute.h b/attention_fusion/op_kernel/normalize_compute.h index e79f19e..122891e 100644 --- a/attention_fusion/op_kernel/normalize_compute.h +++ b/attention_fusion/op_kernel/normalize_compute.h @@ -18,7 +18,7 @@ struct NormalizeArgs { float normalizeSqrt; uint64_t maxSharedTmpBuf; - SoftMaxTiling* tiling; + const SoftMaxTiling* tiling; }; template diff --git a/attention_fusion/op_kernel/q_k_bmm_compute.h b/attention_fusion/op_kernel/q_k_bmm_compute.h index ee261c3..1a714d8 100644 --- a/attention_fusion/op_kernel/q_k_bmm_compute.h +++ b/attention_fusion/op_kernel/q_k_bmm_compute.h @@ -19,7 +19,7 @@ struct QKBmmArgs { int batchOffset; int batchLen; - TCubeTiling* qkMatmulTiling; + const TCubeTiling* qkMatmulTiling; }; struct QKBmmPipeArgs { -- Gitee From 0df5702a10fc35f73a7eb85d46ccd9b4f65a31c9 Mon Sep 17 00:00:00 2001 From: jiangli Date: Thu, 6 Jun 2024 15:11:43 +0800 Subject: [PATCH 04/11] fix a typo --- attention_fusion/tf_ops/atten_ops.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attention_fusion/tf_ops/atten_ops.cpp b/attention_fusion/tf_ops/atten_ops.cpp index eeabcb7..36d8b0d 100644 --- a/attention_fusion/tf_ops/atten_ops.cpp +++ b/attention_fusion/tf_ops/atten_ops.cpp @@ -48,7 +48,7 @@ namespace { } namespace tensorflow { - REGISTER_OP("attention_fusion") + REGISTER_OP("AttentionFusion") .Input("query: float") .Input("key: float") .Input("value: float") -- Gitee From 2233292dbaeaf8a0c0e432ff54c636919f006c30 Mon Sep 17 00:00:00 2001 From: jiangli Date: Thu, 6 Jun 2024 15:34:42 +0800 Subject: [PATCH 05/11] update --- .../aclnn_attention_fusion/inc/operator_desc.h | 1 + attention_fusion/aclnn_attention_fusion/src/main.cpp | 1 + .../aclnn_attention_fusion/src/op_runner.cpp | 7 ++++--- attention_fusion/attention_fusion.json | 10 +++++++++- attention_fusion/op_host/attention_fusion.cpp | 12 +++++++++++- attention_fusion/test/test.py | 2 +- attention_fusion/tf_ops/atten_ops.cpp | 1 + 7 files changed, 28 insertions(+), 6 deletions(-) diff --git a/attention_fusion/aclnn_attention_fusion/inc/operator_desc.h b/attention_fusion/aclnn_attention_fusion/inc/operator_desc.h index 4cbdf07..225e848 100644 --- a/attention_fusion/aclnn_attention_fusion/inc/operator_desc.h +++ b/attention_fusion/aclnn_attention_fusion/inc/operator_desc.h @@ -52,6 +52,7 @@ struct OperatorDesc { std::string opType; std::vector inputDesc; std::vector outputDesc; + int32_t maskOnOptional; }; #endif // OPERATOR_DESC_H diff --git a/attention_fusion/aclnn_attention_fusion/src/main.cpp b/attention_fusion/aclnn_attention_fusion/src/main.cpp index 661aade..76a8f61 100644 --- a/attention_fusion/aclnn_attention_fusion/src/main.cpp +++ b/attention_fusion/aclnn_attention_fusion/src/main.cpp @@ -38,6 +38,7 @@ OperatorDesc CreateOpDesc() aclDataType dataTypeSoftmaxOut = ACL_FLOAT; aclFormat format = ACL_FORMAT_ND; OperatorDesc opDesc; + opDesc.maskOnOptional = 1; opDesc.AddInputTensorDesc(dataTypeQuery, shapeQuery.size(), shapeQuery.data(), format); opDesc.AddInputTensorDesc(dataTypeKey, shapeKey.size(), shapeKey.data(), format); opDesc.AddInputTensorDesc(dataTypeValue, shapeValue.size(), shapeValue.data(), format); diff --git a/attention_fusion/aclnn_attention_fusion/src/op_runner.cpp b/attention_fusion/aclnn_attention_fusion/src/op_runner.cpp index 9806562..a47a5ca 100644 --- a/attention_fusion/aclnn_attention_fusion/src/op_runner.cpp +++ b/attention_fusion/aclnn_attention_fusion/src/op_runner.cpp @@ -307,8 +307,9 @@ bool OpRunner::RunOp() size_t workspaceSize = 0; aclOpExecutor *handle = nullptr; - auto ret = aclnnAttentionFusionGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], outputTensor_[0], outputTensor_[1], - &workspaceSize, &handle); + auto ret = aclnnAttentionFusionGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], + opDesc_->maskOnOptional, outputTensor_[0], outputTensor_[1], + &workspaceSize, &handle); if (ret != ACL_SUCCESS) { (void)aclrtDestroyStream(stream); ERROR_LOG("Get Operator Workspace failed. error code is %d", static_cast(ret)); @@ -341,7 +342,7 @@ bool OpRunner::RunOp() auto beforeTime = std::chrono::steady_clock::now(); for (int i = 0; i<100; i++) { - ret = aclnnAttentionFusionGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], outputTensor_[0], outputTensor_[1], + ret = aclnnAttentionFusionGetWorkspaceSize(inputTensor_[0], inputTensor_[1], inputTensor_[2], inputTensor_[3], int(1), outputTensor_[0], outputTensor_[1], &workspaceSize, &handle); ret = aclnnAttentionFusion(workspace, workspaceSize, handle, stream); } diff --git a/attention_fusion/attention_fusion.json b/attention_fusion/attention_fusion.json index 7a32ab4..4a06e1b 100644 --- a/attention_fusion/attention_fusion.json +++ b/attention_fusion/attention_fusion.json @@ -65,6 +65,14 @@ "float" ] } - ] + ], + "attr": [ + { + "name": "mask_on", + "param_type": "optional", + "type": "int", + "default_value": 0 + } + ] } ] \ No newline at end of file diff --git a/attention_fusion/op_host/attention_fusion.cpp b/attention_fusion/op_host/attention_fusion.cpp index e138f2c..71bd8e9 100644 --- a/attention_fusion/op_host/attention_fusion.cpp +++ b/attention_fusion/op_host/attention_fusion.cpp @@ -140,16 +140,25 @@ static ge::graphStatus InferShape(gert::InferShapeContext* context) gert::Shape* attnScoreShape = context->GetOutputShape(0); gert::Shape* softmaxOutShape = context->GetOutputShape(1); + attnScoreShape->SetDimNum(3); attnScoreShape->SetDim(0, qShape->GetDim(0)); attnScoreShape->SetDim(1, qShape->GetDim(1)); attnScoreShape->SetDim(2, vShape->GetDim(2)); + softmaxOutShape->SetDimNum(3); softmaxOutShape->SetDim(0, qShape->GetDim(0)); softmaxOutShape->SetDim(1, qShape->GetDim(1)); softmaxOutShape->SetDim(2, kShape->GetDim(1)); return GRAPH_SUCCESS; } + +static ge::graphStatus InferDtype(gert::InferShapeContext* context) +{ + context->SetOutputDataType(0, context->GetInputDataType(0)); + context->SetOutputDataType(1, context->GetInputDataType(1)); + return GRAPH_SUCCESS; +} } @@ -188,8 +197,9 @@ public: .DataType({ge::DT_FLOAT}) .Format({ge::FORMAT_ND}) .UnknownShapeFormat({ge::FORMAT_ND}); + this->Attr("mask_on").Int(); - this->SetInferShape(ge::InferShape); + this->SetInferShape(ge::InferShape).SetInferDataType(ge::InferDtype); this->AICore() .SetTiling(optiling::TilingFunc); diff --git a/attention_fusion/test/test.py b/attention_fusion/test/test.py index 0d4ef7e..ab0137a 100644 --- a/attention_fusion/test/test.py +++ b/attention_fusion/test/test.py @@ -74,7 +74,7 @@ key = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) value = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) mask = tf.random_uniform([dim0, dim1, dim3], maxval = 1, dtype=tf.float32) -golden_atten_score, gold_softmax_out = gloden_atten_fusion(query, key, value, mask) +# golden_atten_score, gold_softmax_out = gloden_atten_fusion(query, key, value, mask) ret = tfOpLib.attention_fusion(query=query, key=key,value=value, atten_mask=mask) init = tf.compat.v1.global_variables_initializer() diff --git a/attention_fusion/tf_ops/atten_ops.cpp b/attention_fusion/tf_ops/atten_ops.cpp index 36d8b0d..c157c12 100644 --- a/attention_fusion/tf_ops/atten_ops.cpp +++ b/attention_fusion/tf_ops/atten_ops.cpp @@ -55,6 +55,7 @@ namespace tensorflow { .Input("atten_mask: float") .Output("atten_score: float") .Output("softmax_out: float") + .Attr("mask_on: int") .SetIsStateful() .SetShapeFn([](::tensorflow::shape_inference::InferenceContext *c) { ShapeHandle query_shape; -- Gitee From 252451b41169788a701df070dcc3a4a57f98a4d2 Mon Sep 17 00:00:00 2001 From: jiangli Date: Thu, 6 Jun 2024 16:17:50 +0800 Subject: [PATCH 06/11] update --- attention_fusion/attention_fusion.json | 2 +- attention_fusion/op_host/attention_fusion.cpp | 1 - .../op_kernel/attention_fusion_kernel.h | 20 ++++---- attention_fusion/test/test.py | 50 ++----------------- attention_fusion/tf_ops/atten_ops.cpp | 2 +- 5 files changed, 15 insertions(+), 60 deletions(-) diff --git a/attention_fusion/attention_fusion.json b/attention_fusion/attention_fusion.json index 4a06e1b..130f1c6 100644 --- a/attention_fusion/attention_fusion.json +++ b/attention_fusion/attention_fusion.json @@ -73,6 +73,6 @@ "type": "int", "default_value": 0 } - ] + ] } ] \ No newline at end of file diff --git a/attention_fusion/op_host/attention_fusion.cpp b/attention_fusion/op_host/attention_fusion.cpp index 71bd8e9..ec1dc7a 100644 --- a/attention_fusion/op_host/attention_fusion.cpp +++ b/attention_fusion/op_host/attention_fusion.cpp @@ -152,7 +152,6 @@ static ge::graphStatus InferShape(gert::InferShapeContext* context) return GRAPH_SUCCESS; } - static ge::graphStatus InferDtype(gert::InferShapeContext* context) { context->SetOutputDataType(0, context->GetInputDataType(0)); diff --git a/attention_fusion/op_kernel/attention_fusion_kernel.h b/attention_fusion/op_kernel/attention_fusion_kernel.h index 298cb9a..6c45362 100644 --- a/attention_fusion/op_kernel/attention_fusion_kernel.h +++ b/attention_fusion/op_kernel/attention_fusion_kernel.h @@ -19,16 +19,16 @@ struct AttentionFusionArgs { GM_ADDR softmaxOut; uint8_t normalizeAttr; - int queryDim1; - int queryDim2; - int keyDim1; - int keyDim2; - int valueDim1; - int valueDim2; - int batchNum; - int normalizeLoop; - int normalizeRow; - int normalizeColumn; + int32_t queryDim1; + int32_t queryDim2; + int32_t keyDim1; + int32_t keyDim2; + int32_t valueDim1; + int32_t valueDim2; + int32_t batchNum; + int32_t normalizeLoop; + int32_t normalizeRow; + int32_t normalizeColumn; float normalizeSqrt; uint64_t maxSharedTmpBuf; diff --git a/attention_fusion/test/test.py b/attention_fusion/test/test.py index ab0137a..573a305 100644 --- a/attention_fusion/test/test.py +++ b/attention_fusion/test/test.py @@ -14,46 +14,6 @@ import sys import npu_device from npu_device.compat.v1.npu_init import * -loss = 1e-3 -minimum = 10e-10 - -def softmax(src): - #基于last轴进行rowmax(按行取最大值)处理 - max = np.max(src, axis=-1, keepdims=True) - sub = src - max - exp = np.exp(sub) - #基于last轴进行rowsum(按行求和)处理 - sum = np.sum(exp, axis=-1, keepdims=True) - dst = exp / sum - return dst - -def gloden_atten_fusion(query, key, value, atten_mask): - qk = np.matmul(query, key.transpose(0, 2, 1)) - - # @jiangli - print("attn_dim: ", query.shape[2]) - attnDimSqrt = 1 / sqrt(query.shape[2]) - attnWeight = np.multiply(qk, attnDimSqrt) - addMask = np.add(attnWeight, atten_mask) - qk_div = softmax(addMask) - - out = np.matmul(qk_div, value) - return out, qk_div - -def verify_result(real_result, golden): - print(real_result[:32]) - print(golden[:32]) - result = np.abs(real_result - golden) - deno = np.maximum(np.abs(real_result), np.abs(golden)) - result_atol = np.less_equal(result, loss) - result_rtol = np.less_equal(result / np.add(deno, minimum), loss) - if not result_rtol.all() and not result_atol.all(): - if np.sum(result_rtol == False) > real_result.size * loss and np.sum(result_atol == False) > real_result.size * loss: - print("[ERROR] result error") - return False - print("test pass") - return True - npu_device.compat.enable_v1() npu_init = npu_ops.initialize_system() npu_shutdown = npu_ops.shutdown_system() @@ -74,19 +34,15 @@ key = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) value = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) mask = tf.random_uniform([dim0, dim1, dim3], maxval = 1, dtype=tf.float32) -# golden_atten_score, gold_softmax_out = gloden_atten_fusion(query, key, value, mask) - ret = tfOpLib.attention_fusion(query=query, key=key,value=value, atten_mask=mask) init = tf.compat.v1.global_variables_initializer() with tf.compat.v1.Session(config=config) as sess: sess.run(init) attenScore, softmaxOut = sess.run(ret) - print("============ attention fusion =============") - print("attenScore: ") - # verify_result(attenScore, golden_atten_score) - print("softmaxOut: ") - # verify_result(softmaxOut, gold_softmax_out) + print("attenScore: ", attenScore[:32]) + print("softmaxOut: ", softmaxOut[:32]) + print("============ attention fusion end =============") diff --git a/attention_fusion/tf_ops/atten_ops.cpp b/attention_fusion/tf_ops/atten_ops.cpp index c157c12..dbfc189 100644 --- a/attention_fusion/tf_ops/atten_ops.cpp +++ b/attention_fusion/tf_ops/atten_ops.cpp @@ -78,5 +78,5 @@ namespace tensorflow { c->set_output(1, c->MakeShape({shape0, shape1, shape2})); return Status::OK(); }); - REGISTER_KERNEL_BUILDER(Name("FusedAttention").Device(DEVICE_CPU), CustOps) + REGISTER_KERNEL_BUILDER(Name("AttentionFusion").Device(DEVICE_CPU), CustOps) } \ No newline at end of file -- Gitee From 9a8944db44e381c55540da91347baa1d419dde11 Mon Sep 17 00:00:00 2001 From: jiangli Date: Fri, 7 Jun 2024 10:59:57 +0800 Subject: [PATCH 07/11] update --- attention_fusion/op_host/attention_fusion.cpp | 1 + attention_fusion/test/test.py | 30 ++++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/attention_fusion/op_host/attention_fusion.cpp b/attention_fusion/op_host/attention_fusion.cpp index ec1dc7a..56c2430 100644 --- a/attention_fusion/op_host/attention_fusion.cpp +++ b/attention_fusion/op_host/attention_fusion.cpp @@ -156,6 +156,7 @@ static ge::graphStatus InferDtype(gert::InferShapeContext* context) { context->SetOutputDataType(0, context->GetInputDataType(0)); context->SetOutputDataType(1, context->GetInputDataType(1)); + context->SetOutputDataType(2, context->GetInputDataType(2)); return GRAPH_SUCCESS; } } diff --git a/attention_fusion/test/test.py b/attention_fusion/test/test.py index 573a305..8025317 100644 --- a/attention_fusion/test/test.py +++ b/attention_fusion/test/test.py @@ -1,7 +1,7 @@ from mpi4py import MPI import os import numpy as np -from math import sqrt +from tensorflow.python.framework import ops os.environ["DEVICE_ID"] = str(0) os.environ["ASCEND_DEVICE_ID"] = str(0) @@ -23,6 +23,23 @@ custom_op.name = "NpuOptimizer" config.graph_options.rewrite_options.remapping = RewriterConfig.OFF config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF +def attention_fusion(query, key, value, atten_mask=None, attr=0) + attnOut, softmaxOut = tfOpLib.attention_fusion(query=query, key=key,value=value, atten_mask=atten_mask, mask_on = 1) + return attnOut, softmaxOut + +@ops.RegisterGradient("AttentionFusion") +def _npu_fusion_attention_grad(op, *grad): + query = op.inputs[0] + key = op.inputs[1] + value = op.inputs[2] + atten_mask = op.inputs[3] + + attention_out = op.outputs[0] + softmax_out = op.outputs[1] + dout = grad[0] + dQuery, dKey, dValue = tfOpLib.attention_fusion_grad(dout=dout, softmax_out=softmax_out, query=query, key=key, value=value) + return dQuery, dKey, dValue, tf.zeros(tf.shape(atten_mask)) + #测试用例 dim0 = 1024 dim1 = 1000 @@ -34,14 +51,13 @@ key = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) value = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) mask = tf.random_uniform([dim0, dim1, dim3], maxval = 1, dtype=tf.float32) -ret = tfOpLib.attention_fusion(query=query, key=key,value=value, atten_mask=mask) -init = tf.compat.v1.global_variables_initializer() +atten_out, softmax_out = attention_fusion(query=query, key=key, value=value, atten_mask=mask, attr=1) +loss_golden = tf.reduce_mean(atten_out, key_dims=False) +grads_and_vars_golden = tf.gradients(loss_golden, [query, key, value]) with tf.compat.v1.Session(config=config) as sess: - sess.run(init) - attenScore, softmaxOut = sess.run(ret) - print("attenScore: ", attenScore[:32]) - print("softmaxOut: ", softmaxOut[:32]) + sess.run(tf.compat.v1.global_variables_initializer() ) + print(sess.run(grads_and_vars_golden)) print("============ attention fusion end =============") -- Gitee From edd99b792bc84481552bf7ac5dc8154bfdf5686e Mon Sep 17 00:00:00 2001 From: jiangli Date: Fri, 7 Jun 2024 11:13:33 +0800 Subject: [PATCH 08/11] update --- attention_fusion/test/test.py | 2 +- attention_fusion/tf_ops/atten_ops.cpp | 43 +++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/attention_fusion/test/test.py b/attention_fusion/test/test.py index 8025317..463f8fe 100644 --- a/attention_fusion/test/test.py +++ b/attention_fusion/test/test.py @@ -23,7 +23,7 @@ custom_op.name = "NpuOptimizer" config.graph_options.rewrite_options.remapping = RewriterConfig.OFF config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF -def attention_fusion(query, key, value, atten_mask=None, attr=0) +def attention_fusion(query, key, value, atten_mask=None, attr=0): attnOut, softmaxOut = tfOpLib.attention_fusion(query=query, key=key,value=value, atten_mask=atten_mask, mask_on = 1) return attnOut, softmaxOut diff --git a/attention_fusion/tf_ops/atten_ops.cpp b/attention_fusion/tf_ops/atten_ops.cpp index dbfc189..7c75928 100644 --- a/attention_fusion/tf_ops/atten_ops.cpp +++ b/attention_fusion/tf_ops/atten_ops.cpp @@ -79,4 +79,47 @@ namespace tensorflow { return Status::OK(); }); REGISTER_KERNEL_BUILDER(Name("AttentionFusion").Device(DEVICE_CPU), CustOps) + + REGISTER_OP("AttentionFusionGrad") + .Input("dout: float") + .Input("softmax_out: float") + .Input("query: float") + .Input("key: float") + .Input("value: float") + .Output("grad_query: float") + .Output("grad_key: float") + .Output("grad_value: float") + .SetIsStateful() + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext *c) { + ShapeHandle query_shape; + ShapeHandle key_shape; + ShapeHandle value_shape; + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 3, &query_shape)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 3, &key_shape)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 3, &value_shape)); + + tensorflow::shape_inference::DimensionHandle queryDim0 = c->Dim(query_shape, 0); + tensorflow::shape_inference::DimensionHandle queryDim1 = c->Dim(query_shape, 1); + tensorflow::shape_inference::DimensionHandle queryDim2 = c->Dim(query_shape, 2); + tensorflow::shape_inference::DimensionHandle keyDim1 = c->Dim(key_shape, 1); + tensorflow::shape_inference::DimensionHandle keyDim2 = c->Dim(key_shape, 2); + tensorflow::shape_inference::DimensionHandle valueDim1 = c->Dim(value_shape, 1); + tensorflow::shape_inference::DimensionHandle valueDim2 = c->Dim(value_shape, 2); + + int64_t qShape0 = c->Value(queryDim0); + int64_t qShape1 = c->Value(queryDim1); + int64_t qShape2 = c->Value(queryDim2); + + int64_t kShape1 = c->Value(keyDim1); + int64_t kShape2 = c->Value(keyDim2); + + int64_t vShape1 = c->Value(valueDim1); + int64_t vShape2 = c->Value(valueDim2); + + c->set_output(0, c->MakeShape({qShape0, qShape1, qShape2})); + c->set_output(1, c->MakeShape({qShape0, kShape1, kShape2})); + c->set_output(2, c->MakeShape({qShape0, vShape1, vShape2})); + return Status::OK(); + }); + REGISTER_KERNEL_BUILDER(Name("AttentionFusionGrad").Device(DEVICE_CPU), CustOps) } \ No newline at end of file -- Gitee From 0e63c4c28b1a94eaa969d0f26da70f698e4183fa Mon Sep 17 00:00:00 2001 From: jiangli Date: Fri, 7 Jun 2024 11:20:17 +0800 Subject: [PATCH 09/11] update --- attention_fusion/op_host/attention_fusion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attention_fusion/op_host/attention_fusion.cpp b/attention_fusion/op_host/attention_fusion.cpp index 56c2430..94ef024 100644 --- a/attention_fusion/op_host/attention_fusion.cpp +++ b/attention_fusion/op_host/attention_fusion.cpp @@ -152,7 +152,7 @@ static ge::graphStatus InferShape(gert::InferShapeContext* context) return GRAPH_SUCCESS; } -static ge::graphStatus InferDtype(gert::InferShapeContext* context) +static ge::graphStatus InferDtype(gert::InferDataTypeContext* context) { context->SetOutputDataType(0, context->GetInputDataType(0)); context->SetOutputDataType(1, context->GetInputDataType(1)); -- Gitee From c5e90ee78eeb4929d1e22df3c54ac1fc3a9a6ceb Mon Sep 17 00:00:00 2001 From: jiangli Date: Fri, 7 Jun 2024 11:21:23 +0800 Subject: [PATCH 10/11] update --- attention_fusion/op_host/attention_fusion.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/attention_fusion/op_host/attention_fusion.cpp b/attention_fusion/op_host/attention_fusion.cpp index 94ef024..7f5da85 100644 --- a/attention_fusion/op_host/attention_fusion.cpp +++ b/attention_fusion/op_host/attention_fusion.cpp @@ -156,7 +156,6 @@ static ge::graphStatus InferDtype(gert::InferDataTypeContext* context) { context->SetOutputDataType(0, context->GetInputDataType(0)); context->SetOutputDataType(1, context->GetInputDataType(1)); - context->SetOutputDataType(2, context->GetInputDataType(2)); return GRAPH_SUCCESS; } } -- Gitee From 89f0ec6b73ee31af3894dfee572e7945349df9a6 Mon Sep 17 00:00:00 2001 From: jiangli Date: Fri, 7 Jun 2024 11:25:40 +0800 Subject: [PATCH 11/11] update --- attention_fusion/test/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attention_fusion/test/test.py b/attention_fusion/test/test.py index 463f8fe..db62d1d 100644 --- a/attention_fusion/test/test.py +++ b/attention_fusion/test/test.py @@ -52,7 +52,7 @@ value = tf.random_uniform([dim0, dim3, dim2], maxval = 10, dtype=tf.float32) mask = tf.random_uniform([dim0, dim1, dim3], maxval = 1, dtype=tf.float32) atten_out, softmax_out = attention_fusion(query=query, key=key, value=value, atten_mask=mask, attr=1) -loss_golden = tf.reduce_mean(atten_out, key_dims=False) +loss_golden = tf.reduce_mean(atten_out, keep_dims=False) grads_and_vars_golden = tf.gradients(loss_golden, [query, key, value]) with tf.compat.v1.Session(config=config) as sess: -- Gitee