From 491af67732bd6d1959e9f386b0ec39e6cdfb1fda Mon Sep 17 00:00:00 2001 From: PaddlePaddle-Gardener Date: Fri, 14 Jan 2022 14:24:17 +0800 Subject: [PATCH] mirgate_38878 --- .../eager/accumulation/accumulation_node.cc | 1 - .../accumulation/gradient_accumulation.cc | 337 ++++++++++++++ .../eager_generated/backwards/scale_node.cc | 172 +++++++ .../eager_generated/forwards/scale.cc | 99 ++++ paddle/fluid/eager/eager_tensor.h | 1 - paddle/fluid/eager/grad_node_info.h | 1 - .../eager/legacy/infer_var_type_context.h | 260 +++++++++++ paddle/fluid/eager/legacy/prepared_operator.h | 82 ++++ paddle/fluid/eager/legacy/tensor_helper.h | 33 ++ .../framework/data_device_transform_test.cu | 11 +- paddle/fluid/framework/operator.h | 3 +- paddle/fluid/imperative/layer.h | 21 +- paddle/fluid/imperative/op_base.h | 19 + paddle/fluid/imperative/prepared_operator.h | 25 +- paddle/fluid/operators/cast_op.h | 1 - paddle/fluid/operators/conj_op.h | 3 +- paddle/fluid/operators/dot_op.h | 1 - .../elementwise/elementwise_add_op.h | 1 - .../elementwise/elementwise_mul_op.h | 1 - .../elementwise/elementwise_op_function.h | 1 - .../elementwise/elementwise_op_impl.cu.h | 1 - .../elementwise/elementwise_sub_op.h | 1 - paddle/fluid/operators/fill_any_like_op.h | 1 - paddle/fluid/operators/flatten_op.h | 1 - paddle/fluid/operators/matmul_v2_op.h | 1 - paddle/fluid/operators/reduce_ops/reduce_op.h | 2 - paddle/fluid/operators/reshape_op.cc | 1 - paddle/fluid/operators/scale_op.h | 31 +- paddle/fluid/operators/sign_op.h | 1 - paddle/fluid/pybind/eager.cc | 1 - paddle/fluid/pybind/eager_functions.cc | 1 - paddle/fluid/pybind/eager_method.cc | 1 - paddle/fluid/pybind/eager_properties.cc | 1 - paddle/fluid/pybind/eager_utils.cc | 1 - paddle/pten/CMakeLists.txt | 2 +- paddle/pten/all.cc | 0 paddle/pten/all.h | 20 - paddle/pten/api/lib/utils.cc | 81 ++++ paddle/pten/include/core.h | 0 paddle/pten/include/infermeta.h | 21 - paddle/pten/include/math.h | 39 -- paddle/pten/kernels/complex_kernel.h | 3 - paddle/pten/kernels/cpu/scale_kernel.cc | 65 +++ paddle/pten/kernels/flatten_kernel.h | 2 +- paddle/pten/kernels/gpu/scale_kernel.cu | 14 +- .../kernels/impl/matmul_grad_kernel_impl.h | 3 +- paddle/pten/kernels/impl/scale_kernel_impl.h | 50 -- paddle/pten/kernels/math_kernel.h | 3 +- paddle/pten/kernels/reshape_kernel.h | 2 +- paddle/pten/kernels/scale_kernel.h | 44 ++ paddle/pten/kernels/sign_kernel.h | 2 +- paddle/pten/tests/api/scale_api.h | 279 +++++++++++ .../pten/tests/kernels/test_scale_dev_api.cc | 116 +++++ python/paddle/utils/code_gen/api_gen.py | 435 ++++++++++++++++++ 54 files changed, 2094 insertions(+), 205 deletions(-) delete mode 100644 paddle/pten/all.cc delete mode 100644 paddle/pten/all.h delete mode 100644 paddle/pten/include/core.h delete mode 100644 paddle/pten/include/infermeta.h delete mode 100644 paddle/pten/include/math.h delete mode 100644 paddle/pten/kernels/impl/scale_kernel_impl.h diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index ed1146eed0..823c0153d7 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -18,7 +18,6 @@ #include "paddle/pten/api/all.h" #include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/include/core.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/eager/accumulation/gradient_accumulation.cc b/paddle/fluid/eager/accumulation/gradient_accumulation.cc index 
e69de29bb2..1f66596a0b 100644
--- a/paddle/fluid/eager/accumulation/gradient_accumulation.cc
+++ b/paddle/fluid/eager/accumulation/gradient_accumulation.cc
@@ -0,0 +1,337 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/eager/accumulation/gradient_accumulation.h"
+#include <algorithm>
+#include <memory>
+#include <utility>
+#include "paddle/fluid/eager/eager_tensor.h"
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/operators/math/math_function_impl.h"
+#include "paddle/fluid/operators/math/selected_rows_functor.h"
+#include "paddle/fluid/platform/complex.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/float16.h"
+#include "paddle/pten/api/all.h"
+#include "paddle/pten/core/convert_utils.h"
+#include "unsupported/Eigen/CXX11/Tensor"
+#ifdef PADDLE_WITH_XPU
+#include "xpu/refactor/math.h"
+#endif
+#ifdef PADDLE_WITH_ASCEND_CL
+#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
+#endif
+
+namespace egr {
+template <typename T>
+class TensorAddFunctor : public boost::static_visitor<> {
+ public:
+  TensorAddFunctor(int64_t numel, const T* x, T* y)
+      : numel_(numel), x_(x), y_(y) {}
+
+  void operator()(const paddle::platform::CPUPlace& place) {
+    paddle::platform::CPUDeviceContext* ctx =
+        dynamic_cast<paddle::platform::CPUDeviceContext*>(
+            paddle::platform::DeviceContextPool::Instance().Get(place));
+    auto blas =
+        paddle::operators::math::GetBlas<paddle::platform::CPUDeviceContext,
+                                         T>(*ctx);
+    blas.AXPY(numel_, 1., x_, y_);
+  }
+
+// TODO(jiabin): Support xpu here from gradient_accumulator.cc
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  void operator()(const paddle::platform::CUDAPlace& place) {
+    paddle::platform::CUDADeviceContext* ctx =
+        dynamic_cast<paddle::platform::CUDADeviceContext*>(
+            paddle::platform::DeviceContextPool::Instance().Get(place));
+    auto blas =
+        paddle::operators::math::GetBlas<paddle::platform::CUDADeviceContext,
+                                         T>(*ctx);
+    blas.AXPY(numel_, 1., x_, y_);
+  }
+#else
+  void operator()(const paddle::platform::CUDAPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+#endif
+
+  // TODO(jiabin): Support Npu here from gradient_accumulator.cc
+  // there is NO blas in CUDAPinnedPlace
+  void operator()(const paddle::platform::CUDAPinnedPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+
+#ifdef PADDLE_WITH_ASCEND_CL
+  void operator()(const paddle::platform::NPUPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+#else
+  void operator()(const paddle::platform::NPUPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+#endif
+
+#ifdef PADDLE_WITH_XPU
+  void operator()(const paddle::platform::XPUPlace& place) {
+    paddle::platform::XPUDeviceContext* ctx =
+        dynamic_cast<paddle::platform::XPUDeviceContext*>(
+            paddle::platform::DeviceContextPool::Instance().Get(place));
+    xpu::add<T>(ctx->x_context(), x_, y_, y_, static_cast<int>(numel_));
+  }
+#else
+  void operator()(const paddle::platform::XPUPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+#endif
+
+#ifdef PADDLE_WITH_MLU
+  void operator()(const paddle::platform::MLUPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+#else
+  void operator()(const paddle::platform::MLUPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+#endif
+
+#ifdef PADDLE_WITH_IPU
+  void operator()(const paddle::platform::IPUPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+#else
+  void operator()(const paddle::platform::IPUPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+#endif
+
+  void operator()(const paddle::platform::NPUPinnedPlace& place) {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Gradient accumulation on place (%s) "
+        "is not supported in imperative mode",
+        place));
+  }
+
+ private:
+  int64_t numel_;
+  const T* x_;
+  T* y_;
+};
+
+template <typename DeviceContext, typename T>
+void TensorAddImpl(const std::shared_ptr<pten::DenseTensor>& src,
+                   pten::DenseTensor* dst,
+                   const paddle::platform::Place& place) {
+  paddle::platform::DeviceContextPool& pool =
+      paddle::platform::DeviceContextPool::Instance();
+  paddle::platform::DeviceContext* ctx = pool.Get(place);
+  auto dev_ctx = dynamic_cast<DeviceContext*>(ctx);
+  paddle::operators::math::ElementwiseAddTo<DeviceContext, T> func;
+  func(dev_ctx, *(src.get()), dst);
+}
+
+template <typename DeviceContext, typename T>
+void TensorAddImpl(const paddle::framework::Tensor& src,
+                   paddle::framework::Tensor* dst,
+                   const paddle::platform::Place& place) {
+  paddle::platform::DeviceContextPool& pool =
+      paddle::platform::DeviceContextPool::Instance();
+  paddle::platform::DeviceContext* ctx = pool.Get(place);
+  auto dev_ctx = dynamic_cast<DeviceContext*>(ctx);
+  paddle::operators::math::ElementwiseAddTo<DeviceContext, T> func;
+  func(dev_ctx, src, dst);
+}
+
+void TensorAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) {
+  // TODO(jiabin): Support other tensor type later
+  std::shared_ptr<pten::DenseTensor> dst_tensor =
+      std::dynamic_pointer_cast<pten::DenseTensor>(dst->impl());
+  std::shared_ptr<pten::DenseTensor> src_tensor =
+      std::dynamic_pointer_cast<pten::DenseTensor>(src.impl());
+
+  auto numel = src_tensor->numel();
+
+  if (numel == 0) {
+    return;
+  }
+
+  PADDLE_ENFORCE_EQ(
+      dst_tensor->numel(), numel,
+      paddle::platform::errors::PreconditionNotMet(
+          "The number of elements of source tensor and destination tensor "
+          "should be equal, but got the number of elements of source tensor "
+          "is %zu and the number of elements of destination tensor is %zu.",
+          numel, dst_tensor->numel()));
+
+  auto data_type = pten::TransToProtoVarType(src_tensor->dtype());
+  auto place = src_tensor->place();
+
+  PADDLE_ENFORCE_EQ(pten::TransToProtoVarType(dst_tensor->dtype()), data_type,
+                    paddle::platform::errors::PreconditionNotMet(
+                        "The data type of source tensor and destination "
+                        "tensor should be equal, otherwise the calculation "
+                        "results will be incorrect."));
+
+#define PADDLE_TENSOR_ADD(cpp_type)                                          \
+  if (data_type == paddle::framework::DataTypeTrait<cpp_type>::DataType()) { \
+    TensorAddFunctor<cpp_type> func(numel, src_tensor->data<cpp_type>(),     \
+                                    dst_tensor->mutable_data<cpp_type>());   \
+    boost::apply_visitor(func, place);                                       \
+    return;                                                                  \
+  }
+
+  // TODO(jiabin): Support NPU here
+  PADDLE_TENSOR_ADD(float);
+// NOTE(phlrain): xpu only support float
+#ifndef PADDLE_WITH_XPU
+  PADDLE_TENSOR_ADD(double);
+  // NOTE(chenweihang): only support complex grad tensor accumulated,
+  // support selected rows if needed in the future
+  PADDLE_TENSOR_ADD(paddle::platform::complex<float>);
+  PADDLE_TENSOR_ADD(paddle::platform::complex<double>);
+#endif
+#undef PADDLE_TENSOR_ADD
+
+  if (data_type == paddle::framework::proto::VarType::FP16) {
+    if (paddle::platform::is_gpu_place(place)) {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+      return TensorAddImpl<paddle::platform::CUDADeviceContext,
+                           paddle::platform::float16>(src_tensor,
+                                                      dst_tensor.get(), place);
+#else
+      PADDLE_THROW(paddle::platform::errors::Unimplemented(
+          "Gradient accumulation of data type (%s) on place (%s) is not "
+          "supported in imperative mode",
+          paddle::framework::DataTypeToString(data_type), place));
+#endif
+    } else if (paddle::platform::is_cpu_place(place)) {
+      return TensorAddImpl<paddle::platform::CPUDeviceContext,
+                           paddle::platform::float16>(src_tensor,
+                                                      dst_tensor.get(), place);
+    }
+  }
+  PADDLE_THROW(paddle::platform::errors::Unimplemented(
+      "Gradient accumulation of data type (%s) on place (%s) is not "
+      "supported in imperative mode",
+      paddle::framework::DataTypeToString(data_type), place));
+}
+
+void VariableAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) {
+  // TODO(jiabin): Support other tensor type later
+  auto* dst_tensor =
+      dst->MutableVar()->GetMutable<paddle::framework::LoDTensor>();
+  auto& src_tensor = src.Var().Get<paddle::framework::LoDTensor>();
+
+  auto numel = src_tensor.numel();
+
+  // FIXME(minqiyang): loss_grad op will pass a zero grad of label
+  // ugly fix for it
+  if (numel == 0) {
+    return;
+  }
+
+  PADDLE_ENFORCE_EQ(
+      dst_tensor->numel(), numel,
+      paddle::platform::errors::PreconditionNotMet(
+          "The number of elements of source tensor and destination tensor "
+          "should be equal, but got the number of elements of source tensor "
+          "is %zu and the number of elements of destination tensor is %zu.",
+          numel, dst_tensor->numel()));
+
+  auto data_type = src_tensor.type();
+  auto place = src_tensor.place();
+
+  PADDLE_ENFORCE_EQ(dst_tensor->type(), data_type,
+                    paddle::platform::errors::PreconditionNotMet(
+                        "The data type of source tensor and destination "
+                        "tensor should be equal, otherwise the calculation "
+                        "results will be incorrect."));
+
+#define PADDLE_TENSOR_ADD(cpp_type)                                          \
+  if (data_type == paddle::framework::DataTypeTrait<cpp_type>::DataType()) { \
+    TensorAddFunctor<cpp_type> func(                                         \
+        numel, src_tensor.data<cpp_type>(),                                  \
+        dst_tensor->mutable_data<cpp_type>(place));                          \
+    boost::apply_visitor(func, place);                                       \
+    return;                                                                  \
+  }
+
+  // TODO(jiabin): Support NPU here
+  PADDLE_TENSOR_ADD(float);
+// NOTE(phlrain): xpu only support float
+#ifndef PADDLE_WITH_XPU
+  PADDLE_TENSOR_ADD(double);
+  // NOTE(chenweihang): only support complex grad tensor accumulated,
+  // support selected rows if needed in the future
+  PADDLE_TENSOR_ADD(paddle::platform::complex<float>);
+  PADDLE_TENSOR_ADD(paddle::platform::complex<double>);
+#endif
+#undef PADDLE_TENSOR_ADD
+
+  if (data_type == paddle::framework::proto::VarType::FP16) {
+    if (paddle::platform::is_gpu_place(place)) {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+      return TensorAddImpl<paddle::platform::CUDADeviceContext,
+                           paddle::platform::float16>(src_tensor, dst_tensor,
+                                                      place);
+#else
+      PADDLE_THROW(paddle::platform::errors::Unimplemented(
+          "Gradient accumulation of data type (%s) on place (%s) is not "
+          "supported in imperative mode",
+          paddle::framework::DataTypeToString(data_type), place));
+#endif
+    } else if (paddle::platform::is_cpu_place(place)) {
+      return TensorAddImpl<paddle::platform::CPUDeviceContext,
+                           paddle::platform::float16>(src_tensor, dst_tensor,
+                                                      place);
+    }
+  }
+  PADDLE_THROW(paddle::platform::errors::Unimplemented(
+      "Gradient accumulation of data type (%s) on place (%s) is not "
+      "supported in imperative mode",
+      paddle::framework::DataTypeToString(data_type), place));
+}
+
+}  // namespace egr
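Note on the dispatch pattern above: TensorAddFunctor is a place visitor — one operator() overload per device place, selected at runtime by boost::apply_visitor, with unsupported places reduced to a uniform throw. A minimal, self-contained sketch of the same pattern in plain C++17 (std::variant/std::visit standing in for the Boost visitor; CpuPlace, CudaPlace and AddVisitor are illustrative names, not Paddle types):

#include <cstdint>
#include <cstdio>
#include <variant>

struct CpuPlace {};
struct CudaPlace {};
using Place = std::variant<CpuPlace, CudaPlace>;

template <typename T>
struct AddVisitor {
  int64_t numel;
  const T* x;
  T* y;
  // Supported place: accumulate y += x (AXPY with alpha == 1).
  void operator()(const CpuPlace&) const {
    for (int64_t i = 0; i < numel; ++i) y[i] += x[i];
  }
  // Unsupported place in this toy build: report instead of computing.
  void operator()(const CudaPlace&) const {
    std::fprintf(stderr, "accumulation on this place is not supported\n");
  }
};

int main() {
  float x[3] = {1.f, 2.f, 3.f};
  float y[3] = {10.f, 20.f, 30.f};
  Place place = CpuPlace{};
  std::visit(AddVisitor<float>{3, x, y}, place);  // y becomes {11, 22, 33}
}

The runtime value of the place variant picks the overload, which is exactly how one TensorAdd entry point fans out to per-device BLAS calls above.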
+ "Only Float64, Float32, Int64, Int32 are supported for now.")); + break; + } + } +} + +void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, + bool bias_after_scale, egr::EagerTensor* out) { + // TODO(jiabin): Support multiple tensor here, Create DenseTensor is not a + // proper way to Demo it + // Run Forward Function + auto dense_tensor = std::dynamic_pointer_cast(x.impl()); + // Init output tensor + auto tensor_meta = pten::DenseTensorMeta( + dense_tensor->dtype(), dense_tensor->dims(), dense_tensor->layout()); + auto place = dense_tensor->place(); + size_t bytes_size = paddle::framework::product(dense_tensor->dims()) * + SizeOf(dense_tensor->dtype()); + auto dense_out = std::make_shared( + pten::make_intrusive( + paddle::memory::Alloc(place, bytes_size)), + std::move(tensor_meta)); + // Handle Device Context + const paddle::platform::Place& expected_kernel_place = + Controller::Instance().GetExpectedPlace(); + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + + if (expected_kernel_place == paddle::platform::CPUPlace()) { + auto* dev_ctx = dynamic_cast( + pool.Get(expected_kernel_place)); + if (!dev_ctx) { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Cannot convert device_context to CPUDeviceContext." + "This indicates backend mismatch." + "Pleas double check your expected place")); + } + ScaleDeviceDispatch( + *dense_tensor.get(), *dev_ctx, scale, bias, bias_after_scale, + dense_out.get()); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else if (expected_kernel_place == paddle::platform::CUDAPlace()) { + auto* dev_ctx = dynamic_cast( + pool.Get(expected_kernel_place)); + if (!dev_ctx) { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Cannot convert device_context to CUDADeviceContext." + "This indicates backend mismatch." + "Pleas double check your expected place")); + } + ScaleDeviceDispatch( + *dense_tensor.get(), *dev_ctx, scale, bias, bias_after_scale, + dense_out.get()); +#endif + } else { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Detected unsupported backend." + "Only CPU and CUDA Backend are supported for now." + "Please double check if your backend falls into the above two " + "categories.")); + } + + out->set_impl(dense_out); +} + +void GradNodeScale::SetTensorWrappers_X( + const std::vector& tensors) { + // Does nothing for scale +} + +void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; } + +std::vector> GradNodeScale::operator()( + const std::vector>& grads) { + // 1. Check Output Size + PADDLE_ENFORCE( + ((grads.size() == 1) && (grads[0].size() == 1)), + paddle::platform::errors::Fatal( + "ScaleGradNode takes exactly 1 grad tensor." + "However received: %d", + "This indicates an issue with Eager Dygraph Backward logic", + grads.size())); + std::vector> outs; + // 2. Create needed out parttern + egr::EagerTensor out; + // Apply Gradient Hooks + if (GradientHooksRegistered()) { + // TODO(jiabin): Shall we apply hook slot by slot here or accept + // vector> to apply all hooks? 
diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc
index e69de29bb2..642302a411 100644
--- a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc
+++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc
@@ -0,0 +1,99 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * This file should be automatically generated by the auto code generator.
+ * All ops' C++ autograd logic is defined here; in the Python-C extension API
+ * system we try to avoid any autograd-related code, and move it all to here.
+ *
+ * Currently, we just manually write some forward autograd here, and we will
+ * replace it with the auto code generator later.
+ * **/
+
+#include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h"
+#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
+#include "paddle/fluid/eager/autograd_meta.h"
+#include "paddle/fluid/eager/eager_tensor.h"
+#include "paddle/fluid/eager/utils.h"
+
+#include "paddle/pten/api/all.h"
+
+namespace egr {
+
+egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias,
+                       bool bias_after_scale, bool trace_backward) {
+  // 1. Run Forward
+  // 1.1 Create outputs
+  egr::EagerTensor out;
+  // 1.2 Needed by the original op; we assemble ins, outs, attrs here
+
+  // 1.3 Call forward C++ api
+  ScaleAPI(x, scale, bias, bias_after_scale, &out);
+
+  // 2. Build Backward Depends
+  // 2.1 Get AutogradMetas for all ins and outs
+  auto p_autograd_in = EagerUtils::unsafe_autograd_meta(x);
+  // NOTE: Call EagerUtils::multi_autograd_meta when we have vector of outputs
+  auto p_autograd_out = EagerUtils::autograd_meta(&out);
+
+  // 2.2 Add GradNode
+  // 2.2.1 ComputeRequireGrad
+  // TODO(jiabin) : make this function accept different kinds of input
+  // TODO(zhanlve): which one is more efficient:
+  // 1. construct a vector of pointers
+  // 2. call "ComputeRequireGrad" multiple times
+  bool require_any_grad =
+      EagerUtils::ComputeRequireGrad(trace_backward, p_autograd_in);
+  if (require_any_grad) {
+    EagerUtils::PassStopGradient(false /*generate_grad*/, p_autograd_out);
+
+    // 2.2.2 Set OutRankInfo for outputs; this needs to be the same as the
+    // Edges' input_rank_
+    /** Note:
+    // 1. We provide EagerUtils::SetMultiOutRank(vector), since some Operators
+    // have several slot names with duplicate outputs.
+    // 2. We call AutogradMeta's SetOutputRank only when we have a single
+    // output with a single slot name.
+    **/
+    p_autograd_out->SetSingleOutRankWithSlot(0, 0);
+
+    // Init GradNode
+    auto scale_node = std::make_shared<GradNodeScale>(/* fwd_in_slot_num */ 1,
+                                                      /* bwd_in_slot_num */ 1);
+
+    // Pass Attributes to GradNode
+    scale_node->SetAttributes_scale(scale);
+
+    // Set Next Edges
+    scale_node->AddEdges(p_autograd_in, /*slot id*/ 0);
+
+    // Set TensorWrappers
+    scale_node->SetTensorWrappers_X({x});
+
+    // Set Grad out rank the same as fwd input and set stop gradient to bwd
+    scale_node->SetGradOutMeta(*p_autograd_in, /*slot id*/ 0);
+    // Set Grad in rank the same as fwd output and set stop gradient to bwd
+    scale_node->SetGradInMeta(*p_autograd_out, /*slot id*/ 0);
+
+    // Set History for output: set current Grad Node for it
+    EagerUtils::SetHistory(p_autograd_out, scale_node);
+  }
+
+  return out;
+}
+
+}  // namespace egr
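The forward function above does two jobs: compute the result via ScaleAPI, then, only when gradients are required, wire a GradNodeScale into the autograd graph. A toy model of that record-on-forward scheme (ToyTensor and ToyScale are hypothetical scaffolding; the real egr types carry far more state):

#include <cassert>
#include <functional>

struct ToyTensor {
  float value = 0.f;
  float grad = 0.f;
  std::function<void(float)> backward;  // stand-in for a GradNode edge
};

ToyTensor ToyScale(ToyTensor& x, float scale, float bias,
                   bool trace_backward) {
  ToyTensor out;
  out.value = scale * x.value + bias;  // 1. run forward
  if (trace_backward) {                // 2. build backward dependency
    ToyTensor* in = &x;
    out.backward = [in, scale](float grad_out) {
      // GradNodeScale semantics: d(scale * x + bias)/dx == scale,
      // so the node just rescales the incoming gradient.
      in->grad += scale * grad_out;
    };
  }
  return out;
}

int main() {
  ToyTensor x{3.f};
  ToyTensor out = ToyScale(x, 2.f, 1.f, /*trace_backward=*/true);
  assert(out.value == 7.f);
  out.backward(1.f);  // seed a gradient of 1.0, as a loss would
  assert(x.grad == 2.f);
}

The bias drops out of the backward pass entirely, which is why GradNodeScale above calls ScaleAPI with its saved scale_ and a zero bias.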
diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h
index 80faad9080..c58c0b9e66 100644
--- a/paddle/fluid/eager/eager_tensor.h
+++ b/paddle/fluid/eager/eager_tensor.h
@@ -18,7 +18,6 @@
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/variable.h"
 // pten deps
-#include "paddle/pten/all.h"
 #include "paddle/pten/api/all.h"
 #include "paddle/pten/api/lib/api_declare.h"
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h
index f15c50ef75..5cf0b90220 100644
--- a/paddle/fluid/eager/grad_node_info.h
+++ b/paddle/fluid/eager/grad_node_info.h
@@ -16,7 +16,6 @@
 
 #include "paddle/fluid/eager/eager_tensor.h"
 #include "paddle/pten/api/all.h"
-#include "paddle/pten/include/core.h"
 
 namespace egr {
 /**
diff --git a/paddle/fluid/eager/legacy/infer_var_type_context.h b/paddle/fluid/eager/legacy/infer_var_type_context.h
index e69de29bb2..9d9cbeb38c 100644
--- a/paddle/fluid/eager/legacy/infer_var_type_context.h
+++ b/paddle/fluid/eager/legacy/infer_var_type_context.h
@@ -0,0 +1,260 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "paddle/fluid/eager/eager_tensor.h"
+#include "paddle/fluid/eager/legacy/tensor_helper.h"
+#include "paddle/fluid/eager/legacy/type_def.h"
+#include "paddle/fluid/framework/type_defs.h"
+#include "paddle/fluid/framework/var_type.h"
+#include "paddle/fluid/framework/var_type_inference.h"
+#include "paddle/fluid/framework/var_type_traits.h"
+#include "paddle/pten/api/all.h"
+
+namespace egr {
+namespace legacy {
+
+// infer var type context for imperative mode
+class TensorRuntimeInferVarTypeContext
+    : public paddle::framework::InferVarTypeContext {
+ public:
+  TensorRuntimeInferVarTypeContext(
+      const NameTensorMap& inputs, const NameTensorMap& outputs,
+      const paddle::framework::AttributeMap& attrs_map,
+      const paddle::framework::AttributeMap& default_attrs_map)
+      : InferVarTypeContext(nullptr, nullptr),
+        inputs_(inputs),
+        outputs_(outputs),
+        attrs_(attrs_map),
+        default_attrs_(default_attrs_map) {}
+
+  virtual ~TensorRuntimeInferVarTypeContext() {}
+
+  paddle::framework::Attribute GetAttr(
+      const std::string& name) const override {
+    auto it = attrs_.find(name);
+
+    if (it == attrs_.end()) {
+      it = default_attrs_.find(name);
+      if (it == default_attrs_.end()) {
+        PADDLE_THROW(paddle::platform::errors::NotFound(
+            "Cannot find [%s] in attributes.", name));
+      }
+    }
+
+    return it->second;
+  }
+
+  bool HasInput(const std::string& name) const override {
+    auto it = inputs_.find(name);
+    return (it != inputs_.end() && it->second.size() > 0);
+  }
+
+  bool HasOutput(const std::string& name) const override {
+    auto it = outputs_.find(name);
+    return (it != outputs_.end() && it->second.size() > 0);
+  }
+
+  size_t InputSize(const std::string& name) const {
+    return inputs_.at(name).size();
+  }
+
+  const std::string& InputVarName(const std::string& name,
+                                  const int index = 0) const {
+    // TODO(jiabin): Support this usage inputs_.at(name)[index]->Name()
+    auto it = inputs_.find(name);
+    PADDLE_ENFORCE_NE(it, inputs_.end(),
+                      paddle::platform::errors::PreconditionNotMet(
+                          "Cannot find [%s] in Input", name));
+    return inputs_.at(name)[index]->name();
+  }
+
+  bool InputTypeAnyOf(
+      const std::string& name,
+      paddle::framework::proto::VarType::Type type) const override {
+    auto& inputs = inputs_.at(name);
+    return std::any_of(
+        inputs.begin(), inputs.end(),
+        [&type](const std::shared_ptr<egr::EagerTensor>& var) {
+          return paddle::framework::ToVarType(var->Var().Type()) == type;
+        });
+  }
+
+  bool InputTypeAllOf(
+      const std::string& name,
+      paddle::framework::proto::VarType::Type type) const override {
+    auto& inputs = inputs_.at(name);
+    return std::all_of(
+        inputs.begin(), inputs.end(),
+        [&type](const std::shared_ptr<egr::EagerTensor>& var) {
+          return paddle::framework::ToVarType(var->Var().Type()) == type;
+        });
+  }
+
+  void SyncTypeAndDataType(const std::string& input_name,
+                           const std::string& output_name,
+                           int index = 0) override {
+    auto in_tensor = inputs_.at(input_name)[index];
+    auto out_tensor = outputs_.at(output_name)[index];
+    if (in_tensor != out_tensor) {
+      this->SetTensorType(
+          out_tensor, paddle::framework::ToVarType(in_tensor->Var().Type()));
+    }
+  }
+
+  void SetOutputType(const std::string& name,
+                     paddle::framework::proto::VarType::Type type,
+                     int index = 0) override {
+    if (index == paddle::framework::ALL_ELEMENTS) {
+      for (auto& item : outputs_.at(name)) {
+        this->SetTensorType(item, type);
+      }
+    } else {
+      auto& var = outputs_.at(name)[index];
+      this->SetTensorType(var, type);
+    }
+  }
+
+  void SetTensorType(std::shared_ptr<egr::EagerTensor> out,
+                     paddle::framework::proto::VarType::Type type) {
+    switch (type) {
+      case paddle::framework::proto::VarType::LOD_TENSOR: {
+        out->MutableVar()->GetMutable<paddle::framework::LoDTensor>();
+        break;
+      }
+      default: {
+        PADDLE_THROW(paddle::platform::errors::NotFound(
+            "Cannot find var type: %s while running runtime InferVarType",
+            paddle::framework::ToTypeName(type)));
+      }
+    }
+  }
+
+  paddle::framework::proto::VarType::Type GetInputType(
+      const std::string& name, const int& index = 0) const override {
+    return paddle::framework::ToVarType(inputs_.at(name)[index]->Var().Type());
+  }
+
+  paddle::framework::proto::VarType::Type GetOutputType(
+      const std::string& name, const int& index = 0) const override {
+    // TODO(jiabin): Support SelectedRows when we have it.
+    return paddle::framework::proto::VarType::LOD_TENSOR;
+  }
+
+  paddle::framework::proto::VarType::Type GetInputDataType(
+      const std::string& name, const int& index = 0) const override {
+    return inputs_.at(name)[index]
+        ->Var()
+        .Get<paddle::framework::LoDTensor>()
+        .type();
+  }
+
+  void SetOutputDataType(const std::string& name,
+                         paddle::framework::proto::VarType::Type type,
+                         int index = 0) override {
+    // TODO(jiabin): Setting data_type doesn't seem to make sense in EagerMode.
+  }
+
+  bool IsDygraph() const override { return true; }
+
+ protected:
+  bool HasVar(const std::string& name) const override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "HasVar is not supported in runtime InferVarType"));
+  }
+
+  const std::vector<std::string>& InputVars(
+      const std::string& name) const override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "InputVars is not supported in runtime InferVarType"));
+  }
+
+  const std::vector<std::string>& OutputVars(
+      const std::string& name) const override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "OutputVars is not supported in runtime InferVarType"));
+  }
+
+  paddle::framework::proto::VarType::Type GetVarType(
+      const std::string& name) const override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Do not manipulate var in runtime InferVarType"));
+  }
+
+  void SetVarType(const std::string& name,
+                  paddle::framework::proto::VarType::Type type) override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Do not manipulate var in runtime InferVarType"));
+  }
+
+  paddle::framework::proto::VarType::Type GetVarDataType(
+      const std::string& name) const override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Do not manipulate var in runtime InferVarType"));
+  }
+
+  void SetVarDataType(const std::string& name,
+                      paddle::framework::proto::VarType::Type type) override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Do not manipulate var in runtime InferVarType"));
+  }
+
+  std::vector<paddle::framework::proto::VarType::Type> GetVarDataTypes(
+      const std::string& name) const override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "GetVarDataTypes is not supported in runtime InferVarType"));
+  }
+
+  void SetVarDataTypes(
+      const std::string& name,
+      const std::vector<paddle::framework::proto::VarType::Type>&
+          multiple_data_type) override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "SetVarDataTypes is not supported in runtime InferVarType"));
+  }
+
+  std::vector<int64_t> GetVarShape(const std::string& name) const override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Do not handle Shape in runtime InferVarType"));
+  }
+
+  void SetVarShape(const std::string& name,
+                   const std::vector<int64_t>& dims) override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Do not handle Shape in runtime InferVarType"));
+  }
+
+  int32_t GetVarLoDLevel(const std::string& name) const override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Do not handle LoDLevel in runtime InferVarType"));
+  }
+
+  void SetVarLoDLevel(const std::string& name, int32_t lod_level) override {
+    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
+        "Do not handle LoDLevel in runtime InferVarType"));
+  }
+
+ private:
+  const NameTensorMap& inputs_;
+  const NameTensorMap& outputs_;
+  const paddle::framework::AttributeMap& attrs_;
+  const paddle::framework::AttributeMap& default_attrs_;
+};
+
+}  // namespace legacy
+}  // namespace egr
diff --git a/paddle/fluid/eager/legacy/prepared_operator.h b/paddle/fluid/eager/legacy/prepared_operator.h
index e69de29bb2..0e00b52e04 100644
--- a/paddle/fluid/eager/legacy/prepared_operator.h
+++ b/paddle/fluid/eager/legacy/prepared_operator.h
@@ -0,0 +1,82 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "paddle/fluid/eager/legacy/execution_context.h"
+#include "paddle/fluid/eager/legacy/type_def.h"
+#include "paddle/fluid/framework/data_transform.h"
+#include "paddle/fluid/framework/op_kernel_type.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/type_defs.h"
+
+DECLARE_bool(use_mkldnn);
+
+namespace paddle {
+namespace framework {
+class Tensor;
+class Variable;
+}  // namespace framework
+namespace platform {
+class DeviceContext;
+}  // namespace platform
+}  // namespace paddle
+
+namespace egr {
+namespace legacy {
+
+const paddle::framework::Tensor* GetTensorFromVar(
+    const paddle::framework::Variable& var);
+
+std::shared_ptr<NameTensorMap> PrepareData(
+    const paddle::framework::OperatorWithKernel& op, const NameTensorMap& ins,
+    const paddle::framework::OpKernelType& expected_kernel_key);
+
+class PreparedOp {
+ public:
+  PreparedOp(const paddle::framework::OperatorBase& op,
+             const paddle::framework::RuntimeContext& ctx,
+             const paddle::framework::OpKernelType& kernel_type,
+             const paddle::framework::OperatorWithKernel::OpKernelFunc& func,
+             paddle::platform::DeviceContext* dev_ctx);
+
+  static PreparedOp Prepare(
+      const NameTensorMap& ins, const NameTensorMap& outs,
+      const paddle::framework::OperatorWithKernel& op,
+      const paddle::platform::Place& place,
+      const paddle::framework::AttributeMap& attrs,
+      const paddle::framework::AttributeMap& default_attrs);
+
+  void Run(const NameTensorMap& in, const NameTensorMap& out,
+           const paddle::framework::AttributeMap& attrs,
+           const paddle::framework::AttributeMap& default_attrs);
+
+  const paddle::framework::OpKernelType& kernel_type() const {
+    return kernel_type_;
+  }
+
+ private:
+  const paddle::framework::OperatorBase& op_;
+  const paddle::framework::RuntimeContext& ctx_;
+  paddle::framework::OpKernelType kernel_type_;
+  paddle::framework::OperatorWithKernel::OpKernelFunc func_;
+  paddle::platform::DeviceContext* dev_ctx_;
+};
+
+}  // namespace legacy
+}  // namespace egr
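PreparedOp above splits kernel resolution (Prepare) from kernel invocation (Run), so repeated eager calls pay the lookup and device-context resolution cost once. A minimal stand-alone sketch of that prepare/run split (ToyPreparedOp and its registry are invented here for illustration and are not the Paddle types):

#include <functional>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>

using KernelFn = std::function<void(int /*numel*/)>;

class ToyPreparedOp {
 public:
  // Expensive, one-time work: find the kernel for this op type.
  static ToyPreparedOp Prepare(
      const std::string& op_type,
      const std::map<std::string, KernelFn>& registry) {
    auto it = registry.find(op_type);
    if (it == registry.end()) throw std::runtime_error("kernel not found");
    return ToyPreparedOp(it->second);
  }

  // Cheap, repeatable work: just invoke the cached kernel.
  void Run(int numel) { func_(numel); }

 private:
  explicit ToyPreparedOp(KernelFn func) : func_(std::move(func)) {}
  KernelFn func_;
};

int main() {
  std::map<std::string, KernelFn> registry{
      {"scale", [](int n) { /* launch a scale kernel over n elements */ }}};
  auto op = ToyPreparedOp::Prepare("scale", registry);
  op.Run(128);  // later calls reuse the cached kernel, skipping the lookup
}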
diff --git a/paddle/fluid/eager/legacy/tensor_helper.h b/paddle/fluid/eager/legacy/tensor_helper.h
index e69de29bb2..ce407f8965 100644
--- a/paddle/fluid/eager/legacy/tensor_helper.h
+++ b/paddle/fluid/eager/legacy/tensor_helper.h
@@ -0,0 +1,33 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include "paddle/fluid/eager/eager_tensor.h"
+#include "paddle/pten/api/all.h"
+namespace egr {
+namespace legacy {
+
+void InitializeVariable(paddle::framework::Variable* var,
+                        paddle::framework::proto::VarType::Type var_type);
+paddle::framework::proto::VarType::Type GetDtypeFromVar(
+    const paddle::framework::Variable& var);
+const paddle::platform::Place& GetPlaceFromVar(
+    const paddle::framework::Variable& var);
+void CopyVariable(const paddle::framework::Variable& src_var,
+                  paddle::framework::Variable* dst_var);
+
+}  // namespace legacy
+}  // namespace egr
diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu
index 4e5be2e535..858688dffd 100644
--- a/paddle/fluid/framework/data_device_transform_test.cu
+++ b/paddle/fluid/framework/data_device_transform_test.cu
@@ -23,6 +23,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/init.h"
 
+#include "paddle/fluid/framework/pten_utils.h"
+
 namespace paddle {
 namespace framework {
 
@@ -73,9 +75,12 @@ class TestKernel : public OpKernel<float> {
     output->Resize(input->dims());
     output->mutable_data<T>(ctx.GetPlace());
 
-    operators::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
-        input, input, output, ctx.template device_context<DeviceContext>(),
-        AddFunctor<T>());
+    auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
+    auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
+
+    pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
+        *pt_input, *pt_input, pt_out.get(),
+        ctx.template device_context<DeviceContext>(), AddFunctor<T>());
     functor.Run();
   }
 };
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
index 8e69f96dfb..9d75c66beb 100644
--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -41,7 +41,8 @@ limitations under the License.
*/ #include "paddle/utils/flat_hash_map.h" #include "paddle/pten/core/arg_map_context.h" -#include "paddle/pten/include/core.h" +#include "paddle/pten/core/kernel_context.h" +#include "paddle/pten/core/kernel_factory.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 16580627ed..d27460aeec 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -25,6 +25,7 @@ #include #include +#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/framework/var_type.h" @@ -36,7 +37,6 @@ #include "paddle/fluid/imperative/variable_wrapper.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/macros.h" - namespace paddle { namespace framework { class Variable; @@ -211,6 +211,8 @@ class VarBase { framework::proto::VarType::Type DataType() const { return var_->DataType(); } + size_t ElementSize() const { return framework::SizeOfType(var_->DataType()); } + void SetForwardDataType(framework::proto::VarType::Type data_type) { var_->SetForwardDataType(data_type); } @@ -221,7 +223,10 @@ class VarBase { const platform::Place Place() const { return var_->Place(); } - void ClearGradient(); + void ClearGradient(bool set_to_zero = true); + + void _GradientSetEmpty(bool is_empty = true); + bool _IsGradientSetEmpty(); std::shared_ptr NewVarBase(const platform::Place& dst_place, const bool blocking) const; @@ -230,6 +235,8 @@ class VarBase { void BumpInplaceVersion(); + void _CopyGradientFrom(const imperative::VarBase& src); + /* Hook related method: now only used for GradVarBase */ bool HasVariableWrapperHook() const { return var_->HasVariableWrapperHook(); } @@ -275,16 +282,6 @@ class VarBase { static ThreadSafeNameSet name_set_; }; -class Layer { - public: - virtual ~Layer() {} - - virtual std::vector> Forward( - const std::vector>& inputs) { - return {}; - } -}; - std::shared_ptr CreateGradOpNode( const framework::OperatorBase& op, const NameVarBaseMap& ins, const NameVarBaseMap& outs, const framework::AttributeMap& attrs, diff --git a/paddle/fluid/imperative/op_base.h b/paddle/fluid/imperative/op_base.h index acb125a829..cb76a82353 100644 --- a/paddle/fluid/imperative/op_base.h +++ b/paddle/fluid/imperative/op_base.h @@ -183,6 +183,21 @@ class OpBase { const framework::AttributeMap& default_attrs, const platform::Place& place); + static pten::KernelContext* GetKernelContext() { return &pt_kernel_context_; } + + bool HasVoidFunctionPostHook() const { + return !void_function_post_hooks_.empty(); + } + + void AddVoidFunctionPostHook(std::shared_ptr>&& hook) { + void_function_post_hooks_.emplace_back(std::move(hook)); + } + + const std::vector>>& + GetVoidFunctionPostHooks() const { + return void_function_post_hooks_; + } + private: static const std::string& UnknownOpType() { static std::string kUnknownOpType{"unknown"}; @@ -197,6 +212,10 @@ class OpBase { std::unique_ptr op_; platform::Place place_; size_t id_{-1UL}; + // In order to reduce the compatibility phase + // performance overhead, temporarily cache KernelContext + static pten::KernelContext pt_kernel_context_; + std::vector>> void_function_post_hooks_; }; class GradOpNode { diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h index 53f876c498..29747e79ef 100644 --- a/paddle/fluid/imperative/prepared_operator.h +++ b/paddle/fluid/imperative/prepared_operator.h @@ -21,6 +21,8 @@ 
#include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/pten_utils.h" +#include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/imperative/execution_context.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/type_defs.h" @@ -147,19 +149,29 @@ class PreparedOp { const framework::OperatorWithKernel::OpKernelFunc& func, platform::DeviceContext* dev_ctx); + PreparedOp(const framework::OperatorBase& op, + const framework::RuntimeContext& ctx, + const framework::OpKernelType& kernel_type, + const framework::KernelSignature& kernel_signature, + const pten::Kernel& pt_kernel, + pten::KernelContext* pt_kernel_context, + platform::DeviceContext* dev_ctx); + static PreparedOp Prepare(const NameVarMap& ins, const NameVarMap& outs, const framework::OperatorWithKernel& op, const platform::Place& place, const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs); + const framework::AttributeMap& default_attrs, + pten::KernelContext* pt_kernel_context = nullptr); static PreparedOp Prepare(const NameVarMap& ins, const NameVarMap& outs, const framework::OperatorWithKernel& op, const platform::Place& place, const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs); + const framework::AttributeMap& default_attrs, + pten::KernelContext* pt_kernel_context = nullptr); void Run(const NameVarMap& in, const NameVarMap& out, const framework::AttributeMap& attrs, @@ -178,6 +190,15 @@ class PreparedOp { framework::OpKernelType kernel_type_; framework::OperatorWithKernel::OpKernelFunc func_; platform::DeviceContext* dev_ctx_; + // NOTE(chenweihang): Similar op members are used to adapt to + // new pten kernel, if there is a better design in the future, + // we may polish the implementation here + bool run_pten_kernel_{false}; + framework::KernelSignature pt_kernel_signature_; + pten::Kernel pt_kernel_; + // In order to reduce the compatibility phase + // performance overhead, temporarily cache KernelContext + pten::KernelContext* pt_kernel_context_; }; } // namespace imperative diff --git a/paddle/fluid/operators/cast_op.h b/paddle/fluid/operators/cast_op.h index 72aa9a195e..c54c811b25 100644 --- a/paddle/fluid/operators/cast_op.h +++ b/paddle/fluid/operators/cast_op.h @@ -19,7 +19,6 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/transform.h" #include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/cast_kernel.h" namespace paddle { diff --git a/paddle/fluid/operators/conj_op.h b/paddle/fluid/operators/conj_op.h index 71115c2eba..6df982abb8 100644 --- a/paddle/fluid/operators/conj_op.h +++ b/paddle/fluid/operators/conj_op.h @@ -19,7 +19,6 @@ // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/complex_kernel.h" namespace paddle { @@ -39,7 +38,7 @@ class ConjKernel : public framework::OpKernel { auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); // call new kernel - pten::ConjKernel(dev_ctx, *pt_x.get(), pt_out.get()); + pten::ConjKernel(dev_ctx, *pt_x.get(), pt_out.get()); } }; diff --git a/paddle/fluid/operators/dot_op.h b/paddle/fluid/operators/dot_op.h index 8817e2f3ca..ceb8a28e8a 100644 --- a/paddle/fluid/operators/dot_op.h +++ b/paddle/fluid/operators/dot_op.h @@ -21,7 +21,6 @@ // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/dot_grad_kernel.h" #include "paddle/pten/kernels/dot_kernel.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index 35807d7c57..622a6d7edb 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -18,7 +18,6 @@ limitations under the License. */ #include #include "paddle/fluid/operators/elementwise/elementwise_op.h" -// only can include the headers in paddle/pten/include dirs #include "paddle/pten/kernels/math_kernel.h" namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index 385c7549e0..687340b668 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -18,7 +18,6 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/platform/cpu_info.h" -// only can include the headers in paddle/pten/include dirs #include "paddle/pten/kernels/math_kernel.h" namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h index 37d29ed91b..626046890f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_function.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h @@ -29,7 +29,6 @@ limitations under the License. */ #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/transform.h" -// only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/kernels/cpu/elementwise.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h b/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h index 36ff1ae254..9cc741344e 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h @@ -22,7 +22,6 @@ limitations under the License. 
*/ // only can include the headers in paddle/top/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/gpu/elementwise.h" namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.h b/paddle/fluid/operators/elementwise/elementwise_sub_op.h index 09818380d8..f035e46d1d 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.h @@ -16,7 +16,6 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_op.h" -// only can include the headers in paddle/pten/include dirs #include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fill_any_like_op.h b/paddle/fluid/operators/fill_any_like_op.h index 287bbbfa3b..19f6e7a4ef 100644 --- a/paddle/fluid/operators/fill_any_like_op.h +++ b/paddle/fluid/operators/fill_any_like_op.h @@ -19,7 +19,6 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/pten_utils.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/full_kernel.h" namespace paddle { diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h index ef42619bfe..8e54ecb922 100644 --- a/paddle/fluid/operators/flatten_op.h +++ b/paddle/fluid/operators/flatten_op.h @@ -20,7 +20,6 @@ limitations under the License. */ #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/empty_kernel.h" #include "paddle/pten/kernels/flatten_grad_kernel.h" #include "paddle/pten/kernels/flatten_kernel.h" diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h index e93bd21286..9ab77cdcae 100644 --- a/paddle/fluid/operators/matmul_v2_op.h +++ b/paddle/fluid/operators/matmul_v2_op.h @@ -27,7 +27,6 @@ limitations under the License. */ // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/matmul_grad_kernel.h" #include "paddle/pten/kernels/matmul_kernel.h" diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index e1854d8a13..eb4d4a5c16 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -26,8 +26,6 @@ limitations under the License. */ // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" #include "paddle/pten/kernels/cpu/reduce.h" #if defined(__HIPCC__) || defined(__NVCC__) diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index a25e53aac5..47b8da70ad 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -20,7 +20,6 @@ limitations under the License. 
*/ // only can include the headers in paddle/pten/api dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/common/scalar_array.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/reshape_grad_kernel.h" #include "paddle/pten/kernels/reshape_kernel.h" namespace paddle { diff --git a/paddle/fluid/operators/scale_op.h b/paddle/fluid/operators/scale_op.h index e7a07810c6..a6f4f6e272 100644 --- a/paddle/fluid/operators/scale_op.h +++ b/paddle/fluid/operators/scale_op.h @@ -14,9 +14,12 @@ limitations under the License. */ #pragma once -#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" +#include "paddle/fluid/framework/pten_utils.h" + +// only can include the headers in paddle/top/api dirs +#include "paddle/pten/api/lib/utils/tensor_utils.h" +#include "paddle/pten/kernels/scale_kernel.h" namespace paddle { namespace operators { @@ -33,6 +36,7 @@ static inline T GetAttrFromTensor(const framework::Tensor* tensor) { return tensor_data[0]; } +// See Note [ Why still keep the original kernel implementation? ] template class ScaleKernel : public framework::OpKernel { public: @@ -40,13 +44,13 @@ class ScaleKernel : public framework::OpKernel { auto* in_var = ctx.InputVar("X"); auto* in = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_var); - auto bias = static_cast(ctx.Attr("bias")); + auto bias = ctx.Attr("bias"); auto bias_after_scale = ctx.Attr("bias_after_scale"); - auto scale = static_cast(ctx.Attr("scale")); + auto scale = ctx.Attr("scale"); if (ctx.HasInput("ScaleTensor")) { auto* scale_tensor = ctx.Input("ScaleTensor"); - scale = GetAttrFromTensor(scale_tensor); + scale = static_cast(GetAttrFromTensor(scale_tensor)); } auto* out_var = ctx.OutputVar("Out"); @@ -56,22 +60,17 @@ class ScaleKernel : public framework::OpKernel { out_slr->set_rows(in_slr.rows()); out_slr->set_height(in_slr.height()); } - auto* out = framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var); out->mutable_data(in->place()); + auto& dev_ctx = ctx.device_context(); - PADDLE_ENFORCE_EQ(in->dims(), out->dims(), - paddle::platform::errors::InvalidArgument( - "the input and output should have the same dim" - "but input dim is %s, output dim is %s", - in->dims(), out->dims())); + auto pt_x = paddle::experimental::MakePtenDenseTensor(*in); + auto pt_out = paddle::experimental::MakePtenDenseTensor(*out); - auto eigen_out = framework::EigenVector::Flatten(*out); - auto eigen_in = framework::EigenVector::Flatten(*in); - auto& dev = *ctx.template device_context().eigen_device(); - EigenScale, T>::Eval( - dev, eigen_out, eigen_in, scale, bias, bias_after_scale); + // call new kernel + pten::ScaleKernel(dev_ctx, *pt_x.get(), scale, bias, bias_after_scale, + pt_out.get()); } }; diff --git a/paddle/fluid/operators/sign_op.h b/paddle/fluid/operators/sign_op.h index b8dd44c01b..8294cd2c5f 100644 --- a/paddle/fluid/operators/sign_op.h +++ b/paddle/fluid/operators/sign_op.h @@ -19,7 +19,6 @@ limitations under the License. */ #include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/operators/eigen/eigen_function.h" -#include "paddle/pten/include/core.h" #include "paddle/pten/kernels/sign_kernel.h" namespace paddle { diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 9484d506b2..102bc9f162 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -26,7 +26,6 @@ limitations under the License. 
diff --git a/paddle/fluid/operators/sign_op.h b/paddle/fluid/operators/sign_op.h
index b8dd44c01b..8294cd2c5f 100644
--- a/paddle/fluid/operators/sign_op.h
+++ b/paddle/fluid/operators/sign_op.h
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/pten_utils.h"
 #include "paddle/fluid/operators/eigen/eigen_function.h"
 
-#include "paddle/pten/include/core.h"
 #include "paddle/pten/kernels/sign_kernel.h"
 
 namespace paddle {
diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc
index 9484d506b2..102bc9f162 100644
--- a/paddle/fluid/pybind/eager.cc
+++ b/paddle/fluid/pybind/eager.cc
@@ -26,7 +26,6 @@ limitations under the License. */
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/core.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc
index 659df6b9b4..aaf86bc41a 100644
--- a/paddle/fluid/pybind/eager_functions.cc
+++ b/paddle/fluid/pybind/eager_functions.cc
@@ -34,7 +34,6 @@ limitations under the License. */
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/core.h"
 
 namespace paddle {
 namespace pybind {
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index a0067f9c64..a8c1da2a8b 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -31,7 +31,6 @@ limitations under the License. */
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/core.h"
 
 namespace paddle {
 namespace pybind {
diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc
index 71b8bbbb1a..038a1254d7 100644
--- a/paddle/fluid/pybind/eager_properties.cc
+++ b/paddle/fluid/pybind/eager_properties.cc
@@ -28,7 +28,6 @@ limitations under the License. */
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/core.h"
 #pragma GCC diagnostic ignored "-Wwrite-strings"
 
 namespace paddle {
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 9849d0d416..c1049d2407 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -26,7 +26,6 @@ limitations under the License. */
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/core.h"
 
 namespace paddle {
 namespace pybind {
diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt
index 6a823ff367..a9b7c7581b 100644
--- a/paddle/pten/CMakeLists.txt
+++ b/paddle/pten/CMakeLists.txt
@@ -29,4 +29,4 @@ get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
 message(STATUS "All standard pten kernels: ${pten_kernels}")
 set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels})
 
-cc_library(pten SRCS all.cc DEPS ${PTEN_DEPS})
+cc_library(pten DEPS ${PTEN_DEPS})
diff --git a/paddle/pten/all.cc b/paddle/pten/all.cc
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/paddle/pten/all.h b/paddle/pten/all.h
deleted file mode 100644
index c8be629b10..0000000000
--- a/paddle/pten/all.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-// developer apis
-#include "paddle/pten/include/core.h"
-#include "paddle/pten/include/infermeta.h"
-#include "paddle/pten/include/math.h"
diff --git a/paddle/pten/api/lib/utils.cc b/paddle/pten/api/lib/utils.cc
index e69de29bb2..6eb1e5a379 100644
--- a/paddle/pten/api/lib/utils.cc
+++ b/paddle/pten/api/lib/utils.cc
@@ -0,0 +1,81 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/pten/api/include/utils.h"
+
+#include <memory>
+
+#include "glog/logging.h"
+
+#include "paddle/pten/api/lib/api_registry.h"
+#include "paddle/pten/api/lib/kernel_dispatch.h"
+#include "paddle/pten/api/lib/utils/storage.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/infermeta/unary.h"
+
+PT_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PT_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
+#endif
+
+#ifdef PADDLE_WITH_XPU
+PT_DECLARE_KERNEL(copy, XPU, ALL_LAYOUT);
+#endif
+
+namespace paddle {
+namespace experimental {
+
+PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) {
+  // 1. Get kernel signature and kernel
+  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
+  kernel_key_set.backend_set = kernel_key_set.backend_set | BackendSet(backend);
+  auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
+  auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
+      "copy", kernel_key);
+
+  VLOG(0) << "to API kernel key: " << kernel_key;
+  VLOG(0) << "to API kernel: " << kernel;
+
+  // 2. Get Device Context
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+  auto kernel_context = pten::KernelContext(dev_ctx);
+
+  // 3. Auto data transform
+  auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
+  kernel_context.EmplaceBackInput(dense_x);
+  kernel_context.EmplaceBackAttr(blocking);
+
+  // 4. InferMeta
+  auto out_meta = UnchangedInferMeta(dense_x->meta());
+
+  // 5. Prepare outputs
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(backend)),
+      std::move(out_meta));
+  kernel_context.EmplaceBackOutput(dense_out);
+  Tensor out;
+  out.set_impl(dense_out);
+
+  // 6. Call kernel
+  kernel(&kernel_context);
+
+  return out;
+}
+
+}  // namespace experimental
+}  // namespace paddle
+
+PT_REGISTER_API(Utils);
diff --git a/paddle/pten/include/core.h b/paddle/pten/include/core.h
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/paddle/pten/include/infermeta.h b/paddle/pten/include/infermeta.h
deleted file mode 100644
index 5e356dd37c..0000000000
--- a/paddle/pten/include/infermeta.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-// See Note: [ How do we organize the kernel directory ]
-#include "paddle/pten/infermeta/binary.h"
-#include "paddle/pten/infermeta/multiary.h"
-#include "paddle/pten/infermeta/nullary.h"
-#include "paddle/pten/infermeta/unary.h"
diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h
deleted file mode 100644
index a4fb7f4d98..0000000000
--- a/paddle/pten/include/math.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-// See Note: [ How do we organize the kernel directory ]
-#include "paddle/pten/api/lib/utils/storage.h"
-#include "paddle/pten/include/infermeta.h"
-#include "paddle/pten/kernels/scale_kernel.h"
-
-namespace pten {
-
-template <typename T, typename ContextT>
-DenseTensor Scale(const ContextT& dev_ctx,
-                  const DenseTensor& x,
-                  const Scalar& scale,
-                  float bias,
-                  bool bias_after_scale) {
-  auto out_meta = UnchangedInferMeta(x.meta());
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Scale<T>(dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
-  return dense_out;
-}
-
-}  // namespace pten
diff --git a/paddle/pten/kernels/complex_kernel.h b/paddle/pten/kernels/complex_kernel.h
index 9dd3d457e4..b6074f117e 100644
--- a/paddle/pten/kernels/complex_kernel.h
+++ b/paddle/pten/kernels/complex_kernel.h
@@ -15,9 +15,6 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/infermeta.h"
-#include "paddle/pten/kernels/empty_kernel.h"
-
 #include "paddle/pten/infermeta/unary.h"
 #include "paddle/pten/kernels/empty_kernel.h"
diff --git a/paddle/pten/kernels/cpu/scale_kernel.cc b/paddle/pten/kernels/cpu/scale_kernel.cc
index e69de29bb2..0582fb87b4 100644
--- a/paddle/pten/kernels/cpu/scale_kernel.cc
+++ b/paddle/pten/kernels/cpu/scale_kernel.cc
@@ -0,0 +1,65 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/pten/kernels/scale_kernel.h"
+
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/kernels/funcs/eigen/common.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/bfloat16.h"
+namespace pten {
+
+template <typename T, typename Context>
+void ScaleKernel(const Context& dev_ctx,
+                 const DenseTensor& x,
+                 const Scalar& scale,
+                 float bias,
+                 bool bias_after_scale,
+                 DenseTensor* out) {
+  // calc
+  out->mutable_data<T>();
+  auto eigen_out = pten::EigenVector<T>::Flatten(*out);
+  auto eigen_x = pten::EigenVector<T>::Flatten(x);
+  auto& dev = *dev_ctx.eigen_device();
+  // TODO(chenweihang): the Eigen function used here requires the dtypes of
+  // scale, eigen_x and bias to match, so the two scalar args are cast to T
+  // here; maybe we should declare that the types of scale and bias are T?
+  paddle::operators::EigenScale<std::decay_t<decltype(dev)>, T>::Eval(
+      dev,
+      eigen_out,
+      eigen_x,
+      scale.to<T>(),
+      static_cast<T>(bias),
+      bias_after_scale);
+}
+
+}  // namespace pten
+
+PT_REGISTER_CTX_KERNEL(scale,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::ScaleKernel,
+                       float,
+                       double,
+                       paddle::platform::bfloat16,
+                       uint8_t,
+                       int8_t,
+                       int16_t,
+                       int,
+                       int64_t) {}
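For reference on semantics: with bias_after_scale = true the kernel computes out = scale * x + bias, which is exactly what the device API test at the end of this patch checks (x = 11, scale = 2, bias = 1 gives 2 * 11 + 1 = 23); the attribute name indicates that with bias_after_scale = false the bias is applied before scaling instead, i.e. out = scale * (x + bias).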
diff --git a/paddle/pten/kernels/flatten_kernel.h b/paddle/pten/kernels/flatten_kernel.h
index a67e66fac4..c974fda1ed 100644
--- a/paddle/pten/kernels/flatten_kernel.h
+++ b/paddle/pten/kernels/flatten_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/infermeta.h"
+#include "paddle/pten/infermeta/unary.h"
 #include "paddle/pten/kernels/empty_kernel.h"
 
 namespace pten {
diff --git a/paddle/pten/kernels/gpu/scale_kernel.cu b/paddle/pten/kernels/gpu/scale_kernel.cu
index 68574c063e..ff7e2a6ed2 100644
--- a/paddle/pten/kernels/gpu/scale_kernel.cu
+++ b/paddle/pten/kernels/gpu/scale_kernel.cu
@@ -44,12 +44,12 @@ struct ScaleFunctor {
 };
 
 template <typename T, typename ContextT>
-void Scale(const ContextT& dev_ctx,
-           const DenseTensor& x,
-           const Scalar& scale,
-           float bias,
-           bool bias_after_scale,
-           DenseTensor* out) {
+void ScaleKernel(const ContextT& dev_ctx,
+                 const DenseTensor& x,
+                 const Scalar& scale,
+                 float bias,
+                 bool bias_after_scale,
+                 DenseTensor* out) {
   std::vector<const DenseTensor*> inputs;
   std::vector<DenseTensor*> outputs;
   inputs.emplace_back(&x);
@@ -67,7 +67,7 @@ void Scale(const ContextT& dev_ctx,
 PT_REGISTER_CTX_KERNEL(scale,
                        GPU,
                        ALL_LAYOUT,
-                       pten::Scale,
+                       pten::ScaleKernel,
                        float,
                        double,
                        paddle::platform::float16,
diff --git a/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h b/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h
index 802cc019d7..b1bae78ddc 100644
--- a/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/matmul_grad_kernel_impl.h
@@ -14,8 +14,7 @@ limitations under the License. */
 
 #pragma once
 
-// #include "paddle/pten/kernels/complex_kernel.h"
-#include "paddle/pten/include/math.h"
+#include "paddle/pten/kernels/complex_kernel.h"
 #include "paddle/pten/kernels/empty_kernel.h"
 #include "paddle/pten/kernels/impl/dot_grad_kernel_impl.h"
 #include "paddle/pten/kernels/impl/matmul_kernel_impl.h"
diff --git a/paddle/pten/kernels/impl/scale_kernel_impl.h b/paddle/pten/kernels/impl/scale_kernel_impl.h
deleted file mode 100644
index 2e0b158b36..0000000000
--- a/paddle/pten/kernels/impl/scale_kernel_impl.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/pten/common/scalar.h"
-#include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/kernels/funcs/eigen/common.h"
-
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/operators/eigen/eigen_function.h"
-
-namespace pten {
-
-template <typename T, typename Context>
-void Scale(const Context& dev_ctx,
-           const DenseTensor& x,
-           const Scalar& scale,
-           float bias,
-           bool bias_after_scale,
-           DenseTensor* out) {
-  // calc
-  out->mutable_data<T>();
-  auto eigen_out = pten::EigenVector<T>::Flatten(*out);
-  auto eigen_x = pten::EigenVector<T>::Flatten(x);
-  auto& dev = *dev_ctx.eigen_device();
-  // TODO(chenweihang): now the eigen function here need the dtype of scale,
-  // eigen_x, bias should be same, so here need cast for two scalar arg,
-  // maybe we declare that the type of scale and bias is T?
-  paddle::operators::EigenScale<std::decay_t<decltype(dev)>, T>::Eval(
-      dev,
-      eigen_out,
-      eigen_x,
-      scale.to<T>(),
-      static_cast<T>(bias),
-      bias_after_scale);
-}
-
-}  // namespace pten
diff --git a/paddle/pten/kernels/math_kernel.h b/paddle/pten/kernels/math_kernel.h
index f87d0a31b4..e01103fc5b 100644
--- a/paddle/pten/kernels/math_kernel.h
+++ b/paddle/pten/kernels/math_kernel.h
@@ -16,7 +16,8 @@ limitations under the License. */
 
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/infermeta.h"
+#include "paddle/pten/infermeta/binary.h"
+#include "paddle/pten/infermeta/unary.h"
 #include "paddle/pten/kernels/empty_kernel.h"
 
 namespace pten {
diff --git a/paddle/pten/kernels/reshape_kernel.h b/paddle/pten/kernels/reshape_kernel.h
index faa51c69ad..293f6cd2ba 100644
--- a/paddle/pten/kernels/reshape_kernel.h
+++ b/paddle/pten/kernels/reshape_kernel.h
@@ -16,7 +16,7 @@ limitations under the License. */
 
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/infermeta.h"
+#include "paddle/pten/infermeta/unary.h"
 #include "paddle/pten/kernels/empty_kernel.h"
 
 namespace pten {
diff --git a/paddle/pten/kernels/scale_kernel.h b/paddle/pten/kernels/scale_kernel.h
index e69de29bb2..ba16db566b 100644
--- a/paddle/pten/kernels/scale_kernel.h
+++ b/paddle/pten/kernels/scale_kernel.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/infermeta/unary.h"
+#include "paddle/pten/kernels/empty_kernel.h"
+namespace pten {
+
+template <typename T, typename Context>
+void ScaleKernel(const Context& dev_ctx,
+                 const DenseTensor& x,
+                 const Scalar& scale,
+                 float bias,
+                 bool bias_after_scale,
+                 DenseTensor* out);
+
+template <typename T, typename ContextT>
+DenseTensor Scale(const ContextT& dev_ctx,
+                  const DenseTensor& x,
+                  const Scalar& scale,
+                  float bias,
+                  bool bias_after_scale) {
+  auto out_meta = UnchangedInferMeta(x.meta());
+  auto dense_out = pten::Empty<T, ContextT>(dev_ctx, std::move(out_meta));
+  ScaleKernel<T, ContextT>(
+      dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
+  return dense_out;
+}
+
+}  // namespace pten
diff --git a/paddle/pten/kernels/sign_kernel.h b/paddle/pten/kernels/sign_kernel.h
index ba205fc96a..304b640d2a 100644
--- a/paddle/pten/kernels/sign_kernel.h
+++ b/paddle/pten/kernels/sign_kernel.h
@@ -15,7 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/include/infermeta.h"
+#include "paddle/pten/infermeta/unary.h"
 #include "paddle/pten/kernels/empty_kernel.h"
 
 namespace pten {
diff --git a/paddle/pten/tests/api/scale_api.h b/paddle/pten/tests/api/scale_api.h
index e69de29bb2..41143826c4 100644
--- a/paddle/pten/tests/api/scale_api.h
+++ b/paddle/pten/tests/api/scale_api.h
@@ -0,0 +1,279 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
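+
+// This test header hand-writes the dispatch that the generated C++ API will
+// later perform automatically: scale_kernel_context() feeds the selected
+// kernel through a pten::KernelContext, while scale_switch_case() below
+// dispatches on the runtime DataType with an explicit switch. Both paths end
+// in the same pten::ScaleKernel<T> instantiations registered by the kernel
+// files above.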
+
+#pragma once
+
+#include "glog/logging.h"
+
+#include "paddle/pten/api/include/tensor.h"
+#include "paddle/pten/api/lib/api_registry.h"
+#include "paddle/pten/api/lib/kernel_dispatch.h"
+#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/common/scalar_array.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/infermeta/unary.h"
+#include "paddle/pten/kernels/scale_kernel.h"
+
+namespace paddle {
+namespace experimental {
+
+PADDLE_API Tensor scale_kernel_context(const Tensor& x,
+                                       const Scalar& scale,
+                                       float bias,
+                                       bool bias_after_scale) {
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    auto kernel_key_set = ParseKernelKeyByInputArgs(x);
+    auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
+  auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
+      "scale", {kernel_backend, kernel_layout, kernel_data_type});
+  VLOG(6) << "scale API kernel key: [" << kernel_backend << ", "
+          << kernel_layout << ", " << kernel_data_type << "]";
+  VLOG(6) << "scale API kernel: " << kernel;
+
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
+  auto kernel_context = pten::KernelContext(dev_ctx);
+
+  auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
+  kernel_context.EmplaceBackInput(dense_x);
+
+  kernel_context.EmplaceBackAttr(pten::Scalar(scale));
+  kernel_context.EmplaceBackAttr(bias);
+  kernel_context.EmplaceBackAttr(bias_after_scale);
+
+  auto out_meta = pten::UnchangedInferMeta(dense_x->meta());
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(kernel_backend)),
+      std::move(out_meta));
+  kernel_context.EmplaceBackOutput(dense_out);
+
+  Tensor out;
+  out.set_impl(dense_out);
+
+  kernel(&kernel_context);
+  return out;
+}
+
+static void ScaleCPU(DataType kernel_dtype,
+                     const pten::CPUContext& dev_ctx,
+                     const pten::DenseTensor& x,
+                     const Scalar& scale,
+                     float bias,
+                     bool bias_after_scale,
+                     pten::DenseTensor* dense_out) {
+  switch (kernel_dtype) {
+    case pten::DataType::FLOAT64: {
+      pten::ScaleKernel<double>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::FLOAT32: {
+      pten::ScaleKernel<float>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::BFLOAT16: {
+      pten::ScaleKernel<paddle::platform::bfloat16>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::INT64: {
+      pten::ScaleKernel<int64_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::INT32: {
+      pten::ScaleKernel<int32_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::INT16: {
+      pten::ScaleKernel<int16_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::INT8: {
+      pten::ScaleKernel<int8_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::UINT8: {
+      pten::ScaleKernel<uint8_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    default: {
+      PADDLE_THROW(paddle::platform::errors::Fatal(
+          "Detected unsupported data type. "
+          "Only Float64, Float32, BFloat16, Int64, Int32, Int16, Int8, UInt8 "
+          "are supported for now."));
+      break;
+    }
+  }
+}
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+static void ScaleGPU(DataType kernel_dtype,
+                     const pten::GPUContext& dev_ctx,
+                     const pten::DenseTensor& x,
+                     const Scalar& scale,
+                     float bias,
+                     bool bias_after_scale,
+                     pten::DenseTensor* dense_out) {
+  switch (kernel_dtype) {
+    case pten::DataType::FLOAT64: {
+      pten::ScaleKernel<double>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::FLOAT32: {
+      pten::ScaleKernel<float>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::FLOAT16: {
+      pten::ScaleKernel<paddle::platform::float16>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::INT64: {
+      pten::ScaleKernel<int64_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::INT32: {
+      pten::ScaleKernel<int32_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::INT16: {
+      pten::ScaleKernel<int16_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::INT8: {
+      pten::ScaleKernel<int8_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    case pten::DataType::UINT8: {
+      pten::ScaleKernel<uint8_t>(
+          dev_ctx, x, pten::Scalar(scale), bias, bias_after_scale, dense_out);
+      break;
+    }
+    default: {
+      PADDLE_THROW(paddle::platform::errors::Fatal(
+          "Detected unsupported data type. "
+          "Only Float64, Float32, Float16, Int64, Int32, Int16, Int8, UInt8 "
+          "are supported for now."));
+      break;
+    }
+  }
+}
+#endif
+
+Tensor scale_switch_case(const Tensor& x,
+                         const Scalar& scale,
+                         float bias,
+                         bool bias_after_scale) {
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    auto kernel_key_set = ParseKernelKeyByInputArgs(x);
+    auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
+  auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
+      "scale", {kernel_backend, kernel_layout, kernel_data_type});
+  VLOG(6) << "scale API kernel key: [" << kernel_backend << ", "
+          << kernel_layout << ", " << kernel_data_type << "]";
+  VLOG(6) << "scale API kernel: " << kernel;
+
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
+
+  auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
+
+  auto out_meta = pten::UnchangedInferMeta(dense_x->meta());
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(kernel_backend)),
+      std::move(out_meta));
+
+  Tensor out;
+  out.set_impl(dense_out);
+
+  switch (kernel_backend) {
+    case Backend::CPU:
+      ScaleCPU(kernel_data_type,
+               static_cast<const pten::CPUContext&>(*dev_ctx),
+               *dense_x,
+               scale,
+               bias,
+               bias_after_scale,
+               dense_out.get());
+      break;
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    case Backend::GPU:
+      ScaleGPU(kernel_data_type,
+               static_cast<const pten::GPUContext&>(*dev_ctx),
+               *dense_x,
+               scale,
+               bias,
+               bias_after_scale,
+               dense_out.get());
+      break;
+#endif
+    default:
+      PADDLE_THROW(paddle::platform::errors::Fatal(
+          "Detected unsupported backend. "
+          "Only CPU and CUDA Backend are supported for now. "
+          "Please double check if your backend falls into the above two "
+          "categories."));
+  }
+
+  return out;
+}
+
+}  // namespace experimental
+}  // namespace paddle
diff --git a/paddle/pten/tests/kernels/test_scale_dev_api.cc b/paddle/pten/tests/kernels/test_scale_dev_api.cc
index e69de29bb2..fe26f56552 100644
--- a/paddle/pten/tests/kernels/test_scale_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_scale_dev_api.cc
@@ -0,0 +1,116 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <memory>
+
+#include "paddle/pten/kernels/scale_kernel.h"
+
+#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+namespace pten {
+namespace tests {
+
+namespace framework = paddle::framework;
+using DDim = paddle::framework::DDim;
+
+TEST(DEV_API, scale) {
+  // 1. create tensor
+  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+      paddle::platform::CPUPlace());
+  pten::DenseTensor dense_x(alloc,
+                            pten::DenseTensorMeta(pten::DataType::FLOAT32,
+                                                  framework::make_ddim({3, 4}),
+                                                  pten::DataLayout::NCHW));
+
+  auto* dense_x_data = dense_x.mutable_data<float>();
+  for (size_t i = 0; i < 12; ++i) {
+    dense_x_data[i] = i * 1.0;
+  }
+  float scale = 2;
+  float bias = 1;
+  bool bias_after_scale = true;
+
+  paddle::platform::DeviceContextPool& pool =
+      paddle::platform::DeviceContextPool::Instance();
+  auto* dev_ctx = pool.Get(paddle::platform::CPUPlace());
+
+  // 2. test API
+  auto out = pten::Scale<float>(
+      *(static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx)),
+      dense_x,
+      scale,
+      bias,
+      bias_after_scale);
+
+  // 3. check result
+  ASSERT_EQ(out.dims().size(), 2);
+  ASSERT_EQ(out.numel(), 12);
+  ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32);
+  ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW);
+
+  auto expect_result = 23;
+  auto actual_result = out.data<float>()[11];
+  ASSERT_NEAR(expect_result, actual_result, 1e-6f);
+}
+
+TEST(DEV_API, scale_host) {
+  // 1. create tensor
+  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+      paddle::platform::CPUPlace());
+  pten::DenseTensor dense_x(alloc,
+                            pten::DenseTensorMeta(pten::DataType::FLOAT32,
+                                                  framework::make_ddim({3, 4}),
+                                                  pten::DataLayout::NCHW));
+  auto* dense_x_data = dense_x.mutable_data<float>();
+  for (size_t i = 0; i < 12; ++i) {
+    dense_x_data[i] = i * 1.0;
+  }
+  const auto alloc2 = std::make_shared<paddle::experimental::DefaultAllocator>(
+      paddle::platform::CPUPlace());
+  pten::DenseTensor scale(alloc2,
+                          pten::DenseTensorMeta(pten::DataType::FLOAT32,
+                                                framework::make_ddim({1}),
+                                                pten::DataLayout::NCHW));
+  scale.mutable_data<float>()[0] = 2;
+  float bias = 1;
+  bool bias_after_scale = true;
+
+  paddle::platform::DeviceContextPool& pool =
+      paddle::platform::DeviceContextPool::Instance();
+  auto* dev_ctx = pool.Get(paddle::platform::CPUPlace());
+
+  // 2. test API
+  auto out = pten::Scale<float>(
+      *(static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx)),
+      dense_x,
+      scale,
+      bias,
+      bias_after_scale);
+
+  // 3. check result
+  ASSERT_EQ(out.dims().size(), 2);
+  ASSERT_EQ(out.numel(), 12);
+  ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32);
+  ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW);
+
+  auto expect_result = 23;
+  auto actual_result = out.data<float>()[11];
+  ASSERT_NEAR(expect_result, actual_result, 1e-6f);
+}
+
+}  // namespace tests
+}  // namespace pten
diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py
index e69de29bb2..e8539b11d1 100644
--- a/python/paddle/utils/code_gen/api_gen.py
+++ b/python/paddle/utils/code_gen/api_gen.py
@@ -0,0 +1,435 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
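+
+# The yaml entries consumed below follow the schema read in API.__init__;
+# an illustrative entry (hypothetical, not copied from api.yaml) looks like:
+#
+#   - api : scale
+#     args : (const Tensor& x, const Scalar& scale, float bias, bool bias_after_scale)
+#     output : Tensor
+#     infer_meta :
+#       func : UnchangedInferMeta
+#       param : [x]
+#     kernel :
+#       func : scale
+#
+# An entry may instead carry an 'invoke' key that forwards to another API,
+# which bypasses kernel selection entirely (see is_base_api below).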
+
+import os
+import yaml
+import argparse
+
+
+class API:
+    prefix_tensor_name = 'dense_'
+
+    def __init__(self, api_item_yaml):
+        self.api = api_item_yaml['api']
+        # args:
+        #   inputs:
+        #     names : [], list of input names
+        #   attrs:
+        #     names : [], list of attribute names
+        #     attr_info : { attr_name : (type, default_values)}
+        self.args = self.parse_args(api_item_yaml['args'])
+        self.output = api_item_yaml['output']
+        self.is_base_api = True
+        if 'invoke' in api_item_yaml:
+            self.is_base_api = False
+            self.invoke = api_item_yaml['invoke']
+        else:
+            self.kernel = api_item_yaml['kernel']
+            if 'backend' not in self.kernel or len(self.kernel['backend']) == 0:
+                self.kernel['backend'] = None
+            if 'layout' not in self.kernel or len(self.kernel['layout']) == 0:
+                self.kernel['layout'] = None
+            if 'data_type' not in self.kernel or len(self.kernel[
+                    'data_type']) == 0:
+                self.kernel['data_type'] = None
+            if 'param' not in self.kernel:
+                self.kernel['param'] = None
+
+            self.infer_meta = api_item_yaml['infer_meta']
+            if 'param' not in self.infer_meta:
+                self.infer_meta['param'] = None
+
+    def parse_args(self, args_str):
+        inputs = {'names': []}
+        attrs = {'names': [], 'attr_info': {}}
+        args_str = args_str.strip()
+        assert args_str.startswith('(') and args_str.endswith(')'), \
+            f"Args declaration should start with '(' and end with ')', please check the args of {self.api} in api.yaml."
+        args_str = args_str[1:-1]
+        args_list = args_str.split(',')
+        input_types = ['const Tensor&', 'const Tensor &']
+        attr_types = ['const Scalar&', 'const Scalar &', 'const ScalarArray&', 'const ScalarArray &', \
+                      'int', 'int32_t', 'int64_t', 'size_t', 'float', 'double', 'bool', \
+                      'const std::vector<int64_t>&', 'Backend', 'DataLayout', 'DataType']
+        args_declare_str = ""
+        args_define_str = ""
+        for item in args_list:
+            item = item.strip()
+            # match the input tensor
+            has_input = False
+            for in_type in input_types:
+                if item.startswith(in_type):
+                    input_name = item[len(in_type):].strip()
+                    assert len(input_name) > 0, \
+                        f"The input tensor name should not be empty. Please check the args of {self.api} in api.yaml."
+                    inputs['names'].append(input_name)
+                    args_declare_str = args_declare_str + in_type + ' ' + input_name + ', '
+                    args_define_str = args_define_str + in_type + ' ' + input_name + ', '
+                    has_input = True
+                    break
+            if has_input:
+                continue
+
+            # match the attribute
+            for attr_type in attr_types:
+                if item.startswith(attr_type):
+                    attr_name = item[len(attr_type):].strip()
+                    assert len(attr_name) > 0, \
+                        f"The attribute name should not be empty. Please check the args of {self.api} in api.yaml."
+                    default_value = None
+                    if '=' in attr_name:
+                        attr_infos = attr_name.split('=')
+                        attr_name = attr_infos[0].strip()
+                        default_value = attr_infos[1].strip()
+
+                    default_value_str = "" if default_value is None else '=' + default_value
+                    args_declare_str = args_declare_str + attr_type + ' ' + attr_name + default_value_str + ', '
+                    args_define_str = args_define_str + attr_type + ' ' + attr_name + ', '
+                    attrs['names'].append(attr_name)
+                    attrs['attr_info'][attr_name] = (attr_type, default_value)
+                    break
+
+        args = {
+            'inputs': inputs,
+            'attrs': attrs,
+            'args_declare': args_declare_str[:-2],
+            'args_define': args_define_str[:-2]
+        }
+        return args
+
+    def gene_api_declaration(self):
+        return f"""
+PADDLE_API {self.output} {self.api}({self.args['args_declare']});
+"""
+
+    def gene_kernel_select(self, input_names, attrs, kernel):
+
+        kernel_key_item_init = """
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+"""
+        # Check the tensor options
+        attr_backend_count = 0
+        attr_layout_count = 0
+        attr_data_type_count = 0
+        for attr_name in attrs['names']:
+            if attrs['attr_info'][attr_name][0] == 'Backend':
+                assert kernel['backend'] is not None, \
+                    f"{self.api} api: When there is a parameter with 'Backend' type in attributes, you must set the backend of the kernel manually."
+                attr_backend_count = attr_backend_count + 1
+            if attrs['attr_info'][attr_name][0] == 'DataLayout':
+                assert kernel['layout'] is not None, \
+                    f"{self.api} api: When there is a parameter with 'DataLayout' type in attributes, you must set the layout of the kernel manually."
+                attr_layout_count = attr_layout_count + 1
+            if attrs['attr_info'][attr_name][0] == 'DataType':
+                assert kernel['data_type'] is not None, \
+                    f"{self.api} api: When there is a parameter with 'DataType' type in attributes, you must set the data_type of the kernel manually."
+                attr_data_type_count = attr_data_type_count + 1
+
+        # preprocess kernel configures
+        kernel_select_code = ""
+        if kernel['backend'] is not None:
+            if '>' in kernel['backend']:
+                vars_list = kernel['backend'].split('>')
+                assert len(
+                    vars_list
+                ) == 2, f"{self.api} api: The number of params to set backend with '>' only allows 2, but received {len(vars_list)}."
+                assert (vars_list[0].strip() in attrs['names']) and (attrs['attr_info'][vars_list[0].strip()][0] == 'Backend'), \
+                    f"{self.api} api: When using '>' to set the kernel backend, the first param should be an attribute with Backend type."
+                kernel_select_code = kernel_select_code + f"""
+  kernel_backend = ParseBackendWithInputOrder({vars_list[0].strip()}, {vars_list[1].strip()});
+"""
+
+            else:
+                args_str = ""
+                for ele in kernel['backend'].split(','):
+                    args_str = args_str + ele.strip() + ', '
+                kernel_select_code = kernel_select_code + f"""
+  kernel_backend = ParseBackend({args_str[:-2]});
+"""
+
+        if kernel['layout'] is not None:
+            if '>' in kernel['layout']:
+                vars_list = kernel['layout'].split('>')
+                assert len(
+                    vars_list
+                ) == 2, f"{self.api} api: The number of params to set layout with '>' only allows 2, but received {len(vars_list)}."
+                assert vars_list[0].strip() in attrs['names'] and attrs['attr_info'][vars_list[0].strip()][0] == 'DataLayout', \
+                    f"{self.api} api: When using '>' to set the kernel layout, the first param should be an attribute with DataLayout type."
+                kernel_select_code = kernel_select_code + f"""
+  kernel_layout = ParseLayoutWithInputOrder({vars_list[0].strip()}, {vars_list[1].strip()});
+"""
+
+            else:
+                vars_list = kernel['layout'].split(',')
+                assert len(
+                    vars_list
+                ) == 1, f"{self.api} api: The number of params to set layout must be 1, but received {len(vars_list)}."
+                kernel_select_code = kernel_select_code + f"""
+  kernel_layout = ParseLayout({vars_list[0].strip()});
+"""
+
+        if kernel['data_type'] is not None:
+            if '>' in kernel['data_type']:
+                vars_list = kernel['data_type'].split('>')
+                assert len(
+                    vars_list
+                ) == 2, f"{self.api} api: The number of params to set data_type with '>' only allows 2, but received {len(vars_list)}."
+                assert vars_list[0].strip() in attrs['names'] and attrs['attr_info'][vars_list[0].strip()][0] == 'DataType', \
+                    f"{self.api} api: When using '>' to set the kernel data_type, the first param should be an attribute with DataType type."
+                kernel_select_code = kernel_select_code + f"""
+  kernel_data_type = ParseDataTypeWithInputOrder({vars_list[0].strip()}, {vars_list[1].strip()});
+"""
+
+            else:
+                vars_list = kernel['data_type'].split(',')
+                assert len(
+                    vars_list
+                ) == 1, f"{self.api} api: The number of params to set data_type must be 1, but received {len(vars_list)}."
+                kernel_select_code = kernel_select_code + f"""
+  kernel_data_type = ParseDataType({vars_list[0].strip()});
+"""
+
+        if len(input_names) == 0:
+            assert attr_backend_count > 0 and attr_layout_count > 0 and attr_data_type_count > 0, \
+                f"{self.api} api: When there is no input tensor, the args must have 'Backend', 'DataLayout' and 'DataType'."
+
+        kernel_select_args = ""
+        for input_name in input_names:
+            kernel_select_args = kernel_select_args + input_name + ", "
+
+        if len(kernel_select_args) > 2:
+            kernel_select_args = kernel_select_args[:-2]
+
+        kernel_select_code = kernel_key_item_init + kernel_select_code
+
+        if len(input_names) > 0:
+            kernel_select_code = kernel_select_code + f"""
+  if (kernel_backend == Backend::UNDEFINED
+        || kernel_layout == DataLayout::UNDEFINED
+        || kernel_data_type == DataType::UNDEFINED ) {{
+    auto kernel_key_set = ParseKernelKeyByInputArgs({kernel_select_args});
+    auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {{
+      kernel_backend = kernel_key.backend();
+    }}
+    if (kernel_layout == DataLayout::UNDEFINED) {{
+      kernel_layout = kernel_key.layout();
+    }}
+    if (kernel_data_type == DataType::UNDEFINED) {{
+      kernel_data_type = kernel_key.dtype();
+    }}
+  }}"""
+
+        kernel_select_code = kernel_select_code + f"""
+  auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
+      "{kernel['func']}", {{kernel_backend, kernel_layout, kernel_data_type}});
+  VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
+  VLOG(6) << "{self.api} API kernel: " << kernel;"""
+
+        return kernel_select_code
+
+    def gene_infer_meta(self, input_names, attr_names, infer_meta) -> str:
+        infer_meta_params = infer_meta['param'] if infer_meta[
+            'param'] is not None else input_names + attr_names
+        param_code = ""
+        for param in infer_meta_params:
+            if param in input_names:
+                param_code = param_code + self.prefix_tensor_name + param + "->meta(), "
+            elif param in attr_names:
+                param_code = param_code + param + ", "
+            elif isinstance(param, str):
+                param_code = param_code + "\"" + param + "\", "
+            elif isinstance(param, bool):
+                param_code = param_code + str(param).lower() + ", "
+            else:
+                param_code = param_code + str(param) + ", "
+
+        param_code = param_code[:-2]
+        return f"""
+  auto out_meta = pten::{infer_meta['func']}({param_code});
+"""
+
+    def get_kernel_args(self, input_names, attrs, kernel_param):
+        input_tensor_code = ""
+        for input_name in input_names:
+            # set input code
+            input_tensor_code = input_tensor_code + f"""
+  auto {self.prefix_tensor_name}{input_name} = std::dynamic_pointer_cast<pten::DenseTensor>({input_name}.impl());"""
+
+        attr_names = attrs['names']
+        if kernel_param is None:
+            kernel_param = input_names + attr_names
+
+        kernel_args = "*dev_ctx, "
+        for param in kernel_param:
+            if param in input_names:
+                kernel_args = kernel_args + "*" + self.prefix_tensor_name + param + ", "
+            elif param in attr_names:
+                # set attr for kernel_context
+                if 'ScalarArray' in attrs['attr_info'][param][0]:
+                    param = 'pten::ScalarArray(' + param + ')'
+                elif 'Scalar' in attrs['attr_info'][param][0]:
+                    param = 'pten::Scalar(' + param + ')'
+                kernel_args = kernel_args + param + ", "
+            elif isinstance(param, bool):
+                kernel_args = kernel_args + str(param).lower() + ", "
+            else:
+                kernel_args = kernel_args + str(param) + ", "
+        return input_tensor_code, kernel_args[:-2]
+
+    def gene_api_code(self):
+        if self.is_base_api:
+            input_tensors, kernel_args = self.get_kernel_args(
+                self.args['inputs']['names'], self.args['attrs'],
+                self.kernel['param'])
+            return f"""
+PADDLE_API {self.output} {self.api}({self.args["args_define"]}) {{
+{self.gene_kernel_select(self.args['inputs']['names'], self.args['attrs'], self.kernel)}
+
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
+{input_tensors}
+{self.gene_infer_meta(self.args['inputs']['names'], self.args['attrs']['names'], self.infer_meta)}
+  auto dense_out = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          pten::TransToFluidPlace(kernel_backend)),
+      std::move(out_meta));
+
+  Tensor out;
+  out.set_impl(dense_out);
+
+  auto* kernel_fn = kernel.GetVariadicKernelFn();
+  (*kernel_fn)({kernel_args}, dense_out.get());
+
+  return out;
+}}
+"""
+
+        else:
+            return f"""
+PADDLE_API {self.output} {self.api}({self.args["args_define"]}) {{
+  return {self.invoke};
+}}
+"""
+
+
+def header_include():
+    return """
+#include "paddle/pten/api/include/tensor.h"
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/common/scalar_array.h"
+"""
+
+
+def source_include(header_file_path):
+    return f"""
+#include "{header_file_path}"
+#include <memory>
+
+#include "glog/logging.h"
+
+#include "paddle/pten/api/include/kernel_signature.h"
+#include "paddle/pten/api/lib/api_registry.h"
+#include "paddle/pten/api/lib/kernel_dispatch.h"
+#include "paddle/pten/api/lib/utils/storage.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/infermeta/binary.h"
+#include "paddle/pten/infermeta/multiary.h"
+#include "paddle/pten/infermeta/nullary.h"
+#include "paddle/pten/infermeta/unary.h"
+#include "paddle/pten/kernels/declarations.h"
+"""
+
+
+def api_register():
+    return """
+PT_REGISTER_API(Creation);
+PT_REGISTER_API(Linalg);
+PT_REGISTER_API(Manipulation);
+PT_REGISTER_API(Math);
+"""
+
+
+def api_namespace():
+    return ("""
+namespace paddle {
+namespace experimental {
+
+""", """
+
+}  // namespace experimental
+}  // namespace paddle
+""")
+
+
+def generate_api(api_yaml_path, header_file_path, source_file_path):
+
+    with open(api_yaml_path, 'r') as f:
+        apis = yaml.load(f, Loader=yaml.FullLoader)
+    header_file = open(header_file_path, 'w')
+    source_file = open(source_file_path, 'w')
+
+    namespace = api_namespace()
+
+    header_file.write("#pragma once\n")
+    header_file.write(header_include())
+    header_file.write(namespace[0])
+
+    include_header_file = "paddle/pten/api/include/api.h"
+    source_file.write(source_include(include_header_file))
+    source_file.write(namespace[0])
+
+    for api in apis:
+        api_code = API(api)
+        print(api_code.gene_api_declaration())
+        header_file.write(api_code.gene_api_declaration())
+        source_file.write(api_code.gene_api_code())
+
+    header_file.write(namespace[1])
+    source_file.write(namespace[1])
+    source_file.write(api_register())
+
+    header_file.close()
+    source_file.close()
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Generate PaddlePaddle C++ API files')
+    parser.add_argument(
+        '--api_yaml_path',
+        help='path to yaml file directory',
+        default='python/paddle/utils/code_gen/api.yaml')
+    parser.add_argument(
+        '--api_header_path',
+        help='output of generated api header code file',
+        default='paddle/pten/api/include/api.h')
+
+    parser.add_argument(
+        '--api_source_path',
+        help='output of generated api source code file',
+        default='paddle/pten/api/lib/api.cc')
+
+    options = parser.parse_args()
+
+    api_yaml_path = options.api_yaml_path
+    header_file_path = options.api_header_path
+    source_file_path = options.api_source_path
+
+    generate_api(api_yaml_path, header_file_path, source_file_path)
+
+
+if __name__ == '__main__':
+    main()
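For reference, the generator is driven by the three flags registered in main(); with the defaults above, the typical invocation from the repository root is:

    python python/paddle/utils/code_gen/api_gen.py \
        --api_yaml_path python/paddle/utils/code_gen/api.yaml \
        --api_header_path paddle/pten/api/include/api.h \
        --api_source_path paddle/pten/api/lib/api.cc

Each flag simply defaults to the path shown, so running the script with no arguments is equivalent.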
-- 
Gitee