From 070c127fb429b4745560b4c022fb75323265d066 Mon Sep 17 00:00:00 2001
From: PaddlePaddle-Gardener
Date: Wed, 12 Jan 2022 14:40:39 +0800
Subject: [PATCH] mirgate_38576

---
 .../eager/accumulation/accumulation_node.cc   |  84 +++++
 .../eager/accumulation/accumulation_node.h    |  43 +++
 paddle/fluid/eager/eager_tensor.h             | 322 +++++++++++++++++
 .../data_structure_tests/eager_tensor_test.cc | 132 +++++++
 .../performance_tests/benchmark_utils.cc      | 328 ++++++++++++++++++
 .../eager/tests/task_tests/generated_test.cc  | 127 +++++++
 paddle/fluid/pybind/eager_method.cc           |  58 +++-
 paddle/fluid/pybind/eager_properties.cc       | 192 ++++++++++
 .../fluid/dygraph/varbase_patch_methods.py    |  32 +-
 9 files changed, 1297 insertions(+), 21 deletions(-)

diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc
index e69de29bb2..ed1146eed0 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.cc
+++ b/paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -0,0 +1,84 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
+#include "paddle/fluid/eager/accumulation/gradient_accumulation.h"
+#include "paddle/fluid/eager/eager_tensor.h"
+
+#include "paddle/pten/api/all.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/include/core.h"
+
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/errors.h"
+
+#include "glog/logging.h"
+
+static void CopyOrAddTensor(egr::EagerTensor* tensor,
+                            const egr::EagerTensor& t) {
+  if (t.Var().IsInitialized()) {
+    const_cast<egr::EagerTensor*>(&t)->SyncToTensor();
+  }
+  if (!tensor->defined() || !tensor->initialized()) {
+    // Simply copy tensor->impl
+    *tensor = t;
+  } else {
+    // Accumulation
+    egr::TensorAdd(t, tensor);
+  }
+}
+
+namespace egr {
+
+void GradNodeAccumulation::RetainGrad(
+    const std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook) {
+  retain_grad_hook_ = hook;
+}
+
+std::vector<std::vector<egr::EagerTensor>> GradNodeAccumulation::operator()(
+    const std::vector<std::vector<egr::EagerTensor>>& grads) {
+  PADDLE_ENFORCE(grads.size() == 1,
+                 paddle::platform::errors::Fatal(
+                     "GradNodeAccumulation should take exactly 1 grad tensor. "
+                     "However received: %d slots.",
+                     grads.size()));
+  PADDLE_ENFORCE(grads[0].size() == 1,
+                 paddle::platform::errors::Fatal(
+                     "GradNodeAccumulation should take exactly 1 grad tensor. "
+                     "However received: %d in slot %d.",
+                     grads[0].size(), 0));
+  // Apply Gradient Hooks
+  if (GradientHooksRegistered()) {
+    std::vector<std::vector<egr::EagerTensor>> hooked_grads =
+        ApplyGradientHooks(grads);
+    // TODO(jiabin): It's a little weird
+    CopyOrAddTensor(&accumulated_grad, hooked_grads[0][0]);
+  } else {
+    CopyOrAddTensor(&accumulated_grad, grads[0][0]);
+  }
+
+  if (retain_grad_hook_ != nullptr) {
+    retain_grad_hook_(accumulated_grad);
+  }
+
+  // Apply Reduce Hooks
+  if (ReduceHooksRegistered()) {
+    ApplyReduceHooks();
+  }
+
+  return {{accumulated_grad}};
+}
+
+}  // namespace egr
diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h
index e69de29bb2..9578924b78 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.h
+++ b/paddle/fluid/eager/accumulation/accumulation_node.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/eager/grad_node_info.h"
+
+namespace egr {
+
+class GradNodeAccumulation : public GradNodeBase {
+ public:
+  // Constructor: configure fwd input tensors to grad node
+  GradNodeAccumulation() : GradNodeBase(1, 1) { SetDefaultGradInOutMeta(); }
+
+  ~GradNodeAccumulation() override = default;
+
+  // Functor: perform backward computations
+  virtual std::vector<std::vector<egr::EagerTensor>> operator()(
+      const std::vector<std::vector<egr::EagerTensor>>& grads) override;
+
+  void RetainGrad(
+      const std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook);
+
+  egr::EagerTensor* Grad() { return &accumulated_grad; }
+
+ private:
+  egr::EagerTensor accumulated_grad;
+
+  std::function<egr::EagerTensor(const egr::EagerTensor&)> retain_grad_hook_;
+};
+
+}  // namespace egr
diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h
index e69de29bb2..72fe5732e9 100644
--- a/paddle/fluid/eager/eager_tensor.h
+++ b/paddle/fluid/eager/eager_tensor.h
@@ -0,0 +1,322 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+// framework deps
+#include "paddle/fluid/framework/pten_utils.h"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/framework/variable.h"
+// pten deps
+#include "paddle/pten/all.h"
+#include "paddle/pten/api/all.h"
+#include "paddle/pten/api/lib/api_declare.h"
+#include "paddle/pten/api/lib/utils/tensor_utils.h"
+#include "paddle/pten/core/convert_utils.h"
+/**
+ * This class is used by Eager mode for now. It is painful to do this in
+ * Eager mode; the better choice is to use paddle::experimental::Tensor
+ * directly. However, we have a bunch of nested kernel code that uses
+ * paddle::framework::Variable in its inner logic, so we have to provide a
+ * variable in paddle::framework::ExecutionContext to support it. We should
+ * remove this as soon as we finish our latest Pten Lib, and use
+ * paddle::experimental::Tensor instead.
+ *
+ * Note: Keep this class as clean as possible.
+ * This class should only support methods declared in
+ * paddle::experimental::Tensor, plus access methods for
+ * paddle::framework::Variable; no more members are acceptable.
+ * **/ + +namespace egr { +class EagerTensor final { + public: + /* Part 1: Constructors */ + EagerTensor() + : tensor_(std::make_shared()), + var_(paddle::framework::Variable()) {} + explicit EagerTensor(const std::string& name) + : tensor_(std::make_shared(name)), + var_(paddle::framework::Variable()) {} + /** + * @description: Use a TensorImpl pointer to construct a Tensor + * @param {shared_ptr} tensor_impl + * @return {Tensor} + */ + explicit EagerTensor(const std::shared_ptr& tensor_impl) + : tensor_(std::make_shared(tensor_impl)), + var_(paddle::framework::Variable()) {} + + EagerTensor(const EagerTensor&) = default; + EagerTensor(EagerTensor&&) = default; + + /* Part 2: Name access methods */ + /** + * @description: Return the name of current Tensor. + * @param None + * @return {const std::string&} + */ + const std::string& name() const { return tensor_->name(); } + /** + * @description: Set the name of current Tensor. + * @param {const std::string& name} + * @return None + */ + void set_name(const std::string& name) { tensor_->set_name(name); } + + /* Part 3: Dimension, DataType and DataLayout methods */ + /** + * @description: Return the number of elements of current Tensor. + * @param None + * @return {int64_t} + */ + int64_t numel() const { return tensor_->numel(); } + /** + * @description: Return the shape (dimensions) of current Tensor. + * @param None + * @return {DDim} + */ + paddle::framework::DDim shape() const { return tensor_->dims(); } + + /** + * @description: Return the data type of current Tensor. + * @param None + * @return {DataType} + */ + paddle::experimental::DataType type() const { return tensor_->type(); } + + /** + * @description: Return the layout of current Tensor. + * @param None + * @return {DataLayout} + */ + paddle::experimental::DataLayout layout() const { return tensor_->layout(); } + + /* Part 3: Device and Backend methods */ + /** + * @description: Return the place (device) of current Tensor. + * @param None + * @return {Place} + */ + paddle::platform::Place place() const { return tensor_->inner_place(); } + + /** + * Backend judgment APIs, shield the concept of Backend. + */ + bool is_cpu() const { return paddle::platform::is_cpu_place(place()); } + bool is_cuda() const { return paddle::platform::is_gpu_place(place()); } + + /* Part 4: Data Access methods */ + /** + * @description: Return the implemention of current Tensor. + * @param None + * @return {std::shared_ptr} + */ + std::shared_ptr impl() const { return tensor_->impl(); } + + /** + * @description: Set the implemention of current Tensor. + * @param {std::shared_ptr} + * @return None + */ + void set_impl(const std::shared_ptr& impl) { + tensor_->set_impl(impl); + } + + // TODO(chenweihang): Whether API Tensor need `data` and `mutable_data`? + + // TODO(chenweihang): slice and split methods use kernels? + + /* Part 5: Status utils methods */ + /** + * @description: Determine whether it is a meaningful Tensor + * @param None + * @return {bool} + */ + bool defined() const { return tensor_->defined(); } + + /** + * @description: Determine whether Tensor is initialized + * @param None + * @return {bool} + */ + bool initialized() const { return tensor_->initialized(); } + + bool safe_initialized() const { + return initialized() || var_.IsInitialized(); + } + + /** + * @description: Reset the Tensor implementation + * @param None + * @return {void} + */ + void reset() { tensor_->reset(); } + + /** + * @brief Transfer the current Tensor to the specified device and return. 
+ * + * @param place, the target place of which the tensor will copy to. + * @return Tensor + */ + // TODO(chenweihang): replace Backend by new Place + EagerTensor copy_to(pten::Backend backend, bool blocking) const { + if (Var().IsInitialized()) { + const_cast(this)->SyncToTensor(); + } + return EagerTensor(tensor_->copy_to(backend, blocking)); + } + + /** + * @brief Transfer the source Tensor to current Tensor. + * + * @param src, the source Tensor to be copied. + * @param blocking, Should we copy this in sync way. + * @return void + */ + void copy_(const EagerTensor& src, const bool blocking) { + if (src.Var().IsInitialized()) { + const_cast(&src)->SyncToTensor(); + } + if (Var().IsInitialized()) { + SyncToTensor(); + } + tensor_->copy_(*(src.tensor_.get()), blocking); + } + + /* Part 6: Operator overloading */ + EagerTensor& operator=(const EagerTensor& x) & { + tensor_ = x.tensor_; + var_ = x.var_; + return *this; + } + EagerTensor& operator=(EagerTensor&& x) & { + tensor_ = std::move(x.tensor_); + var_ = std::move(x.var_); + return *this; + } + + /* Part 7: Autograd methods */ + paddle::experimental::AbstractAutogradMeta* get_autograd_meta() const { + return tensor_->get_autograd_meta(); + } + void set_autograd_meta( + std::shared_ptr + autograd_meta) { + tensor_->set_autograd_meta(autograd_meta); + } + + /** Part 9: Get framework::Variable from EagerTensor **/ + const paddle::framework::Variable& Var() const { return var_; } + + paddle::framework::Variable* MutableVar() { return &var_; } + + /** Part 10: Sync paddle::framework::Variable with pten::Tensor **/ + void SyncToVar(paddle::framework::proto::VarType_Type type = + paddle::framework::proto::VarType::LOD_TENSOR) { + // Synchronize allocation only once. + if (!var_.IsInitialized()) { + // TODO(jiabin): Support selected rows later. + if (this->initialized()) { + if (type == paddle::framework::proto::VarType::LOD_TENSOR) { + auto* framework_tensor = + var_.GetMutable(); + framework_tensor->Resize(tensor_->dims()); + framework_tensor->set_layout(tensor_->layout()); + // Contruct framework::Tensor from egr::EagerTensor + auto tensor_dense = + std::dynamic_pointer_cast(tensor_->impl()); + if (tensor_dense) { + paddle::experimental::SharesStorage(tensor_dense.get(), + framework_tensor); + } else { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Unrecognized egr::EagerTensor type, only " + "DenseTensor is supported for now.")); + } + } + } else { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Can not Sync EagerTensor %s whose " + "pten::DenseTensor is not initialized!", + name())); + } + } + } + /** Part 11: Sync paddle::framework::Variable with pten::Tensor **/ + void SyncToTensor() { + // Synchronize allocation only once. 
+ if (var_.IsInitialized()) { + if (var_.IsType()) { + SetImplWithLegacyTensor(); + } else if (var_.IsType()) { + SetImplWithLegacyTensor(); + } else { + PADDLE_THROW( + paddle::platform::errors::Fatal("Unable to fetch underlying tensor " + "from VarBase, only LoDTensor and " + "Tensor are supported for now")); + } + } else { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Can not Sync EagerTensor %s whose paddle::framework::Variable is " + "not initialized!", + name())); + } + } + + void ResetVar(const paddle::framework::Variable& src) { var_ = src; } + + const std::shared_ptr& Tensor() const { + return tensor_; + } + + void set_tensor(const std::shared_ptr& tensor) { + tensor_ = tensor; + } + + private: + template + void SetImplWithLegacyTensor() { + const auto& framework_tensor = var_.Get(); + if (this->initialized()) { + VLOG(8) << "Sync Var to initialized tensor for: " << name(); + paddle::experimental::ReMakePtenDenseTensor( + framework_tensor, + static_cast(this->impl().get())); + } else { + VLOG(8) << "Sync Var to uninitialized tensor for: " << name(); + this->set_impl(std::move( + paddle::experimental::MakePtenDenseTensor(framework_tensor))); + } + var_.Clear(); + } + + private: + /** + * @description: Use a pten::Tensor pointer to construct a EagerTensor, never + * public this!!!!. + * @param {pten::Tensor} tensor + * @return {EagerTensor} + */ + explicit EagerTensor(const paddle::experimental::Tensor& tensor) + : tensor_(std::make_shared(tensor)), + var_(paddle::framework::Variable()) {} + + std::shared_ptr tensor_ = nullptr; + paddle::framework::Variable var_; +}; +} // namespace egr diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc index e69de29bb2..84daf4eac4 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/pten/api/lib/utils/allocator.h" + +namespace eager_test { +using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta; +class AutogradMetaTest : public AbstractAutogradMeta { + public: + explicit AutogradMetaTest(int val) : val_(val) {} + int val_ = 0; +}; +} +TEST(EagerTensor, Constructor) { + egr::EagerTensor et1 = egr::EagerTensor(); + egr::EagerTensor et2 = egr::EagerTensor("et2"); + + CHECK_EQ(et1.defined(), false); + CHECK_EQ(et2.name(), "et2"); + + pten::DenseTensorMeta meta = pten::DenseTensorMeta( + pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); + std::shared_ptr dt = std::make_shared( + std::make_shared( + paddle::platform::CPUPlace()), + meta); + auto* dt_ptr = dt->mutable_data(); + dt_ptr[0] = 5.0f; + dt_ptr[1] = 10.0f; + egr::EagerTensor et3 = egr::EagerTensor(dt); + auto* et3_ptr = + std::dynamic_pointer_cast(et3.impl())->data(); + CHECK_EQ(et3_ptr[0], 5.0f); + CHECK_EQ(et3_ptr[1], 10.0f); + // copy constructor + egr::EagerTensor et4(et3); + auto* et4_ptr = + std::dynamic_pointer_cast(et4.impl())->data(); + CHECK_EQ(et4_ptr[0], 5.0f); + CHECK_EQ(et4_ptr[1], 10.0f); + egr::EagerTensor et5(std::move(et4)); + auto* et5_ptr = + std::dynamic_pointer_cast(et5.impl())->data(); + CHECK_EQ(et5_ptr[0], 5.0f); + CHECK_EQ(et5_ptr[1], 10.0f); +} + +TEST(EagerTensor, MemberFunction) { + egr::EagerTensor et3; + pten::DenseTensorMeta meta = pten::DenseTensorMeta( + pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); + std::shared_ptr dt = std::make_shared( + std::make_shared( + paddle::platform::CPUPlace()), + meta); + auto* dt_ptr = dt->mutable_data(); + dt_ptr[0] = 5.0f; + dt_ptr[1] = 10.0f; + VLOG(6) << "Make Dense Tensor"; + et3.set_name("et3"); + VLOG(6) << "Set Name"; + CHECK_EQ(et3.name(), "et3"); + CHECK_EQ(et3.defined(), false); + et3.set_impl(dt); + VLOG(6) << "Set impl"; + CHECK_EQ(et3.initialized(), true); + CHECK_EQ(et3.is_cpu(), true); + CHECK_EQ(et3.is_cuda(), false); + CHECK_EQ(et3.numel(), 2); + auto expected_dim = paddle::framework::make_ddim({1, 2}); + CHECK_EQ(et3.shape(), expected_dim); + CHECK_EQ(et3.type(), paddle::experimental::DataType::FLOAT32); + CHECK_EQ(et3.layout(), paddle::experimental::DataLayout::NCHW); + CHECK(paddle::platform::is_cpu_place(et3.place())); + VLOG(6) << "Get impl"; + auto* dt3_ptr = + std::dynamic_pointer_cast(et3.impl())->data(); + CHECK_EQ(dt3_ptr[0], 5.0f); + CHECK_EQ(dt3_ptr[1], 10.0f); + egr::EagerTensor et4 = et3; + VLOG(6) << "copy ="; + CHECK(et4.initialized() == true); + auto* dt4_ptr = + std::dynamic_pointer_cast(et4.impl())->data(); + CHECK_EQ(dt4_ptr[0], 5.0f); + CHECK_EQ(dt4_ptr[1], 10.0f); + VLOG(6) << "move ="; + egr::EagerTensor et5 = std::move(et4); + auto* dt5_ptr = + std::dynamic_pointer_cast(et5.impl())->data(); + CHECK_EQ(dt5_ptr[0], 5.0f); + CHECK_EQ(dt5_ptr[1], 10.0f); + VLOG(6) << "AutogradMeta"; + auto autograd_meta_test = std::make_shared(2); + et3.set_autograd_meta(autograd_meta_test); + auto* tmp_autograd_meta_test = + static_cast(et3.get_autograd_meta()); + CHECK_EQ(tmp_autograd_meta_test->val_, 2); + VLOG(6) << "SyncToVar"; + et3.SyncToVar(); + CHECK_EQ(et3.Var().Get().data()[0], + 5.0f); + CHECK_EQ(et3.Var().Get().data()[1], + 10.0f); + VLOG(6) << "SyncToTensor"; + CHECK(et3.initialized() == true); + et3.SyncToTensor(); + CHECK(et3.initialized() == true); + VLOG(6) << "Check Tensor"; + auto* dt3_tmp_ptr = + std::dynamic_pointer_cast(et3.impl())->data(); 
+ CHECK_EQ(dt3_tmp_ptr[0], 5.0f); + CHECK_EQ(dt3_tmp_ptr[1], 10.0f); + et3.reset(); + CHECK(et3.defined() == false); + VLOG(6) << "Finish"; +} diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc index e69de29bb2..e05a63a69d 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc @@ -0,0 +1,328 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h" + +#include +#include +#include +#include +#include + +// Eager +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/backward.h" +#include "paddle/fluid/eager/tests/test_utils.h" +#include "paddle/fluid/eager/utils.h" + +// Eager Generated +#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" + +// Fluid +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/imperative/basic_engine.h" +#include "paddle/fluid/imperative/tracer.h" +#include "paddle/fluid/memory/memcpy.h" + +static size_t max_num_benchmark_runs = 5000; + +namespace egr { + +/* --------------------- */ +/* ---- Eager Scale ---- */ +/* --------------------- */ +void benchmark_eager_scale(const EagerTensor& tensor, bool accuracy_check) { + EagerTensor input_tensor = tensor; + float scale = 2.0; + float bias = 3.0; + + size_t max_num_runs = accuracy_check ? 10 : max_num_benchmark_runs; + for (size_t i = 0; i < max_num_runs; i++) { + input_tensor = + egr::scale(input_tensor, scale, bias, true /*bias_after_scale*/, + true /*trace_backward*/); + } + + std::vector target_tensors = {input_tensor}; + RunBackward(target_tensors, {}); + + if (accuracy_check) { + // Examine Forward Grad (w.r.t max_num_runs = 10) + eager_test::CompareTensorWithValue(input_tensor, 8189.0); + // Examine Backward Grad (w.r.t max_num_runs = 10) + eager_test::CompareGradTensorWithValue(tensor, 1024.0); + } +} + +/* ----------------------------------- */ +/* ---- Eager Intermediate Matmul ---- */ +/* ----------------------------------- */ +void benchmark_eager_intermediate_matmul(const EagerTensor& X, + const EagerTensor& Y, + bool accuracy_check) { + EagerTensor input_tensor0 = X; + + size_t max_num_runs = accuracy_check ? 
2 : max_num_benchmark_runs; + for (size_t i = 0; i < max_num_runs; i++) { + input_tensor0 = matmul_v2_dygraph_function( + input_tensor0, Y, {{"trans_x", false}, {"trans_y", false}}); + } + + std::vector target_tensors = {input_tensor0}; + RunBackward(target_tensors, {}); + + if (accuracy_check) { + // Examine Forward Grad (w.r.t max_num_runs = 2) + eager_test::CompareVariableWithValue(input_tensor0, 16); + // Examine Backward Grad (w.r.t max_num_runs = 2) + eager_test::CompareGradTensorWithValue(X, 16); + eager_test::CompareGradTensorWithValue(Y, 16); + } +} + +/* -------------------------------- */ +/* ---- Eager Intermediate MLP ---- */ +/* -------------------------------- */ +void benchmark_eager_intermediate_mlp(const EagerTensor& X, + const std::vector& Ws, + const std::vector& Bs, + bool accuracy_check) { + EagerTensor input0 = X; + + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + EagerTensor Out = matmul_v2_dygraph_function( + input0, Ws[i], {{"trans_x", false}, {"trans_y", false}}); + + input0 = elementwise_add_dygraph_function(Out, Bs[i], {}); + } + + EagerTensor Out = reduce_sum_dygraph_function(input0, {{"reduce_all", true}}); + + std::vector target_tensors = {Out}; + RunBackward(target_tensors, {}); + + if (accuracy_check) { + std::unordered_map result = + compute_mlp_expected_results(); + + // Examine Forward Grad (w.r.t max_num_runs = 2) + eager_test::CompareVariableWithValue(Out, result["Out"]); + + // Examine Backward Grad (w.r.t max_num_runs = 2) + eager_test::CompareGradTensorWithValue(X, result["GradX"]); + eager_test::CompareGradTensorWithValue(Ws[0], result["GradW"]); + } +} + +} // namespace egr + +namespace paddle { +namespace imperative { + +static void FluidCheckTensorValue(const std::shared_ptr& X, + const paddle::platform::Place& place, + float value) { + auto* tensor = X->MutableVar()->GetMutable(); + float* t_ptr = tensor->mutable_data(place); + std::vector host_data(tensor->numel()); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (place == paddle::platform::CUDAPlace()) { + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = + dynamic_cast(pool.Get(place)); + auto stream = dev_ctx->stream(); + + paddle::memory::Copy(paddle::platform::CPUPlace(), host_data.data(), + paddle::platform::CUDAPlace(), t_ptr, + sizeof(float) * tensor->numel(), stream); + t_ptr = host_data.data(); + } +#endif + + VLOG(6) << "Tensor Value: " << t_ptr[0] << ", Expected Value: " << value; + PADDLE_ENFORCE( + t_ptr[0] == value, + paddle::platform::errors::Fatal( + "Detected numerical Error, Expected %f but got %f", value, t_ptr[0])); +} + +static void FluidCheckGradTensorValue( + const std::shared_ptr& X, + const paddle::platform::Place& place, float value) { + auto* grad_tensor = X->MutableGradVar()->GetMutable(); + float* g_ptr = grad_tensor->mutable_data(place); + std::vector g_host_data(grad_tensor->numel()); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (place == paddle::platform::CUDAPlace()) { + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = + dynamic_cast(pool.Get(place)); + auto stream = dev_ctx->stream(); + + paddle::memory::Copy(paddle::platform::CPUPlace(), g_host_data.data(), + paddle::platform::CUDAPlace(), g_ptr, + sizeof(float) * grad_tensor->numel(), stream); + g_ptr = g_host_data.data(); + } +#endif + + VLOG(6) << "Tensor Value: " << g_ptr[0] << ", Expected Value: " << value; + PADDLE_ENFORCE( + g_ptr[0] == 
value, + paddle::platform::errors::Fatal( + "Detected numerical Error, Expected %f but got %f", value, g_ptr[0])); +} + +/* --------------------- */ +/* ---- Fluid Scale ---- */ +/* --------------------- */ +// TODO(jiabin): Change this and remove nolint +void benchmark_fluid_scale(const std::shared_ptr& X, + const paddle::platform::Place& place, + bool accuracy_check) { + imperative::Tracer tracer; + framework::AttributeMap attrs; + + attrs["use_mkldnn"] = false; + attrs["scale"] = 2; + attrs["bias"] = 3; + attrs["bias_after_scale"] = true; + + std::shared_ptr tmp_out = X; + + size_t max_num_runs = accuracy_check ? 10 : max_num_benchmark_runs; + for (size_t i = 0; i < max_num_runs; i++) { + imperative::NameVarBaseMap ins = {{"X", {tmp_out}}}; + imperative::NameVarBaseMap outs = { + {"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; + + tracer.TraceOp("scale", ins, outs, attrs, place, true); + + tmp_out = outs["Out"][0]; + } + + auto* engine = tracer.GetEngine(); + std::vector> grad_tensors{nullptr}; + engine->Init({tmp_out}, grad_tensors, false /*retain_graph*/); + engine->Execute(); + + if (accuracy_check) { + FluidCheckTensorValue(tmp_out, place, 8189.0); + FluidCheckGradTensorValue(X, place, 1024.0); + } +} + +/* ---------------------- */ +/* ---- Fluid Matmul ---- */ +/* ---------------------- */ +void benchmark_fluid_matmul(const std::shared_ptr& X, + const std::shared_ptr& Y, + const paddle::platform::Place& place, + bool accuracy_check) { + imperative::Tracer tracer; + + std::shared_ptr tmp_out = X; + + size_t max_num_runs = accuracy_check ? 2 : max_num_benchmark_runs; + for (size_t i = 0; i < max_num_runs; i++) { + framework::AttributeMap attrs; + imperative::NameVarBaseMap ins = {{"X", {tmp_out}}, {"Y", {Y}}}; + imperative::NameVarBaseMap outs = { + {"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; + + tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true); + + tmp_out = outs["Out"][0]; + } + + auto* engine = tracer.GetEngine(); + std::vector> grad_tensors{nullptr}; + engine->Init({tmp_out}, grad_tensors, false /*retain_graph*/); + engine->Execute(); + + if (accuracy_check) { + FluidCheckTensorValue(tmp_out, place, 16); + FluidCheckGradTensorValue(X, place, 16); + FluidCheckGradTensorValue(Y, place, 16); + } +} + +/* ------------------- */ +/* ---- Fluid MLP ---- */ +/* ------------------- */ +void benchmark_fluid_mlp( + const std::shared_ptr& X, + const std::vector>& Ws, + const std::vector>& Bs, + const paddle::platform::Place& place, bool accuracy_check) { + imperative::Tracer tracer; + + imperative::NameVarBaseMap ins; + imperative::NameVarBaseMap outs; + framework::AttributeMap attrs; + std::shared_ptr input0 = X; + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + // Matmul0 + ins = {{"X", {input0}}, {"Y", {Ws[0]}}}; + outs = {{"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; + + tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true); + + // EW-Add0 + ins = {{"X", outs["Out"]}, {"Y", {Bs[i]}}}; + outs = {{"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; + + tracer.TraceOp("elementwise_add", ins, outs, attrs, place, true); + input0 = outs["Out"][0]; + } + + // ReduceSum + ins = {{"X", {input0}}}; + outs = {{"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; + attrs = {{"reduce_all", true}}; + + tracer.TraceOp("reduce_sum", ins, outs, attrs, place, true); + + auto* engine = tracer.GetEngine(); + std::vector> grad_tensors{nullptr}; + engine->Init(outs["Out"], 
grad_tensors, false /*retain_graph*/); + engine->Execute(); + + if (accuracy_check) { + std::unordered_map result = + egr::compute_mlp_expected_results(); + + FluidCheckTensorValue(outs["Out"][0], place, result["Out"]); + FluidCheckGradTensorValue(X, place, result["GradX"]); + FluidCheckGradTensorValue(Ws[0], place, result["GradW"]); + } +} + +} // namespace imperative +} // namespace paddle diff --git a/paddle/fluid/eager/tests/task_tests/generated_test.cc b/paddle/fluid/eager/tests/task_tests/generated_test.cc index e69de29bb2..b5ce9223f6 100644 --- a/paddle/fluid/eager/tests/task_tests/generated_test.cc +++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc @@ -0,0 +1,127 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Eager Dygraph + +#include + +#include "gtest/gtest.h" + +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/api/utils/tensor_utils.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/backward.h" +#include "paddle/fluid/eager/utils.h" + +#include "paddle/fluid/eager/tests/test_utils.h" +#include "paddle/fluid/imperative/tracer.h" + +#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" +#include "paddle/pten/core/kernel_registry.h" + +namespace egr { + +TEST(Generated, Sigmoid) { + // Prepare Device Contexts + eager_test::InitEnv(paddle::platform::CPUPlace()); + VLOG(6) << "Init Env"; + // 1. Prepare Input + paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4}); + VLOG(6) << "Make Dim"; + egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 0.0, true); + VLOG(6) << "Make EagerTensor"; + egr_utils_api::RetainGradForTensor(tensor); + VLOG(6) << "Retain Grad for Tensor"; + auto output_tensor = sigmoid_dygraph_function(tensor, {}); + VLOG(6) << "Run Backward"; + eager_test::CompareVariableWithValue(output_tensor, 0.5); + + std::vector target_tensors = {output_tensor}; + VLOG(6) << "Runing Backward"; + RunBackward(target_tensors, {}); + + VLOG(6) << "Finish Backward"; + eager_test::CompareGradTensorWithValue(tensor, 0.25); +} + +TEST(Generated, Matmul_v2) { + // Prepare Device Contexts + eager_test::InitEnv(paddle::platform::CPUPlace()); + + auto tracer = std::make_shared(); + paddle::imperative::SetCurrentTracer(tracer); + + // 1. 
Prepare Input + paddle::framework::DDim ddimX = paddle::framework::make_ddim({4, 16}); + egr::EagerTensor X = egr_utils_api::CreateTensorWithValue( + ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 3.0, true); + egr_utils_api::RetainGradForTensor(X); + + paddle::framework::DDim ddimY = paddle::framework::make_ddim({16, 20}); + egr::EagerTensor Y = egr_utils_api::CreateTensorWithValue( + ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 2.0, true); + egr_utils_api::RetainGradForTensor(Y); + + auto output_tensor = matmul_v2_dygraph_function( + X, Y, {{"trans_x", false}, {"trans_y", false}}); + + eager_test::CompareVariableWithValue(output_tensor, 96); + + std::vector target_tensors = {output_tensor}; + RunBackward(target_tensors, {}); + + eager_test::CompareGradTensorWithValue(X, 2.0 * 20); + eager_test::CompareGradTensorWithValue(Y, 3.0 * 4); +} + +TEST(Generated, ElementwiseAdd) { + // Prepare Device Contexts + eager_test::InitEnv(paddle::platform::CPUPlace()); + + auto tracer = std::make_shared(); + paddle::imperative::SetCurrentTracer(tracer); + + // 1. Prepare Input + paddle::framework::DDim ddimX = paddle::framework::make_ddim({4, 16}); + egr::EagerTensor X = egr_utils_api::CreateTensorWithValue( + ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 3.0, true); + egr_utils_api::RetainGradForTensor(X); + + paddle::framework::DDim ddimY = paddle::framework::make_ddim({4, 16}); + egr::EagerTensor Y = egr_utils_api::CreateTensorWithValue( + ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 2.0, true); + egr_utils_api::RetainGradForTensor(Y); + + auto output_tensor = elementwise_add_dygraph_function(X, Y, {}); + + eager_test::CompareVariableWithValue(output_tensor, 5); + + std::vector target_tensors = {output_tensor}; + RunBackward(target_tensors, {}); + + eager_test::CompareGradTensorWithValue(X, 1.0); + eager_test::CompareGradTensorWithValue(Y, 1.0); +} + +} // namespace egr + +USE_OP(sigmoid); +USE_OP(elementwise_add); +USE_OP(matmul_v2); diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 7f131f9ccd..c56fe5be4d 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -35,7 +35,7 @@ limitations under the License. 
*/ namespace paddle { namespace pybind { -extern PyTypeObject* pEagerTensorType; +extern PyTypeObject* p_eager_tensor_type; static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, PyObject* args, PyObject* kwargs) { @@ -167,7 +167,7 @@ static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self, EAGER_SYNC_TRY VLOG(4) << "ClearGradient " << self->eager_tensor.name(); - egr::EagerTensor grad; + egr::EagerTensor* grad; if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) { // Add RetainGrad as PostHook to AccumulationNode std::shared_ptr grad_node = @@ -182,14 +182,14 @@ static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self, grad = accumulation_grad_node->Grad(); } else { auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor); - grad = meta->Grad(); + grad = meta->MutableGrad(); } - if (grad.initialized()) { + if (grad->initialized()) { VLOG(4) << "Gradient of " << self->eager_tensor.name() << " is initialized, will be released."; auto dense_tensor = - std::dynamic_pointer_cast(grad.impl()); + std::dynamic_pointer_cast(grad->impl()); dense_tensor->release(); } Py_INCREF(Py_None); @@ -202,7 +202,6 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self, EAGER_TRY VLOG(4) << "ZeroGrads " << self->eager_tensor.name(); - egr::EagerTensor grad; if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) { // Add RetainGrad as PostHook to AccumulationNode std::shared_ptr grad_node = @@ -214,21 +213,54 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self, "with type: GradNodeAccumulation")); auto accumulation_grad_node = std::dynamic_pointer_cast(grad_node); - grad = accumulation_grad_node->Grad(); + if (accumulation_grad_node->Grad()->initialized()) { + accumulation_grad_node->Grad()->set_tensor( + std::make_shared( + paddle::experimental::zeros_like( + *(accumulation_grad_node->Grad()->Tensor().get())))); + } } else { auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor); - grad = meta->Grad(); + if (meta->MutableGrad()->initialized()) { + meta->MutableGrad()->set_tensor( + std::make_shared( + paddle::experimental::zeros_like( + *(meta->MutableGrad()->Tensor().get())))); + } } - if (grad.initialized()) { - grad.set_tensor(std::make_shared( - paddle::experimental::zeros_like(*(grad.Tensor().get())))); - } Py_INCREF(Py_None); return Py_None; EAGER_CATCH_AND_THROW_RETURN_NULL } +static PyObject* eager_tensor_method_detach(EagerTensorObject* self, + PyObject* args, PyObject* kwargs) { + EAGER_SYNC_TRY + PADDLE_ENFORCE_EQ( + self->eager_tensor.initialized(), true, + platform::errors::InvalidArgument("Tensor %s has not been initialized!", + self->eager_tensor.name())); + + PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0); + if (obj) { + auto v = reinterpret_cast(obj); + new (&(v->eager_tensor)) egr::EagerTensor(); + v->eager_tensor.set_impl(self->eager_tensor.impl()); + v->eager_tensor.set_name(egr::Controller::Instance().GenerateUniqueName()); + auto autograd_meta_src = + egr::EagerUtils::autograd_meta(&(self->eager_tensor)); + auto autograd_meta = egr::EagerUtils::autograd_meta(&(v->eager_tensor)); + autograd_meta->SetPersistable(autograd_meta_src->Persistable()); + } else { + PADDLE_THROW(platform::errors::Fatal( + "tp_alloc return null, can not new a PyObject.")); + } + + return obj; + EAGER_CATCH_AND_THROW_RETURN_NULL +} + PyMethodDef variable_methods[] = { {"numpy", (PyCFunction)(void (*)(void))eager_tensor_method_numpy, METH_VARARGS | METH_KEYWORDS, NULL}, @@ -246,6 +278,8 @@ 
PyMethodDef variable_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads, METH_VARARGS | METH_KEYWORDS, NULL}, + {"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach, + METH_VARARGS | METH_KEYWORDS, NULL}, {NULL, NULL, 0, NULL}}; } // namespace pybind diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index e69de29bb2..71b8bbbb1a 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -0,0 +1,192 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +// disable numpy compile error +#include + +#include +#include + +#include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/api/utils/tensor_utils.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/pybind/eager.h" +#include "paddle/fluid/pybind/eager_utils.h" +#include "paddle/fluid/pybind/exception.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/include/core.h" +#pragma GCC diagnostic ignored "-Wwrite-strings" + +namespace paddle { +namespace pybind { + +extern PyTypeObject* p_eager_tensor_type; + +PyObject* eager_tensor_properties_get_name(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + return ToPyObject(self->eager_tensor.name()); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value, + void* closure) { + EAGER_SYNC_TRY + self->eager_tensor.set_name(CastPyArg2AttrString(value, 0)); + return 0; + EAGER_CATCH_AND_THROW_RETURN_ZERO +} + +PyObject* eager_tensor_properties_get_stop_gradient(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + auto meta = egr::EagerUtils::autograd_meta(&self->eager_tensor); + return ToPyObject(meta->StopGradient()); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) { + std::shared_ptr grad_node = + egr::EagerUtils::grad_node(self->eager_tensor); + PADDLE_ENFORCE( + grad_node.get() != nullptr, + paddle::platform::errors::Fatal("Detected NULL grad_node" + "Leaf tensor should have had grad_node " + "with type: GradNodeAccumulation")); + auto accumulation_grad_node = + std::dynamic_pointer_cast(grad_node); + return ToPyObject(*accumulation_grad_node->Grad()); + } else { + VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name(); + auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor); + return ToPyObject(meta->Grad()); + } + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +int 
eager_tensor_properties_set_grad(EagerTensorObject* self, PyObject* value, + void* closure) { + EAGER_SYNC_TRY + auto src = CastPyArg2EagerTensor(value, 0); + PADDLE_ENFORCE( + egr::egr_utils_api::IsLeafTensor(self->eager_tensor), + paddle::platform::errors::Fatal("Only leaf Tensor can be set grad.")); + std::shared_ptr grad_node = + egr::EagerUtils::grad_node(self->eager_tensor); + PADDLE_ENFORCE( + grad_node.get() != nullptr, + paddle::platform::errors::Fatal("Detected NULL grad_node" + "Leaf tensor should have had grad_node " + "with type: GradNodeAccumulation")); + auto accumulation_grad_node = + std::dynamic_pointer_cast(grad_node); + accumulation_grad_node->Grad()->copy_(src, true); + return 0; + EAGER_CATCH_AND_THROW_RETURN_ZERO +} + +int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self, + PyObject* value, void* closure) { + EAGER_SYNC_TRY + auto meta = egr::EagerUtils::autograd_meta(&self->eager_tensor); + meta->SetStopGradient(CastPyArg2AttrBoolean(value, 0)); + return 0; + EAGER_CATCH_AND_THROW_RETURN_ZERO +} + +PyObject* eager_tensor_properties_get_persistable(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + auto meta = egr::EagerUtils::autograd_meta(&self->eager_tensor); + return ToPyObject(meta->Persistable()); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +int eager_tensor_properties_set_persistable(EagerTensorObject* self, + PyObject* value, void* closure) { + EAGER_SYNC_TRY + auto meta = egr::EagerUtils::autograd_meta(&self->eager_tensor); + meta->SetPersistable(CastPyArg2AttrBoolean(value, 0)); + return 0; + EAGER_CATCH_AND_THROW_RETURN_ZERO +} + +PyObject* eager_tensor_properties_get_shape(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + auto ddim = self->eager_tensor.shape(); + std::vector value; + size_t rank = static_cast(ddim.size()); + value.resize(rank); + for (size_t i = 0; i < rank; i++) { + value[i] = ddim[i]; + } + + return ToPyObject(value); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +PyObject* eager_tensor_properties_get_place(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + return ToPyObject(self->eager_tensor.place()); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +PyObject* eager_tensor_properties_get_place_str(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + std::stringstream ostr; + ostr << self->eager_tensor.place(); + return ToPyObject(ostr.str()); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +PyObject* eager_tensor_properties_get_dtype(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + return ToPyObject(pten::TransToProtoVarType(self->eager_tensor.type())); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + +struct PyGetSetDef variable_properties[] = { + {"grad", (getter)eager_tensor_properties_get_grad, + (setter)eager_tensor_properties_set_grad, nullptr, nullptr}, + {"name", (getter)eager_tensor_properties_get_name, + (setter)eager_tensor_properties_set_name, nullptr, nullptr}, + {"stop_gradient", (getter)eager_tensor_properties_get_stop_gradient, + (setter)eager_tensor_properties_set_stop_gradient, nullptr, nullptr}, + {"persistable", (getter)eager_tensor_properties_get_persistable, + (setter)eager_tensor_properties_set_persistable, nullptr, nullptr}, + {"shape", (getter)eager_tensor_properties_get_shape, nullptr, nullptr, + nullptr}, + // {"is_leaf", (getter)eager_tensor_properties_get_is_leaf, nullptr, + // nullptr, + // nullptr}, + {"place", (getter)eager_tensor_properties_get_place, nullptr, nullptr, + nullptr}, + {"_place_str", (getter)eager_tensor_properties_get_place_str, nullptr, + 
nullptr, nullptr}, + {"dtype", (getter)eager_tensor_properties_get_dtype, nullptr, nullptr, + nullptr}, + {nullptr, nullptr, nullptr, nullptr, nullptr}}; + +} // namespace pybind +} // namespace paddle diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index c61f87ccf9..e06e7f52dd 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -22,7 +22,7 @@ import paddle from .. import framework from .. import core from .. import unique_name -from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_, _in_eager_mode +from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_, _in_eager_mode, EagerParamBase from .base import switch_to_static_graph from .math_op_patch import monkey_patch_math_varbase from .parallel import scale_loss @@ -149,7 +149,7 @@ def monkey_patch_varbase(): out = linear(t) # call with different weight """ - if _in_eager_mode(): + if core._in_eager_mode(): base_tensor = core.eager.EagerTensor else: base_tensor = core.VarBase @@ -238,7 +238,7 @@ def monkey_patch_varbase(): """ if framework.in_dygraph_mode(): if grad_tensor is not None: - if _in_eager_mode(): + if core._in_eager_mode(): assert isinstance( grad_tensor, core.eager.EagerTensor ), "The type of grad_tensor must be paddle.Tensor" @@ -250,7 +250,7 @@ def monkey_patch_varbase(): "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format( grad_tensor.name, grad_tensor.shape, self.name, self.shape) - if _in_eager_mode(): + if core._in_eager_mode(): if grad_tensor is None: grad_tensor = [] else: @@ -258,7 +258,7 @@ def monkey_patch_varbase(): if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu(): # TODO(liuyuhui): Currently only for xpu. Will be removed in the future. scaled_loss = scale_loss(self) - if _in_eager_mode(): + if core._in_eager_mode(): core.eager.run_backward([scaled_loss], grad_tensor, retain_graph) else: @@ -266,7 +266,7 @@ def monkey_patch_varbase(): retain_graph, framework._dygraph_tracer()) else: - if _in_eager_mode(): + if core._in_eager_mode(): core.eager.run_backward([self], grad_tensor, retain_graph) else: core.dygraph_run_backward([self], [grad_tensor], @@ -305,7 +305,7 @@ def monkey_patch_varbase(): # [500.] 
""" - if _in_eager_mode(): + if core._in_eager_mode(): if not self.grad._is_initialized(): return None # TODO(wanghuancoder) support SELECTED_ROWS @@ -587,7 +587,7 @@ def monkey_patch_varbase(): # [[0.30574632, 0.55739117, 0.30902600, 0.39413780, 0.44830436], # [0.79010487, 0.53972793, 0.09495186, 0.44267157, 0.72112119]]) """ - if _in_eager_mode(): + if core._in_eager_mode(): from paddle.tensor.to_string import eager_tensor_to_string return eager_tensor_to_string(self) else: @@ -619,7 +619,7 @@ def monkey_patch_varbase(): raise RuntimeError( "Only Leaf Tensor support the deepcopy at the moment, non-Leaf Tensors contains graph information that does't support deepcopy" ) - if _in_eager_mode(): + if core._in_eager_mode(): new_varbase = core.eager.EagerTensor() else: new_varbase = core.VarBase() @@ -763,6 +763,14 @@ def monkey_patch_varbase(): else: return None + @framework.dygraph_only + def _set_grad_ivar(self, value): + if isinstance(self, EagerParamBase): + self.grad = value + else: + raise TypeError( + "_set_grad_ivar is only supported for Parameter Tensor") + @framework.dygraph_only def clear_gradient(self, set_to_zero=True): if set_to_zero: @@ -770,6 +778,10 @@ def monkey_patch_varbase(): else: self._clear_gradient() + @framework.dygraph_only + def clone(self): + return _C_ops_.assign(self) + if core._in_eager_mode() and not hasattr(core, "eager"): return @@ -790,7 +802,9 @@ def monkey_patch_varbase(): if core._in_eager_mode(): setattr(core.eager.EagerTensor, "_grad_ivar", _grad_ivar) + setattr(core.eager.EagerTensor, "_set_grad_ivar", _set_grad_ivar) setattr(core.eager.EagerTensor, "clear_gradient", clear_gradient) + setattr(core.eager.EagerTensor, "clone", clone) else: setattr(core.VarBase, "__name__", "Tensor") setattr(core.VarBase, "grad", grad) -- Gitee