diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 0817dc336716211e3a538195f81d60ef3e0c5d08..7f44afabf2590c1ebfb0b2f95fa133d6bfc6548e 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -29,6 +29,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/scope_guard.h" #include "paddle/fluid/imperative/all_reduce.h" #include "paddle/fluid/imperative/amp_auto_cast.h" #include "paddle/fluid/imperative/basic_engine.h" @@ -51,6 +52,8 @@ limitations under the License. */ namespace paddle { namespace pybind { +PyTypeObject *g_varbase_pytype = nullptr; + namespace py = ::pybind11; class Layer : public imperative::Layer { @@ -133,30 +136,44 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) { return place_obj.cast(); } else if (py::isinstance(place_obj)) { return place_obj.cast(); + } else if (py::isinstance(place_obj)) { + return place_obj.cast(); } else if (py::isinstance(place_obj)) { return place_obj.cast(); } else { PADDLE_THROW(platform::errors::InvalidArgument( "Place should be one of " - "Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace")); + "Place/CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace")); } } -static void InitTensorForVarBase(imperative::VarBase *self, - const py::array &array, - const platform::Place place, - bool persistable = false, - bool zero_copy = false, std::string name = "", - int stop_gradient = -1) { - if (name == "") { - name = - imperative::GetCurrentTracer()->GenerateUniqueName("generated_tensor"); - } - VLOG(5) << "Init Tensor as: / name: " << name - << " / persistable: " << persistable << " / zero_copy: " << zero_copy +// only initialize varbase, but not its tensor. +static void InitVarBaseOnly(imperative::VarBase *self, const std::string &name, + bool persistable = false, int stop_gradient = -1) { + auto name_ = name == "" + ? 
imperative::GetCurrentTracer()->GenerateUniqueName( + "generated_tensor") + : name; + + VLOG(5) << "Init Tensor as: / name: " << name_ + << " / persistable: " << persistable << " / stop_gradient: " << stop_gradient; - new (self) imperative::VarBase(name); + new (self) imperative::VarBase(name_); + if (stop_gradient != -1) { + self->SetOverridedStopGradient(stop_gradient); + } + self->SetPersistable(persistable); + self->SetType(framework::proto::VarType::LOD_TENSOR); +} + +// initialize varbase and its tensor. +static void InitVarBaseAndTensor( + imperative::VarBase *self, const py::array &array, + const platform::Place &place, const std::string &name, + bool persistable = false, bool zero_copy = false, int stop_gradient = -1) { + InitVarBaseOnly(self, name, persistable, stop_gradient); auto *tensor = self->MutableVar()->GetMutable(); + VLOG(4) << "zero_copy: " << zero_copy; if (platform::is_cpu_place(place)) { SetTensorFromPyArray( tensor, array, BOOST_GET_CONST(platform::CPUPlace, place), zero_copy); @@ -170,30 +187,23 @@ static void InitTensorForVarBase(imperative::VarBase *self, SetTensorFromPyArray( tensor, array, BOOST_GET_CONST(platform::CUDAPinnedPlace, place), zero_copy); + } else if (platform::is_npu_place(place)) { + SetTensorFromPyArray( + tensor, array, BOOST_GET_CONST(platform::NPUPlace, place), zero_copy); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Place should be one of CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace")); - } - if (stop_gradient != -1) { - self->SetOverridedStopGradient(stop_gradient); + "Place should be one of " + "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace")); } - self->SetPersistable(persistable); - self->SetType(framework::proto::VarType::LOD_TENSOR); self->SetDataType(tensor->type()); } static void InitVarBaseFromNumpyWithKwargs(imperative::VarBase *self, const py::kwargs &kwargs) { VLOG(4) << "Init VarBase from kwargs: "; - PADDLE_ENFORCE_EQ( - kwargs.contains("value"), true, - platform::errors::NotFound( 
- "The kwargs used to create Varbase misses argument: value")); auto persistable = kwargs.contains("persistable") ? kwargs["persistable"].cast() : false; - auto array = kwargs.contains("value") ? kwargs["value"].cast() - : py::array(); auto zero_copy = kwargs.contains("zero_copy") ? kwargs["zero_copy"].cast() : false; auto name = kwargs.contains("name") ? kwargs["name"].cast() : ""; @@ -201,10 +211,18 @@ static void InitVarBaseFromNumpyWithKwargs(imperative::VarBase *self, ? kwargs["stop_gradient"].cast() : -1; auto default_place = imperative::GetCurrentTracer()->ExpectedPlace(); - auto place = kwargs.contains("place") ? PyObjectToPlace(kwargs["place"]) - : default_place; - InitTensorForVarBase(self, array, place, persistable, zero_copy, name, - stop_gradient); + + if (kwargs.contains("value")) { + auto array = kwargs["value"].cast(); + // place is only used when array is given, otherwise, it is meaningless and + // ignored + auto place = kwargs.contains("place") ? PyObjectToPlace(kwargs["place"]) + : default_place; + InitVarBaseAndTensor(self, array, place, name, persistable, zero_copy, + stop_gradient); + } else { + InitVarBaseOnly(self, name, persistable, stop_gradient); + } } template @@ -239,11 +257,11 @@ static void InitVarBaseFromNumpyWithArgDefault(imperative::VarBase *self, const py::array &array) { auto place = imperative::GetCurrentTracer()->ExpectedPlace(); VLOG(4) << "Init VarBase from numpy at " << place; - InitTensorForVarBase(self, array, place); + InitVarBaseAndTensor(self, array, place, ""); } static void InitVarBaseFromTensorWithArgDefault( - imperative::VarBase *self, const framework::LoDTensor &tensor) { + imperative::VarBase *self, const framework::Tensor &tensor) { VLOG(4) << "Init VarBase"; auto place = imperative::GetCurrentTracer()->ExpectedPlace(); new (self) imperative::VarBase( @@ -397,37 +415,45 @@ static int _PySlice_GetIndices(PySliceObject *r, Py_ssize_t length, return 0; } -static void ParseIndexingSlice(framework::LoDTensor 
*tensor, PyObject *_index, - std::vector *slice_axes, - std::vector *slice_starts, - std::vector *slice_ends, - std::vector *slice_strides, - std::vector *decrease_axis, - std::vector *infer_flags) { - // We allow indexing by Integers, Slices, and tuples of those - // types. - // Ellipsis and None are not supported yet. +static void ParseIndexingSlice( + framework::LoDTensor *tensor, PyObject *_index, + std::vector *slice_axes, std::vector *slice_starts, + std::vector *slice_ends, std::vector *slice_strides, + std::vector *decrease_axis, std::vector *none_axes, + std::vector *infer_flags, std::vector *list_select_idxs, + bool *list_select_flag) { + // We allow indexing by Integers, Slices, Ellipsis, None, tuples of those + // types, and list of Bool and Integers. // wrap to tuple + + // NOTE(zhiqiu): PyTuple_Pack increases refcount. PyObject *index = !PyTuple_Check(_index) ? PyTuple_Pack(1, _index) : _index; + DEFINE_PADDLE_SCOPE_GUARD([index, _index]() { + if (!PyTuple_Check(_index)) { + Py_DECREF(index); + VLOG(4) << "Call Py_DECREF"; + } + }); PADDLE_ENFORCE_EQ( tensor->IsInitialized(), true, platform::errors::InvalidArgument("tensor has not been initialized")); const auto &shape = tensor->dims(); const int rank = shape.size(); const int size = PyTuple_GET_SIZE(index); - PADDLE_ENFORCE_EQ( - size <= rank, true, - platform::errors::InvalidArgument( - "too many indices (%d) for tensor of dimension %d", size, rank)); + + // specified_dims is the number of dimensions which indexed by Interger, + // Slices. 
+ int specified_dims = 0; for (int dim = 0; dim < size; ++dim) { PyObject *slice_item = PyTuple_GetItem(index, dim); - PADDLE_ENFORCE_EQ(PyCheckInteger(slice_item) || PySlice_Check(slice_item), - true, - platform::errors::InvalidArgument( - "Currently, VarBase.__getitem__() only allows " - "indexing by Integers, Slices, and tuples of " - "these types, but received %s in %dth slice item", - std::string(Py_TYPE(slice_item)->tp_name), dim + 1)); + if (PyCheckInteger(slice_item) || PySlice_Check(slice_item)) { + specified_dims++; + } + } + + for (int i = 0, dim = 0; i < size; ++i) { + PyObject *slice_item = PyTuple_GetItem(index, i); + infer_flags->push_back(1); int dim_len = shape[dim]; if (PyCheckInteger(slice_item)) { @@ -450,7 +476,8 @@ static void ParseIndexingSlice(framework::LoDTensor *tensor, PyObject *_index, slice_ends->push_back(start + 1); slice_strides->push_back(1); decrease_axis->push_back(dim); - } else { + dim++; + } else if (PySlice_Check(slice_item)) { // slice item Py_ssize_t start, end, step; PySliceObject *p = reinterpret_cast(slice_item); @@ -458,15 +485,137 @@ static void ParseIndexingSlice(framework::LoDTensor *tensor, PyObject *_index, // :: or : or 0:dim_len:1 if (start == 0 && end == dim_len && step == 1) { + dim++; continue; } slice_axes->push_back(dim); slice_starts->push_back(start); slice_ends->push_back(end); slice_strides->push_back(step); + dim++; + } else if (slice_item == Py_Ellipsis) { + dim += rank - specified_dims; + } else if (slice_item == Py_None) { + none_axes->push_back(dim); + } else if (PyList_Check(slice_item)) { + *list_select_flag = true; + PADDLE_ENFORCE_EQ( + size, 1, + platform::errors::InvalidArgument( + "When index contains a list, its length is excepted to 1, " + "but received %d", + size)); + bool all_bool = true; + int list_size = PyList_GET_SIZE(slice_item); + for (int j = 0; j < list_size; ++j) { + PyObject *list_item = PyList_GetItem(slice_item, j); + if (PyCheckInteger(list_item)) { + all_bool = false; + } 
else if (!PyBool_Check(list_item)) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Only support int or bool in index list.")); + } + } + if (all_bool) { + PADDLE_ENFORCE_EQ( + list_size, shape[0], + platform::errors::InvalidArgument( + "The dimension of bool index doesn't match indexed array along " + "dimension 0, the target dimension is %d, but received %d.", + shape[0], list_size)); + + for (int j = 0; j < list_size; ++j) { + PyObject *list_item = PyList_GetItem(slice_item, j); + if (list_item == Py_True) { + list_select_idxs->push_back(j); + } + } + } else { + for (int j = 0; j < list_size; ++j) { + PyObject *list_item = PyList_GetItem(slice_item, j); + if (PyCheckInteger(list_item)) { + list_select_idxs->push_back( + static_cast(PyLong_AsLong(list_item))); + } else if (list_item == Py_True) { + list_select_idxs->push_back(1); + } else { + list_select_idxs->push_back(0); + } + } + } + + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Currently, VarBase.__getitem__() only allows indexing " + "by Integers, Slices, Ellipsis, None, tuples of these types " + "and list of Bool and Integers, but received " + "%s in %dth slice item", + std::string(Py_TYPE(slice_item)->tp_name), i + 1)); } } - if (!PyTuple_Check(_index)) Py_DecRef(index); + + // valid_index is the number of dimensions exclude None index + const int valid_indexs = size - none_axes->size(); + PADDLE_ENFORCE_EQ(valid_indexs <= rank, true, + platform::errors::InvalidArgument( + "Too many indices (%d) for tensor of dimension %d.", + valid_indexs, rank)); +} + +template +static void VarBaseCopy(std::shared_ptr &src, // NOLINT + imperative::VarBase &dst, // NOLINT + const P &dst_device, const bool blocking) { + if (dst.SharedVar()->IsEmpty()) { + VLOG(3) << "deep copy Variable from " << src->Name() << " to " + << dst.Name(); + dst.SetPersistable(src->Persistable()); + dst.SetDataType(src->DataType()); + dst.SetType(src->Type()); + dst.SetOverridedStopGradient(src->OverridedStopGradient()); 
+ if (!src->SharedVar()->IsEmpty()) { + if (src->Var().IsType()) { + auto &src_tensor = src->Var().Get(); + auto *dst_tensor = dst.MutableVar()->GetMutable(); + dst_tensor->set_lod(src_tensor.lod()); + framework::TensorCopy(src_tensor, dst_device, dst_tensor); + if (blocking) { + platform::DeviceContextPool::Instance().Get(dst_device)->Wait(); + auto src_device = src_tensor.place(); + if (!(src_device == dst_device)) { + platform::DeviceContextPool::Instance().Get(src_device)->Wait(); + } + } + } else if (src->Var().IsType()) { + auto &src_selected_rows = src->Var().Get(); + auto *dst_selected_rows = + dst.MutableVar()->GetMutable(); + dst_selected_rows->set_height(src_selected_rows.height()); + dst_selected_rows->set_rows(src_selected_rows.rows()); + framework::TensorCopy(src_selected_rows.value(), dst_device, + dst_selected_rows->mutable_value()); + if (blocking) { + platform::DeviceContextPool::Instance().Get(dst_device)->Wait(); + auto src_device = src_selected_rows.value().place(); + if (!(src_device == dst_device)) { + platform::DeviceContextPool::Instance().Get(src_device)->Wait(); + } + } + } + + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(src, dst_device); + } + + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "The source Tensor(%s) can not copy when it is empty.", src->Name())); + } + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "The destion Tensor(%s) can not copy when it is not empty.", + dst.Name())); + } } // Bind Methods @@ -611,9 +760,10 @@ void BindImperative(py::module *m_ptr) { imperative::SetCurrentTracer(tracer); }); - py::class_>( - m, "VarBase", R"DOC()DOC") - .def_static("_alive_vars", &imperative::VarBase::AliveVarNames) + py::class_> varbase( + m, "VarBase", R"DOC()DOC"); + g_varbase_pytype = (PyTypeObject *)varbase.ptr(); // NOLINT + varbase.def_static("_alive_vars", &imperative::VarBase::AliveVarNames) .def("__init__", [](imperative::VarBase &self) { std::string name = @@ -659,17 
+809,31 @@ void BindImperative(py::module *m_ptr) { py::arg("value"), py::arg("place"), py::arg("persistable") = false, py::arg("zero_copy") = false, py::arg("name") = "", py::arg("stop_gradient") = -1) + .def("__init__", &InitVarBaseFromNumpyWithArg, + py::arg("value"), py::arg("place"), py::arg("persistable") = false, + py::arg("zero_copy") = false, py::arg("name") = "", + py::arg("stop_gradient") = -1) .def("__init__", &InitVarBaseFromNumpyWithArgDefault, py::arg("value")) .def("__init__", &InitVarBaseFromTensorWithArgDefault, py::arg("tensor")) .def("__init__", &InitVarBaseFromNumpyWithKwargs) - .def("__setitem__", + .def("__setitem_varbase__", [](std::shared_ptr &self, py::handle _index, py::object &value_obj) { + VLOG(4) << "Call __setitem_varbase__"; + auto self_tensor = self->MutableVar()->GetMutable(); + // NOTE(zhiqiu): PyTuple_Pack increases refcount while PyTuple_New + // https://github.com/python/cpython/blob/24b63c695ae0a95b06379eaadace66735abac1e2/Objects/tupleobject.c#L251 PyObject *index_ptr = !PyTuple_Check(_index.ptr()) ? PyTuple_Pack(1, _index.ptr()) : _index.ptr(); + DEFINE_PADDLE_SCOPE_GUARD([index_ptr, &_index]() { + if (!PyTuple_Check(_index.ptr())) { + Py_DECREF(index_ptr); + VLOG(4) << "Call Py_DECREF"; + } + }); // 1. Check argumnets // 1.1 Check whether value obj is a tensor. bool value_is_tensor = true; @@ -680,11 +844,24 @@ void BindImperative(py::module *m_ptr) { value_is_tensor = false; } + auto is_tensor = [](py::handle var) { + if (!var.ptr() || var.ptr() == Py_None) { + return false; + } + try { + py::cast>(var); + return true; + } catch (py::cast_error &) { + return false; + } + }; + // 1.2 Check whether _index can be parsed. 
const int size = PyTuple_GET_SIZE(index_ptr); for (int dim = 0; dim < size; ++dim) { PyObject *slice_item = PyTuple_GetItem(index_ptr, dim); - if (!(PyCheckInteger(slice_item) || PySlice_Check(slice_item))) { + if (!(PyCheckInteger(slice_item) || PySlice_Check(slice_item) || + slice_item == Py_Ellipsis || slice_item == Py_None)) { parse_index = false; break; } @@ -696,20 +873,32 @@ void BindImperative(py::module *m_ptr) { // copys data to cpu place, which reduces performance. if (parse_index && value_is_tensor) { std::vector axes, starts, ends, steps, decrease_axes, - infer_flags; + none_axes, infer_flags, list_select_idxs; + // if index is a list, list_select_flag will be true + bool list_select_flag; ParseIndexingSlice(self_tensor, index_ptr, &axes, &starts, &ends, - &steps, &decrease_axes, &infer_flags); + &steps, &decrease_axes, &none_axes, + &infer_flags, &list_select_idxs, + &list_select_flag); framework::AttributeMap attrs = { {"axes", axes}, {"starts", starts}, {"ends", ends}, {"steps", steps}, - {"decrease_axes", decrease_axes}}; + {"decrease_axes", decrease_axes}, + {"none_axes", none_axes}}; imperative::NameVarBaseMap ins = {{"Input", {self}}}; imperative::NameVarBaseMap outs = {{"Out", {self}}}; + PADDLE_ENFORCE_EQ( + self->IsLeaf() && !self->OverridedStopGradient(), false, + platform::errors::InvalidArgument( + "Leaf Tensor (%s) that doesn't stop gradient can't use " + "inplace strategy.", + self->Name())); + auto value_tensor = value_obj.cast>(); ins.insert({"ValueTensor", {value_tensor}}); @@ -718,24 +907,48 @@ void BindImperative(py::module *m_ptr) { { // Release gil and do tracing py::gil_scoped_release release; - tracer->TraceOp("set_value", ins, outs, std::move(attrs)); + tracer->TraceOp("set_value", ins, outs, std::move(attrs), + {{"Input", "Out"}}); } } else { auto self_numpy = TensorToPyArray(*self_tensor); + VLOG(4) << "parse_index is false"; if (value_is_tensor) { + VLOG(4) << "value is tensor"; auto value = value_obj.cast>(); auto 
value_tensor = value->MutableVar()->GetMutable(); auto value_numpy = TensorToPyArray(*value_tensor); - - self_numpy[_index] = value_numpy; + if (is_tensor(_index)) { + VLOG(4) << "index is tensor"; + auto index_var = + py::cast>(_index); + auto index_tensor = index_var->MutableVar() + ->GetMutable(); + auto index_numpy = TensorToPyArray(*index_tensor); + self_numpy[index_numpy] = value_numpy; + } else { + VLOG(4) << "index is not tensor"; + self_numpy[_index] = value_numpy; + } SetTensorFromPyArray(self_tensor, self_numpy, self_tensor->place(), true); } else { - auto value_numpy = value_obj; - self_numpy[_index] = value_numpy; + VLOG(4) << "value is not tensor"; + if (is_tensor(_index)) { + VLOG(4) << "index is tensor"; + auto index_var = + py::cast>(_index); + auto index_tensor = index_var->MutableVar() + ->GetMutable(); + auto index_numpy = TensorToPyArray(*index_tensor); + self_numpy[index_numpy] = value_obj; + } else { + VLOG(4) << "index is not tensor"; + self_numpy[_index] = value_obj; + } SetTensorFromPyArray(self_tensor, self_numpy, self_tensor->place(), true); } @@ -745,21 +958,31 @@ void BindImperative(py::module *m_ptr) { // inplace operator for the VarBase self. 
self->BumpInplaceVersion(); }) - .def("__getitem__", + .def("_getitem_index_not_tensor", [](std::shared_ptr &self, py::handle _index) { + VLOG(4) << "Call _getitem_index_not_tensor"; std::vector slice_axes, slice_starts, slice_ends, - slice_strides, decrease_axis, infer_flags; + slice_strides, decrease_axis, none_axes, infer_flags, + list_select_idxs; + // if index is a list, list_select_flag will be true + bool list_select_flag = false; auto tensor = self->MutableVar()->GetMutable(); ParseIndexingSlice(tensor, _index.ptr(), &slice_axes, &slice_starts, &slice_ends, &slice_strides, - &decrease_axis, &infer_flags); + &decrease_axis, &none_axes, &infer_flags, + &list_select_idxs, &list_select_flag); // release gil and do tracing py::gil_scoped_release release; const auto &tracer = imperative::GetCurrentTracer(); - if (slice_axes.empty()) { - return self; - } else { + + auto out = slice_axes.empty() && !list_select_flag + ? self + : std::shared_ptr( + new imperative::VarBase( + tracer->GenerateUniqueName())); + + if (!slice_axes.empty()) { imperative::NameVarBaseMap ins = {{"Input", {self}}}; framework::AttributeMap attrs = { {"axes", slice_axes}, @@ -767,8 +990,6 @@ void BindImperative(py::module *m_ptr) { {"ends", slice_ends}, {"infer_flags", infer_flags}, {"decrease_axis", decrease_axis}}; - auto out = std::shared_ptr( - new imperative::VarBase(tracer->GenerateUniqueName())); imperative::NameVarBaseMap outs = {{"Out", {out}}}; std::string op_type = "slice"; for (auto stride : slice_strides) { @@ -780,9 +1001,154 @@ void BindImperative(py::module *m_ptr) { } } tracer->TraceOp(op_type, ins, outs, std::move(attrs)); - return out; } + if (!none_axes.empty()) { + // Deal with cases when all axes are decreased. + // After slice, the shape of out is [1], which should have been + // [], but Paddle doesn't support scalar. + // In order to ensure the correctness of the final shape of out, + // one dimension of out needs to be decreased. 
+ // For example: + // # x.shape: (2,3,4) + // out = x[0, 1, 1, None] # out.shape : (1) + if (static_cast(decrease_axis.size()) == + tensor->dims().size()) { + none_axes.pop_back(); + } + if (!none_axes.empty()) { + // Deal with cases that decrease_axes is not empty + // For example: + // # x.shape: (2,3,4) + // out = x[0, 0:2, None] # out.shape : (2, 1, 4) + for (auto &axis : none_axes) { + int len = 0; + for (int da : decrease_axis) { + if (da < axis) { + len++; + } + } + axis -= len; + } + + // Deal with cases that there are more than one + // prefix none index, For example: + // [None, None, :, :, None] + // the none_axes int the return of ParseIndexingSlice is: + // [0, 0, 2 ] + // according to the interface of "unsqueeze2", + // we should convert it to: + // [0, 0, 4 ] + int prefix_zero_cnt = 0; + for (const auto &axis : none_axes) { + if (axis == 0) { + prefix_zero_cnt++; + } else { + break; + } + } + if (prefix_zero_cnt > 0) { + int none_axes_num = static_cast(none_axes.size()); + for (int i = prefix_zero_cnt; i < none_axes_num; ++i) { + none_axes[i] += prefix_zero_cnt; + } + } + + imperative::NameVarBaseMap ins = {{"X", {out}}}; + framework::AttributeMap attrs = {{"axes", none_axes}}; + auto new_out = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + auto out_xshape = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + imperative::NameVarBaseMap outs = {{"Out", {new_out}}, + {"XShape", {out_xshape}}}; + tracer->TraceOp("unsqueeze2", ins, outs, std::move(attrs)); + + return new_out; + } + } + + // the index is a list + if (list_select_flag) { + auto select_index = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + auto *idx_tensor = select_index->MutableVar() + ->GetMutable(); + auto *dev_ctx = platform::DeviceContextPool::Instance().Get( + tracer->ExpectedPlace()); + TensorFromVector(list_select_idxs, *dev_ctx, idx_tensor); + + imperative::NameVarBaseMap ins = {{"X", {self}}, + 
{"Index", {select_index}}}; + imperative::NameVarBaseMap outs = {{"Out", {out}}}; + tracer->TraceOp("index_select", ins, outs, {{"dim", 0}}); + } + + return out; }) + .def( + "_getitem_from_offset", + [](std::shared_ptr &self, const py::args &args) { + const auto &tensor = self->Var().Get(); + PADDLE_ENFORCE_EQ( + tensor.IsInitialized(), true, + platform::errors::InvalidArgument( + "Tensor of %s is Empty, please check if it has no data.", + self->Name())); + + const auto &tensor_dims = tensor.dims(); + + std::vector dims(tensor_dims.size()); + std::vector strides(tensor_dims.size()); + + size_t numel = 1; + for (int i = tensor_dims.size() - 1; i >= 0; --i) { + strides[i] = numel; + dims[i] = static_cast(tensor_dims[i]); + numel *= dims[i]; + } + size_t offset = 0; + if (args.empty()) { + PADDLE_ENFORCE_EQ( + numel, 1, + platform::errors::InvalidArgument( + "only one element tensors can be converted to Python " + "scalars when no input coordinates")); + } else if (args.size() == 1) { + offset = args[0].cast(); + PADDLE_ENFORCE_LT( + offset, numel, + platform::errors::InvalidArgument( + "index %d is out of bounds for size %d", offset, numel)); + } else { + PADDLE_ENFORCE_EQ(args.size(), dims.size(), + platform::errors::InvalidArgument( + "incorrect number of indices for Tensor")); + + for (size_t i = 0; i < args.size(); ++i) { + size_t index = args[i].cast(); + PADDLE_ENFORCE_LT( + index, dims[i], + platform::errors::InvalidArgument( + "index %d is out fo bounds for axis %d with size %d", + index, i, dims[i])); + offset += index * strides[i]; + } + } +#define TENSOR_TO_PY_SCALAR(T, proto_type) \ + if (tensor.type() == proto_type) { \ + std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(proto_type); \ + T b = TensorGetElement(tensor, offset); \ + return py::array(py::dtype(py_dtype_str.c_str()), {}, {}, \ + static_cast(&b)); \ + } + + _ForEachDataType_(TENSOR_TO_PY_SCALAR); +#undef TENSOR_TO_PY_SCALAR + PADDLE_THROW(platform::errors::Unimplemented( + 
"Unsupported tensor data type: %s", + framework::DataTypeToString(tensor.type()))); + }, + py::return_value_policy::copy) .def("_inplace_version", [](imperative::VarBase &self) -> uint32_t { const auto &var = self.MutableVar(); @@ -1182,20 +1548,26 @@ void BindImperative(py::module *m_ptr) { )DOC") .def("cuda", - [](const std::shared_ptr &self, int device_id, - bool blocking) { + [](const std::shared_ptr &self, + py::handle &handle, bool blocking) { #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP) PADDLE_THROW(platform::errors::PermissionDenied( "Cannot copy this Tensor to GPU in CPU version Paddle, " "Please recompile or reinstall Paddle with CUDA support.")); #else int device_count = platform::GetCUDADeviceCount(); - if (device_id == -1) { + int device_id = 0; + if (handle == py::none()) { if (platform::is_gpu_place(self->Place())) { return self; - } else { - device_id = 0; } + } else { + PyObject *py_obj = handle.ptr(); + PADDLE_ENFORCE_EQ( + PyCheckInteger(py_obj), true, + platform::errors::InvalidArgument( + " 'device_id' must be a positive integer")); + device_id = py::cast(handle); } PADDLE_ENFORCE_GE( device_id, 0, @@ -1219,26 +1591,30 @@ void BindImperative(py::module *m_ptr) { } #endif }, - py::arg("device_id") = -1, py::arg("blocking") = true, R"DOC( + py::arg("device_id") = py::none(), py::arg("blocking") = true, R"DOC( Returns a copy of this Tensor in GPU memory. If this Tensor is already in GPU memory and device_id is default, then no copy is performed and the original Tensor is returned. Args: - device_id(int, optional): The destination GPU device id. Defaults to the current device. + device_id(int, optional): The destination GPU device id. Default: None, means current device. blocking(bool, optional): If False and the source is in pinned memory, the copy will be asynchronous with respect to the host. Otherwise, the argument has no effect. Default: False. Examples: .. 
code-block:: python + # required: gpu import paddle x = paddle.to_tensor(1.0, place=paddle.CPUPlace()) print(x.place) # CPUPlace y = x.cuda() print(y.place) # CUDAPlace(0) + + y = x.cuda(None) + print(y.place) # CUDAPlace(0) y = x.cuda(1) print(y.place) # CUDAPlace(1) @@ -1321,6 +1697,16 @@ void BindImperative(py::module *m_ptr) { return new_var; }, py::return_value_policy::copy) + .def("_copy_to", + [](const std::shared_ptr &self, + const platform::NPUPlace &place, bool blocking) { + auto new_var = self->NewVarBase(place, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) .def("_copy_to", [](const std::shared_ptr &self, const platform::Place &place, bool blocking) { @@ -1340,28 +1726,22 @@ void BindImperative(py::module *m_ptr) { &imperative::VarBase::SetOverridedStopGradient) .def_property("persistable", &imperative::VarBase::Persistable, &imperative::VarBase::SetPersistable) - .def_property_readonly("shape", - [](imperative::VarBase &self) { - if (self.Var().IsType()) { - return framework::vectorize( - self.Var() - .Get() - .dims()); - } else if (self.Var() - .IsType< - framework::SelectedRows>()) { - return framework::vectorize( - self.Var() - .Get() - .value() - .dims()); - } else { - VLOG(2) << "It is meaningless to get shape of " - "variable type " - << GetTypeName(self); - return std::vector(); - } - }) + .def_property_readonly( + "shape", + [](imperative::VarBase &self) { + if (self.Var().IsType()) { + return framework::vectorize( + self.Var().Get().dims()); + } else if (self.Var().IsType()) { + return framework::vectorize( + self.Var().Get().value().dims()); + } else { + VLOG(2) << "It is meaningless to get shape of " + "variable type " + << GetTypeName(self); + return std::vector(); + } + }) .def_property_readonly("is_leaf", &imperative::VarBase::IsLeaf, R"DOC( Whether a Tensor is leaf Tensor. 
@@ -1453,6 +1833,11 @@ void BindImperative(py::module *m_ptr) { self.SetExpectedPlace(*p); VLOG(4) << "Tracer(" << &self << ")" << " set expected place " << *p; + } else if (py::isinstance(obj)) { + auto p = obj.cast(); + self.SetExpectedPlace(*p); + VLOG(4) << "Tracer(" << &self << ")" + << " set expected place " << *p; } else if (py::isinstance(obj)) { auto p = obj.cast(); self.SetExpectedPlace(*p); @@ -1461,7 +1846,7 @@ void BindImperative(py::module *m_ptr) { } else { PADDLE_THROW(platform::errors::InvalidArgument( "Incompatible Place Type: supports XPUPlace, CUDAPlace, " - "CPUPlace, " + "CPUPlace, NPUPlace" "and CUDAPinnedPlace, " "but got Unknown Type!")); } @@ -1487,7 +1872,7 @@ void BindImperative(py::module *m_ptr) { allow_ops); imperative::AmpOperators::Instance().GetMutableBlockOps()->swap( block_ops); - VLOG(4) << "AMP operators changed, " + VLOG(5) << "AMP operators changed, " << imperative::AmpOperators::Instance(); }) .def("_get_amp_op_list", @@ -1522,6 +1907,19 @@ void BindImperative(py::module *m_ptr) { std::move(attrs), place, trace_backward); } }) + .def("trace", + [](imperative::Tracer &self, const std::string &type, + const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs, + framework::AttributeMap attrs, const platform::NPUPlace &place, + bool trace_backward) { + auto ins_map = ConvertToNameVarBaseMap(ins); + auto outs_map = ConvertToNameVarBaseMap(outs); + { + py::gil_scoped_release release; + self.TraceOp(type, std::move(ins_map), std::move(outs_map), + std::move(attrs), place, trace_backward); + } + }) .def("trace", [](imperative::Tracer &self, const std::string &type, const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs, @@ -1574,6 +1972,13 @@ void BindImperative(py::module *m_ptr) { self.nrings_ = nrings; }); + m.def("varbase_copy", &VarBaseCopy); + m.def("varbase_copy", &VarBaseCopy); + m.def("varbase_copy", &VarBaseCopy); + m.def("varbase_copy", &VarBaseCopy); + m.def("varbase_copy", &VarBaseCopy); + m.def("varbase_copy", 
&VarBaseCopy); + m.def( "dygraph_partial_grad", [](const std::vector> &input_targets, @@ -1673,6 +2078,12 @@ void BindImperative(py::module *m_ptr) { const py::args args, const py::kwargs kwargs) { return imperative::PyLayerApply(place, cls, args, kwargs); }); + + m.def("pylayer_apply", + [](const platform::NPUPlace &place, const py::object &cls, + const py::args args, const py::kwargs kwargs) { + return imperative::PyLayerApply(place, cls, args, kwargs); + }); } } // namespace pybind diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 64209aee875ba02364481674f796a0b519f771f5..c42a2a5943d11a33b1dc923a9361324c7956fbc7 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -16,16 +16,18 @@ import inspect import numpy as np import warnings import weakref +import sys import paddle from .. import framework from .. import core from .. import unique_name -from ..framework import Variable, Parameter, ParamBase +from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_ from .base import switch_to_static_graph from .math_op_patch import monkey_patch_math_varbase from .parallel import scale_loss from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE +import paddle.utils.deprecated as deprecated class TensorHookRemoveHelper(object): @@ -85,7 +87,7 @@ def monkey_patch_varbase(): """ - # Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() only available in dygraph. + # Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() only available in dygraph. # It will fail. So, for propery in dygraph only, should not let it getattr(self, attr, None). 
attr_not_need_keys = ['grad'] if isinstance(self, ParamBase): @@ -107,6 +109,8 @@ def monkey_patch_varbase(): if to_parameter or isinstance(self, ParamBase): del attr_kwargs['persistable'] + # NOTE(Aurelius84): All parameters should be placed into global block. + attr_kwargs['block'] = attr_kwargs['block'].program.global_block() static_var = Parameter(**attr_kwargs) else: static_var = Variable(**attr_kwargs) @@ -238,8 +242,17 @@ def monkey_patch_varbase(): "Variable.backward() is only available in DyGraph mode") @framework.dygraph_only + @deprecated( + since="2.1.0", + level=1, + reason="Please use tensor.grad, which returns the tensor value of the gradient." + ) def gradient(self): """ + .. warning:: + This API will be deprecated in the future, it is recommended to use + :code:`x.grad` which returns the tensor value of the gradient. + Get the Gradient of Current Tensor. Returns: @@ -253,7 +266,7 @@ def monkey_patch_varbase(): x = paddle.to_tensor(5., stop_gradient=False) y = paddle.pow(x, 4.0) y.backward() - print("grad of x: {}".format(x.grad)) + print("grad of x: {}".format(x.gradient())) # [500.] """ @@ -337,10 +350,37 @@ def monkey_patch_varbase(): @property def grad(self): """ - The alias of gradient(). - """ + .. warning:: + This API will return the tensor value of the gradient. If you want + to get the numpy value of the gradient, you can use :code:`x.grad.numpy()`. + + Get the Gradient of Current Tensor. + + Returns: + Tensor: the gradient of current Tensor + + Examples: + .. code-block:: python + + import paddle + + x = paddle.to_tensor(5., stop_gradient=False) + y = paddle.pow(x, 4.0) + y.backward() + print("grad of x: {}".format(x.grad)) + # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=False, [500.]) - return self.gradient() + """ + msg = 'tensor.grad will return the tensor value of the gradient.' \ + ' This is an incompatible upgrade for tensor.grad API. 
' \ + ' It\'s return type changes from numpy.ndarray in version 2.0 to paddle.Tensor in version 2.1.0. ' \ + ' If you want to get the numpy value of the gradient, you can use :code:`x.grad.numpy()`' + warning_msg = "\033[93m\nWarning:\n%s \033[0m" % (msg) + # ensure ANSI escape sequences print correctly in cmd and powershell + if sys.platform.lower() == 'win32': + warning_msg = "\nWarning:\n%s " % (msg) + warnings.warn(warning_msg) + return self._grad_ivar() def clear_grad(self): """ @@ -348,6 +388,49 @@ def monkey_patch_varbase(): """ self.clear_gradient() + def item(self, *args): + """ + Convert one element Tensor to a Python scalar. + + Args: + *args(int): The input coordinates. If it's single int, the data in the corresponding order of flattened Tensor will be returned. + Default: None, and it must be in the case where Tensor has only one element. + + Returns(Python scalar): A Python scalar, whose dtype is corresponds to the dtype of Tensor. + + Raises: + ValueError: If the Tensor has more than one element, there must be coordinates. + + Examples: + .. code-block:: python + + import paddle + + x = paddle.to_tensor(1) + print(x.item()) #1 + print(type(x.item())) # + + x = paddle.to_tensor(1.0) + print(x.item()) #1.0 + print(type(x.item())) # + + x = paddle.to_tensor(True) + print(x.item()) #True + print(type(x.item())) # + + x = paddle.to_tensor(1+1j) + print(x.item()) #(1+1j) + print(type(x.item())) # + + x = paddle.to_tensor([[1.1, 2.2, 3.3]]) + print(x.item(2)) #3.3 + print(x.item(0, 2)) #3.3 + + x = paddle.to_tensor([1, 2]) + x.item() #ValueError: only one element tensor can be converted to Python scalar when no input coordinates. + """ + return self._getitem_from_offset(*args).item() + @property def inplace_version(self): """ @@ -435,7 +518,95 @@ def monkey_patch_varbase(): return self.__nonzero__() def __array__(self, dtype=None): - return self.numpy().astype(dtype) + """ + Returns a numpy array shows the value of current Tensor. 
+ + Returns: + ndarray: The numpy value of current Tensor. + + Returns type: + ndarray: dtype is same as current Tensor + + Examples: + .. code-block:: python + + import paddle + import numpy as np + x = paddle.randn([2, 2]) + x_array = np.array(x) + + print(type(x_array)) # + print(x_array.shape) #(2, 2) + """ + array = self.numpy() + if dtype: + array = array.astype(dtype) + return array + + def contain_tensor(item): + if not isinstance(item, tuple): + item = [item] + + for slice_item in item: + if isinstance(slice_item, slice): + if isinstance(slice_item.start, Variable) \ + or isinstance(slice_item.stop, Variable) \ + or isinstance(slice_item.step, Variable): + return True + else: + if isinstance(slice_item, Variable): + return True + return False + + def __getitem__(self, item): + def is_list_tuple(index, contain_type): + def _is_list_tuple(item): + if not (isinstance(item, (list, tuple)) or + type(item) == contain_type): + return False + if isinstance(item, (tuple, list)): + for s in item: + if not _is_list_tuple(s): + return False + return True + + if not isinstance(index, (tuple, list)): + return False + for s in index: + if not _is_list_tuple(s): + return False + return True + + if contain_tensor(item) or is_list_tuple(item, int): + # 1. Call _getitem_impl_ when item contains tensor. + # Why not call a c++ function ? Because item can't be parsed when it contains tensor. + return _getitem_impl_(self, item) + + else: + # 2. Call c++ func getitem_index_not_tensor to speedup. + return self._getitem_index_not_tensor(item) + + def __setitem__(self, item, value): + def contain_tensor_or_list(item): + if not isinstance(item, tuple): + item = [item] + + for slice_item in item: + if isinstance(slice_item, list): + return True + elif isinstance(slice_item, Variable): + return True + + return False + + if contain_tensor_or_list(item): + # To reuse code with static graph, + # Call _setitem_impl_ when item contains tensor or list. 
+ return _setitem_impl_(self, item, value) + + else: + # Call c++ func __setitem_varbase__ to speedup. + return self.__setitem_varbase__(item, value) for method_name, method in ( ("__bool__", __bool__), ("__nonzero__", __nonzero__), @@ -445,7 +616,9 @@ def monkey_patch_varbase(): ("gradient", gradient), ("register_hook", register_hook), ("__str__", __str__), ("__repr__", __str__), ("__deepcopy__", __deepcopy__), ("__module__", "paddle"), - ("__name__", "Tensor"), ("__array__", __array__)): + ("__name__", "Tensor"), ("__array__", __array__), + ("__getitem__", __getitem__), ("item", item), + ("__setitem__", __setitem__)): setattr(core.VarBase, method_name, method) # NOTE(zhiqiu): pybind11 will set a default __str__ method of enum class. diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index 0885891cdbe02747c8babbdbe748f21b30c34598..21f506d03ce68e7eb47d185c06aeab5f4ba4cabd 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -20,6 +20,8 @@ import unittest import numpy as np import paddle +from paddle.fluid.layer_helper import LayerHelper +from functools import reduce class TestSetValueBase(unittest.TestCase): @@ -333,6 +335,134 @@ class TestSetValueItemTensor6(TestSetValueApi): self.data[2:0:-1, 0:2, ::-1] = self.value +# 1.5 item is None +class TestSetValueItemNone1(TestSetValueApi): + def _call_setitem(self, x): + x[None] = self.value + + def _get_answer(self): + self.data[None] = self.value + + +class TestSetValueItemNone2(TestSetValueApi): + def _call_setitem(self, x): + x[0, None, 1] = self.value + + def _get_answer(self): + self.data[0, None, 1] = self.value + + +class TestSetValueItemNone3(TestSetValueApi): + def _call_setitem(self, x): + x[:, None, None, 1] = self.value + + def _get_answer(self): + self.data[:, None, None, 1] = self.value + + +class TestSetValueItemNone4(TestSetValueApi): + def 
_call_setitem(self, x): + x[0, 0, None, 1] = self.value + + def _get_answer(self): + self.data[0, 0, None, 1] = self.value + + +class TestSetValueItemNone5(TestSetValueApi): + def _call_setitem(self, x): + x[0, None, 0, None, 1] = self.value + + def _get_answer(self): + self.data[0, None, 0, None, 1] = self.value + + +class TestSetValueItemNone6(TestSetValueApi): + def _call_setitem(self, x): + x[None, 0, 0, None, 0] = self.value + + def _get_answer(self): + self.data[None, 0, 0, None, 0] = self.value + + +class TestSetValueItemNone7(TestSetValueApi): + def _call_setitem(self, x): + x[:, None, 1] = np.zeros(self.shape)[:, None, 0] + + def _get_answer(self): + self.data[:, None, 1] = np.zeros(self.shape)[:, None, 0] + + +class TestSetValueItemNone8(TestSetValueApi): + def _call_setitem(self, x): + x[:, 1, None] = np.zeros(self.shape)[:, 0, None] + + def _get_answer(self): + self.data[:, 1, None] = np.zeros(self.shape)[:, 0, None] + + +class TestSetValueItemNone9(TestSetValueApi): + def _call_setitem(self, x): + x[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None] + + def _get_answer(self): + self.data[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None] + + +# 1.5 item is list or Tensor of bol +class TestSetValueItemBool1(TestSetValueApi): + def _call_setitem(self, x): + x[[True, False]] = self.value + + def _get_answer(self): + self.data[[True, False]] = self.value + + +class TestSetValueItemBool2(TestSetValueApi): + def _call_setitem(self, x): + x[[False, False]] = self.value + + def _get_answer(self): + self.data[[False, False]] = self.value + + +class TestSetValueItemBool3(TestSetValueApi): + def _call_setitem(self, x): + x[[False, True]] = np.zeros(self.shape[2]) + + def _get_answer(self): + self.data[[False, True]] = np.zeros(self.shape[2]) + + +class TestSetValueItemBool4(TestSetValueApi): + def _call_setitem(self, x): + idx = paddle.assign(np.array([False, True])) + x[idx] = np.zeros(self.shape[2]) + + def _get_answer(self): + 
self.data[np.array([False, True])] = np.zeros(self.shape[2]) + + +class TestSetValueItemBool5(TestSetValueApi): + def _call_setitem(self, x): + idx = paddle.assign( + np.array([[False, True, False], [True, True, False]])) + x[idx] = self.value + + def _get_answer(self): + self.data[np.array([[False, True, False], [True, True, False] + ])] = self.value + + +class TestSetValueItemBool6(TestSetValueApi): + def _call_setitem(self, x): + x[0, ...] = 0 + x[x > 0] = self.value + + def _get_answer(self): + self.data[0, ...] = 0 + self.data[self.data > 0] = self.value + + # 2. Test different type of value: int, float, numpy.ndarray, Tensor # 2.1 value is int32, int64, float32, float64, bool @@ -755,6 +885,21 @@ class TestError(TestSetValueBase): one = paddle.ones([1]) x[::one] = self.value + def _bool_list_error(self): + with self.assertRaises(TypeError): + x = paddle.ones(shape=self.shape, dtype=self.dtype) + x[[True, False, 0]] = 0 + + with self.assertRaises(IndexError): + x = paddle.ones(shape=self.shape, dtype=self.dtype) + x[[True, False], [True, False]] = 0 + + def _bool_tensor_error(self): + with self.assertRaises(IndexError): + x = paddle.ones(shape=self.shape, dtype=self.dtype) + idx = paddle.assign([True, False, True]) + x[idx] = 0 + def _broadcast_mismatch(self): program = paddle.static.Program() with paddle.static.program_guard(program): @@ -762,8 +907,7 @@ class TestError(TestSetValueBase): value = np.array([3, 4, 5, 6, 7]) x[0] = value exe = paddle.static.Executor(paddle.CPUPlace()) - with self.assertRaisesRegexp(ValueError, - "Broadcast dimension mismatch."): + with self.assertRaises(ValueError): exe.run(program) def test_error(self): @@ -772,8 +916,391 @@ class TestError(TestSetValueBase): self._value_type_error() self._dtype_error() self._step_error() + self._bool_list_error() + self._bool_tensor_error() self._broadcast_mismatch() +# 5. 
Test backward + + +class Model(paddle.nn.Layer): + def __init__(self): + super(Model, self).__init__() + self.conv = paddle.nn.Conv2D(12, 12, 3) + + def forward(self, x, y): + x = self.conv(x) + y = self.conv(y) + var = y.flatten() + + x[0, :, 0, 0] = var + loss = paddle.mean(x) + return loss, var, x + + +class TestBackward(unittest.TestCase): + def test_static(self): + paddle.enable_static() + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + + x_np = np.random.random(size=(4, 4)).astype('float32') + y_np = np.random.random(size=(4, 4)).astype('float32') + label_np = np.random.randint(2, size=(4, 1)).astype('int64') + + with paddle.static.program_guard(main_program, startup_program): + x = paddle.static.data(name="x", shape=[4, 4], dtype='float32') + y = paddle.static.data(name="y", shape=[4, 4], dtype='float32') + + label = paddle.static.data( + name="label", shape=[4, 1], dtype='int64') + + z = paddle.add(x, y) + var = y[0, :] + z[0, :] = var + + prediction = paddle.static.nn.fc(x=z, size=2, activation='softmax') + + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=label) + loss = paddle.mean(cost) + sgd = paddle.optimizer.SGD(learning_rate=0.01) + sgd.minimize(loss) + + exe = paddle.static.Executor(paddle.CPUPlace()) + exe.run(startup_program) + + var_grad, z_grad = exe.run( + main_program, + feed={"x": x_np, + "y": y_np, + "label": label_np}, + fetch_list=[var.name + "@GRAD", z.name + "@GRAD"]) + + self.assertTrue((var_grad == z_grad[0, :]).all()) + + def test_dynamic(self): + paddle.disable_static() + model = Model() + x = paddle.ones([1, 12, 3, 3]).astype("float32") + y = paddle.ones([1, 12, 3, 3]).astype("float32") + loss, var, x = model(x, y) + loss.backward() + + self.assertTrue(var.grad.shape == x.grad[0, :, 0, 0].shape) + # + self.assertTrue((0 == x.grad[0, :, 0, 0]).all()) + + +class TestGradientTruncated(unittest.TestCase): + def test_consistent_with_competitor(self): + paddle.disable_static() + + 
def set_value(t, value): + a = t * t + a[0, 1] = value + y = a * a + return y.sum() + + # case 1 + array = np.arange( + 1, 1 + 2 * 3 * 4, dtype="float32").reshape([1, 2, 1, 3, 1, 4]) + value = np.arange(100, 104, dtype="float32").reshape(1, 4) + + inps = paddle.to_tensor(array, stop_gradient=False) + value = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value(inps, value) + loss.backward() + + value_grad = np.array([[600., 606., 612., 618.]]) + input_grad = np.array( + [[[[[[4., 32., 108., 256.]], [[500., 864., 1372., 2048.]], + [[2916., 4000., 5324., 6912.]]]], + [[[[0., 0., 0., 0.]], [[0., 0., 0., 0.]], [[0., 0., 0., 0.]]]]]]) + self.assertTrue( + np.array_equal(inps.grad.numpy(), input_grad), + msg="The gradient of value should be \n{},\n but reveived {}". + format(input_grad, inps.grad.numpy())) + self.assertTrue( + np.array_equal(value.grad.numpy(), value_grad), + msg="The gradient of input should be \n{},\n but reveived {}". + format(value_grad, value.grad.numpy())) + + # case 2 + array = np.arange(1, 2 * 3 * 4 + 1, dtype="float32").reshape([4, 2, 3]) + value = np.arange(100, 100 + 1, dtype="float32") + + inps2 = paddle.to_tensor(array, stop_gradient=False) + value2 = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value(inps2, value2) + loss.backward() + + value_grad2 = np.array([600.]) + input_grad2 = np.array( + [[[4., 32., 108.], [0., 0., 0.]], [[1372., 2048., 2916.], + [4000., 5324., 6912.]], + [[8788., 10976., 13500.], [16384., 19652., 23328.]], + [[27436., 32000., 37044.], [42592., 48668., 55296.]]]) + self.assertTrue( + np.array_equal(inps2.grad.numpy(), input_grad2), + msg="The gradient of value should be \n{},\n but reveived {}". + format(input_grad, inps2.grad.numpy())) + self.assertTrue( + np.array_equal(value2.grad.numpy(), value_grad2), + msg="The gradient of input should be \n{},\n but reveived {}". 
+ format(value_grad, value2.grad.numpy())) + + # case 3 + def set_value3(t, value): + a = t * t + a[0, :, 0, :] = value + y = a * a + return y.sum() + + array = np.arange( + 1, 1 + 2 * 3 * 4, dtype="float32").reshape([4, 3, 1, 1, 2, 1]) + value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1) + + inps = paddle.to_tensor(array, stop_gradient=False) + value = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value3(inps, value) + loss.backward() + + value_grad = np.array([[[600.], [606.]]]) + input_grad = np.array( + [[[[[[0.], [0.]]]], [[[[0.], [0.]]]], [[[[0.], [0.]]]]], + [[[[[1372.], [2048.]]]], [[[[2916.], [4000.]]]], + [[[[5324.], [6912.]]]]], [[[[[8788.], [10976.]]]], [[[[13500.], + [16384.]]]], + [[[[19652.], [23328.]]]]], + [[[[[27436.], [32000.]]]], [[[[37044.], [42592.]]]], + [[[[48668.], [55296.]]]]]]) + self.assertTrue( + np.array_equal(inps.grad.numpy(), input_grad), + msg="The gradient of value should be \n{},\n but reveived {}". + format(input_grad, inps.grad.numpy())) + self.assertTrue( + np.array_equal(value.grad.numpy(), value_grad), + msg="The gradient of input should be \n{},\n but reveived {}". 
+ format(value_grad, value.grad.numpy())) + + #case 4: step >0 + def set_value4(t, value): + a = t * t + a[0, :, 0, ::3] = value + y = a * a + return y.sum() + + array = np.arange( + 1, 1 + 2 * 3 * 4, dtype="float32").reshape([2, 3, 1, 4, 1]) + value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1) + + inps = paddle.to_tensor(array, stop_gradient=False) + value = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value4(inps, value) + loss.backward() + + value_grad = np.array([[[600.], [606.]]]) + input_grad = np.array([[[[[0.], [32.], [108.], + [0.]]], [[[0.], [864.], [1372.], [0.]]], + [[[0.], [4000.], [5324.], [0.]]]], + [[[[8788.], [10976.], [13500.], [16384.]]], + [[[19652.], [23328.], [27436.], [32000.]]], + [[[37044.], [42592.], [48668.], [55296.]]]]]) + self.assertTrue( + np.array_equal(inps.grad.numpy(), input_grad), + msg="The gradient of value should be \n{},\n but reveived {}". + format(input_grad, inps.grad.numpy())) + self.assertTrue( + np.array_equal(value.grad.numpy(), value_grad), + msg="The gradient of input should be \n{},\n but reveived {}". 
+ format(value_grad, value.grad.numpy())) + + # case 5:a[0].shape==value.shape + def set_value5(t, value): + a = t * t + a[0] = value + y = a * a + return y.sum() + + array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape([2, 3, 4]) + value = np.arange(100, 100 + 12, dtype="float32").reshape(3, 4) + + inps = paddle.to_tensor(array, stop_gradient=False) + value = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value5(inps, value) + loss.backward() + + value_grad = np.array([[200., 202., 204., 206.], + [208., 210., 212., 214.], + [216., 218., 220., 222.]]) + input_grad = np.array([[[0., 0., 0., 0.], [0., 0., 0., 0.], + [0., 0., 0., 0.]], + [[8788., 10976., 13500., 16384.], + [19652., 23328., 27436., 32000.], + [37044., 42592., 48668., 55296.]]]) + self.assertTrue( + np.array_equal(inps.grad.numpy(), input_grad), + msg="The gradient of value should be \n{},\n but reveived {}". + format(input_grad, inps.grad.numpy())) + self.assertTrue( + np.array_equal(value.grad.numpy(), value_grad), + msg="The gradient of input should be \n{},\n but reveived {}". 
+ format(value_grad, value.grad.numpy())) + + def test_static_graph(self): + paddle.enable_static() + + to_string = lambda x, i, : x + '_' + str(i) + numel = lambda input_shape: reduce(lambda x, y: x * y, input_shape) + + def op1(x): + value = paddle.fluid.layers.fill_constant([1], "float32", 1) + # test stop_gradient + value.stop_gradient = True + x.stop_gradient = False + start = paddle.fluid.layers.fill_constant( + [1], "int32", 5, force_cpu=True) + end = paddle.fluid.layers.fill_constant( + [1], "int32", 0, force_cpu=True) + step = paddle.fluid.layers.fill_constant( + [1], "int32", -2, force_cpu=True) + + inputs = { + 'Input': x, + 'ValueTensor': value, + 'StartsTensorList': [start, ], + 'EndsTensorList': [end, ], + 'StepsTensorList': [step, ] + } + + helper = LayerHelper("set_value") + y = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs={'axes': [0]}) + + return y, value + + def op2(x): + value = paddle.fluid.layers.fill_constant([1, 3, 2], "float32", 1) + # test stop_gradient + value.stop_gradient = False + x.stop_gradient = False + attrs = { + 'axes': [0], + 'starts': [6], + 'ends': [0], + 'steps': [-4], + 'decrease_axes': [], + 'none_axes': [], + 'dtype': paddle.float32 + } + inputs = {'Input': x, 'ValueTensor': value} + + helper = LayerHelper("set_value") + y = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs=attrs) + + return y, value + + def op3(x): + value = paddle.fluid.layers.fill_constant([1], "float32", 1) + x.stop_gradient = True + value.stop_gradient = False + start = paddle.fluid.layers.fill_constant( + [1], "int32", 0, force_cpu=True) + end = paddle.fluid.layers.fill_constant( + [1], "int32", 5, force_cpu=True) + step = paddle.fluid.layers.fill_constant( + [1], "int32", 3, force_cpu=True) + + inputs = { + 'Input': x, + 'ValueTensor': value, + 
'StartsTensorList': [start, ], + 'EndsTensorList': [end, ], + 'StepsTensorList': [step, ] + } + + helper = LayerHelper("set_value") + y = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs={'axes': [0]}) + + return y, value + + def set_value(array, i, op): + name_x = to_string('x', i) + x = paddle.static.data( + name=name_x, shape=array.shape, dtype='float32') + + # set_value_op in __get/setitem__ is an inplace operation. + # When `input.stop_gradient = True` and `value.stop_gradient = False`, + # set_value_grad_op will not be run during backward. + y, value = op(x) + + y2 = y + 1 + loss = paddle.fluid.layers.reduce_sum(y2) + sgd = paddle.optimizer.Adam() + sgd.minimize(loss) + place = paddle.fluid.CPUPlace( + ) if not paddle.fluid.core.is_compiled_with_cuda( + ) else paddle.fluid.CUDAPlace(0) + + prog = paddle.static.default_main_program() + exe = paddle.static.Executor(place) + exe.run(paddle.static.default_startup_program()) + fetch_list = [] + if not x.stop_gradient: + fetch_list.append(x.grad_name) + if not value.stop_gradient: + fetch_list.append(value.grad_name) + out = exe.run(prog, feed={x.name: array}, fetch_list=fetch_list) + return out + + input_shape = [7, 6, 5, 4, 3, 2] + + array = np.arange( + 0, numel(input_shape), dtype="float32").reshape(input_shape) + + for i in range(len(input_shape)): + program = paddle.static.Program() + with paddle.static.program_guard(program): + out1 = set_value(array, i, op1) + self.assertTrue((out1[0][5:0:-2] == 0).all()) + + if len(array.shape) > 2: + program2 = paddle.static.Program() + with paddle.static.program_guard(program2): + out2 = set_value(array, i, op2) + self.assertTrue((out2[0][6:0:-4] == 0).all()) + + program3 = paddle.static.Program() + with paddle.static.program_guard(program3): + out3 = set_value(array, i, op3) + self.assertTrue((numel(out1[0][0:5:3].shape) == out3[0]).all()) + + array = array[0] + + if 
__name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/variable_index.py b/python/paddle/fluid/variable_index.py new file mode 100644 index 0000000000000000000000000000000000000000..1b9a82ba85f05a92c3f783081b4bbb3570250272 --- /dev/null +++ b/python/paddle/fluid/variable_index.py @@ -0,0 +1,701 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import numpy as np +from . import unique_name +from . 
import core +import paddle + +MAX_INTEGER = 2**31 - 1 + + +def is_list_tuple(index, contain_type): + def _is_list_tuple(item): + if not (isinstance(item, (list, tuple)) or type(item) == contain_type): + return False + if isinstance(item, (tuple, list)): + for s in item: + if not _is_list_tuple(s): + return False + return True + + if not isinstance(index, (tuple, list)): + return False + for s in index: + if not _is_list_tuple(s): + return False + return True + + +def is_one_dim_list(index, contain_type): + if isinstance(index, list): + for i in index: + if not isinstance(i, contain_type): + return False + else: + return False + return True + + +def get_list_index_shape(var_dims, index_dims): + var_dims_size = len(var_dims) + index_dims_size = len(index_dims) + + out_dims_size = var_dims_size - index_dims[0] + index_dims_size - 1 + + out_dims_shape = [1] * out_dims_size + + out_dims_shape[:index_dims_size - 1] = index_dims[1:] + + out_dims_shape[index_dims_size - 1:] = var_dims[index_dims[0]:] + return out_dims_shape + + +class SliceInfo: + def __init__(self): + self.pre_shape = None + self.indexes = [] + + def update(self, index): + if is_list_tuple(index, int) or isinstance(index, ( + paddle.fluid.Variable, np.ndarray)): + # convert index to Tensor + if not isinstance(index, paddle.fluid.Variable): + index = paddle.assign(index) + + self.indexes.append(index) + + if self.pre_shape is None: + self.pre_shape = index.shape + else: + if self.pre_shape != index.shape: + # broadcast + cur_shape = paddle.broadcast_shape(self.pre_shape, + index.shape) + for i in range(len(self.indexes)): + self.indexes[i] = paddle.broadcast_to(self.indexes[i], + cur_shape) + self.pre_shape = self.indexes[-1].shape + else: + raise ValueError( + "Index should be list/tuple of int or Tensor, but received {}.". 
+ format(index)) + + def shape_stride(self, shape): + s = [1] * len(shape) + for i in range(len(shape) - 2, -1, -1): + s[i] = shape[i + 1] * s[i + 1] + + return s + + def numel(self, shape): + return reduce(lambda x, y: x * y, shape) + + def get_offset_stride(self, tensor_shape): + for index in self.indexes: + if not isinstance(index, paddle.fluid.Variable): + raise ValueError( + "only support list/tensor index, but received {}.".format( + type(index))) + + if len(self.indexes) <= len(tensor_shape) or len(self.indexes) == 1: + shape = paddle.stack(self.indexes) + axes = list(range(1, len(self.pre_shape) + 1)) + [0, ] + + else: + raise ValueError( + "too many indices for tensor: tensor is {}-dimensional, but {} were indexed". + format(len(tensor_shape), self.pre_shape[0])) + + shape_transpose = paddle.transpose(shape, axes) + return shape_transpose + + def get_item(self, tensor): + shape_transpose = self.get_offset_stride(tensor.shape) + index = paddle.assign(shape_transpose) + return paddle.gather_nd(tensor, index) + + def set_item(self, tensor_origin, value): + + if not isinstance(value, paddle.fluid.Variable): + value = paddle.assign(value) + tensor_type = None + + if tensor_origin.dtype in [ + core.VarDesc.VarType.FP32, core.VarDesc.VarType.FP64 + ]: + tensor = tensor_origin + else: + tensor_type = tensor_origin.dtype + tensor = tensor_origin.astype(core.VarDesc.VarType.FP32) + + if value.dtype != tensor.dtype: + value = value.astype(tensor.dtype) + + shape_transpose = self.get_offset_stride(tensor_origin.shape) + index = paddle.assign(shape_transpose) + + gather_tensor_shape = get_list_index_shape( + tensor.shape, [len(self.indexes), ] + list(self.indexes[-1].shape)) + + value_dims_bd = [1, ] * len(gather_tensor_shape) + value_dims_bd[-len(value.shape):] = list(value.shape) + + for i in range(len(gather_tensor_shape)): + if not (value_dims_bd[i] == gather_tensor_shape[i] or + value_dims_bd[i] == 1): + raise ValueError("{} can not broadcast into {}".format( + 
value.shape, gather_tensor_shape)) + + value_broadcast = paddle.broadcast_to(value, gather_tensor_shape) + + value_1d = value_broadcast.reshape([-1] + gather_tensor_shape[len( + index.shape) - 1:]) + + index_1d = index.reshape([-1, index.shape[-1]]) + + tensor_stride = paddle.assign( + self.shape_stride(tensor.shape[:index.shape[-1]])) + inds = [] + for i in range(index_1d.shape[0]): + temp = (index_1d[i] * tensor_stride).sum() + inds.append(temp) + index_1d = paddle.stack(inds).reshape([-1]) + t_reshape = tensor.reshape([-1] + list(tensor.shape[index.shape[-1]:])) + out = paddle.scatter(t_reshape, index_1d, value_1d) + if tensor_type is not None: + out = out.astype(tensor_type) + tensor_origin[:] = out.reshape(tensor_origin.shape) + + return tensor_origin + + +def replace_ellipsis(var, item): + from .framework import Variable + # Use slice(None) to replace Ellipsis. + # For var, var.shape = [3,4,5,6] + # + # var[..., 1:2] -> var[:, :, :, 1:2] + # var[0, ...] -> var[0] + # var[0, ..., 1:2] -> var[0, :, :, 1:2] + + item = list(item) + + # Remove Variable to skip bug when counting Ellipsis + item_remove_var = [ + ele for ele in item if not isinstance(ele, (Variable, np.ndarray)) + ] + ell_count = item_remove_var.count(Ellipsis) + if ell_count == 0: + return item + elif ell_count > 1: + raise IndexError("An index can only have a single ellipsis ('...')") + + ell_idx = item.index(Ellipsis) + + if ell_idx == len(item) - 1: + return item[:-1] + else: + item[ell_idx:ell_idx + 1] = [slice(None)] * ( + len(var.shape) - len(item) + 1) + + return item + + +def replace_none(item): + new_item = [] + none_axes = [] + for i, slice_item in enumerate(item): + if slice_item is None: + none_axes.append(i) + else: + new_item.append(slice_item) + return new_item, none_axes + + +def is_integer_or_scalar_tensor(ele): + from .framework import Variable + if isinstance(ele, int): + return True + elif isinstance(ele, Variable): + if len(ele.shape) == 1 and ele.shape[0] == 1: + return True + 
return False + + +def deal_attrs(attrs, attr, attr_name, tensor_attr_name, inputs, infer_flags): + from .framework import Variable + from .layers import utils + + if utils._contain_var(attr): + inputs[tensor_attr_name] = utils._convert_to_tensor_list( + attr, dtype="int64") + for i, dim in enumerate(attr): + if isinstance(dim, Variable): + attrs[attr_name].append(-1) + infer_flags[i] = -1 + else: + attrs[attr_name].append(dim) + else: + attrs[attr_name] = attr + + +def _getitem_impl_(var, item): + """ + Slice the variable. + + Args: + item(int/slice/tuple) : the index. + + Returns: + Sliced variable + """ + from .framework import default_main_program, Variable + if isinstance(item, list): + if not is_one_dim_list(item, int): + item = tuple(item) + + if not isinstance(item, tuple): + item = (item, ) + + decrease_axes = [] + axes = [] + starts = [] + ends = [] + steps = [] + reverse_axes = [] + + use_strided_slice = False + item, none_axes = replace_none(item) + item = replace_ellipsis(var, item) + slice_info = SliceInfo() + + for dim, slice_item in enumerate(item): + if is_integer_or_scalar_tensor(slice_item): + if isinstance(slice_item, + int) and var.shape[dim] is not None and var.shape[ + dim] >= 0 and slice_item >= var.shape[dim]: + # For python, if users write a, b = var, the __getitem__ + # method will iterate through 0, 1, 2 ... until __getitem__ + # throws an IndexError, then stop. The var[0], var[1] will + # be given to a, b respectively. If more values are given, + # the unpack size would cause error. 
+ # + # We raise IndexError here to support grammar like `a, b = var` + raise IndexError( + "slice_item %d at dim %d should be >= 0 and < var.shape[%d]: %d" + % (slice_item, dim, dim, var.shape[dim])) + decrease_axes.append(dim) + start = slice_item + step = 1 + end = slice_item + 1 if slice_item != -1 else MAX_INTEGER + + elif isinstance(slice_item, slice): + start = slice_item.start + end = slice_item.stop + step = slice_item.step + + if start is None and end is None and step is None: + continue + + step = 1 if step is None else step + + if start is None: + start = 0 if step > 0 else MAX_INTEGER + if end is None: + end = MAX_INTEGER if step > 0 else -1 + + elif isinstance(slice_item, list): + all_bool = True + + if is_list_tuple(slice_item, int): + slice_info.update(slice_item) + continue + + for i in slice_item: + if type(i) is int: + all_bool = False + elif not isinstance(i, bool): + raise TypeError("Only support int or bool in index list.") + + if len(item) != 1: + raise IndexError( + "When index contains a list, its length must be 1, but received {}.". + format(len(item))) + new_slice_item = [] + if all_bool: + if len(slice_item) != var.shape[0]: + raise IndexError( + "The dimension of bool index doesn't match indexed array along "\ + "dimension 0, the target dimension is {}, but received {}.". 
+ format(var.shape[0], len(slice_item))) + for idx, ele in enumerate(slice_item): + if ele is True: + new_slice_item.append(idx) + slice_item = new_slice_item + else: + for idx, ele in enumerate(slice_item): + if type(ele) is int: + new_slice_item.append(ele) + elif ele is True: + new_slice_item.append(1) + else: + new_slice_item.append(0) + slice_item = new_slice_item + + from .layers import assign + from ..tensor import index_select + + idx = assign(np.array(slice_item).astype("int32")) + return index_select(var, index=idx, axis=0) + + elif isinstance(slice_item, np.ndarray): + slice_info.update(slice_item) + continue + elif isinstance(slice_item, (Variable)): + if len(item) == 1: + + from ..tensor import index_select, gather_nd + from .layers.nn import where + + if slice_item.dtype == paddle.bool: + if len(slice_item.shape) > len(var.shape): + raise IndexError( + "The dims of bool index doesn't match indexed array, " + "the dims of bool index except to be equal or less " + "than {}, but received {}.".format( + len(var.shape), len(slice_item.shape))) + for i, dim_len in enumerate(slice_item.shape): + if dim_len != var.shape[i]: + raise IndexError( + "The dimension of bool index doesn't match indexed array along "\ + "dimension {}, the target dimension is {}, but received {}.". + format(i, var.shape[i], dim_len)) + bool_2_idx = where(slice_item == True) + return gather_nd(var, bool_2_idx) + else: + if len(slice_item.shape) == 1: + return index_select(var, index=slice_item, axis=0) + else: + slice_info.update(slice_item) + continue + else: + slice_info.update(slice_item) + continue + + else: + raise IndexError( + "Valid index accept int or slice or ellipsis or list, but received {}.". 
+ format(slice_item)) + + axes.append(dim) + starts.append(start) + ends.append(end) + steps.append(step) + use_strided_slice = True if step != 1 else use_strided_slice + + if slice_info.indexes: + if len(slice_info.indexes) != len(item): + raise IndexError( + "Valid index accept int or slice or ellipsis or list, but received {}.". + format(item)) + return slice_info.get_item(var) + + inputs = {'Input': [var]} + attrs = { + 'axes': axes, + 'starts': [], + 'ends': [], + 'decrease_axis': decrease_axes + } + if use_strided_slice: + attrs['strides'] = [] + + infer_flags = [1] * len(axes) + deal_attrs(attrs, starts, "starts", "StartsTensorList", inputs, infer_flags) + deal_attrs(attrs, ends, "ends", "EndsTensorList", inputs, infer_flags) + deal_attrs(attrs, steps, "strides", "StridesTensorList", inputs, + infer_flags) + attrs['infer_flags'] = infer_flags + + out = var + if len(axes) > 0: + target_block = default_main_program().current_block() + op_type = "strided_slice" if use_strided_slice else "slice" + + slice_out_var = target_block.create_var( + name=unique_name.generate_with_ignorable_key(var.name + "_" + + op_type), + dtype=var.dtype) + target_block.append_op( + type=op_type, + inputs=inputs, + outputs={'Out': [slice_out_var]}, + attrs=attrs) + out = slice_out_var + + if len(reverse_axes) > 0: + from .layers.tensor import reverse + out = reverse(out, axis=reverse_axes) + + # Deal with cases when all axes are decreased. + # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar. + # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased. 
+ # For example: + # # x.shape: (2,3,4) + # out = x[0, 1, 1, None] # out.shape : (1) + if len(decrease_axes) == len(var.shape): + none_axes = none_axes[1:] + + if len(none_axes) > 0: + # Deal with cases that decrease_axes is not empty + # For example: + # # x.shape: (2,3,4) + # out = x[0, 0:2, None] # out.shape : (2, 1, 4) + for idx, axis in enumerate(none_axes): + l = len([i for i in decrease_axes if i < axis]) + new_axis = axis - l + none_axes[idx] = new_axis + + # Deal with cases when all axes are decreased. + # After slice, the shape of out is [1], which should have been [], but Paddle doesn't support scalar. + # In order to ensure the correctness of the final shape of out, one dimension of out needs to be decreased. + # For example: + # # x.shape: (2,3,4) + # out = x[0, 1, 1, None] # out.shape : (1) + + from ..tensor import unsqueeze + out = unsqueeze(out, axis=none_axes) + + return out + + +def _setitem_impl_(var, item, value): + from .framework import default_main_program, Variable + + inputs = {'Input': var} + if isinstance(item, list): + if not is_one_dim_list(item, int): + item = tuple(item) + # 1. 
Parse item + if not isinstance(item, tuple): + item = (item, ) + + decrease_axes = [] + axes = [] + starts = [] + ends = [] + steps = [] + + item, none_axes = replace_none(item) + item = replace_ellipsis(var, item) + slice_info = SliceInfo() + dim = 0 + for _, slice_item in enumerate(item): + if is_integer_or_scalar_tensor(slice_item): + decrease_axes.append(dim) + start = slice_item + end = slice_item + 1 if slice_item != -1 else MAX_INTEGER + step = 1 + + elif isinstance(slice_item, slice): + start = slice_item.start + end = slice_item.stop + step = slice_item.step + + if start is None and end is None and step is None: + dim += 1 + continue + + step = 1 if step is None else step + + if not isinstance(step, Variable) and step == 0: + raise ValueError( + "When assign a value to a paddle.Tensor, step can not be 0, " + "but received step is {}.".format(step)) + + if isinstance(step, Variable) and (start is None or end is None): + raise ValueError( + "When assign a value to a paddle.Tensor, it's not supported that " + "the start or end is None when the type of step is paddle.Tensor." + ) + + if start is None: + start = 0 if step > 0 else MAX_INTEGER + + if end is None: + end = MAX_INTEGER if step > 0 else (0 - MAX_INTEGER) + elif isinstance(slice_item, list): + if is_list_tuple(slice_item, int): + slice_info.update(slice_item) + continue + + for i in slice_item: + if not isinstance(i, bool): + raise TypeError("Doesn't support {} in index list.".format( + type(i))) + + if len(item) != 1: + raise IndexError( + "When index contains a bool list, its length must be 1, but received {}.". 
+ format(len(item))) + + from .layers import assign + idx_tensor = assign(slice_item) + return set_value_for_bool_tensor(var, idx_tensor, value) + + elif isinstance(slice_item, np.ndarray): + slice_info.update(slice_item) + continue + + elif isinstance(slice_item, Variable): + if slice_item.dtype == core.VarDesc.VarType.BOOL: + if len(item) != 1: + raise IndexError( + "When index contains a bool tensor, its length must be 1, but received {}.". + format(len(item))) + return set_value_for_bool_tensor(var, slice_item, value) + else: + slice_info.update(slice_item) + continue + else: + raise IndexError( + "Valid index accept int, slice, ellipsis, None, list of bool, Variable, " + "but received {}.".format(slice_item)) + + axes.append(dim) + starts.append(start) + ends.append(end) + steps.append(step) + + dim += 1 + if slice_info.indexes: + if len(slice_info.indexes) != len(item): + raise IndexError( + "Valid index accept int or slice or ellipsis or list, but received {}.". + format(item)) + return slice_info.set_item(var, value) + attrs = { + 'axes': axes, + 'starts': starts, + 'ends': ends, + 'steps': steps, + 'decrease_axes': decrease_axes, + 'none_axes': none_axes + } + + from .layers import utils + if utils._contain_var(starts): + inputs['StartsTensorList'] = utils._convert_to_tensor_list(starts) + del attrs['starts'] + if utils._contain_var(ends): + inputs['EndsTensorList'] = utils._convert_to_tensor_list(ends) + del attrs['ends'] + if utils._contain_var(steps): + inputs['StepsTensorList'] = utils._convert_to_tensor_list(steps) + del attrs['steps'] + + # 2. 
Parse value + dtype = var.dtype + attrs['dtype'] = dtype + + from .data_feeder import convert_dtype + # 2.1 value is an integer of float + if isinstance(value, (int, float)): + value = np.array([value]).astype(convert_dtype(dtype)) + + # 2.2 value is a np.ndarray + if isinstance(value, np.ndarray): + shape = list(value.shape) + if dtype == core.VarDesc.VarType.BOOL: + value_name = "bool_values" + values = [bool(v) for v in value.flat] + elif dtype == core.VarDesc.VarType.FP32: + value_name = "fp32_values" + values = [float(v) for v in value.flat] + elif dtype == core.VarDesc.VarType.FP64: + value_name = "fp64_values" + values = [float(v) for v in value.flat] + elif dtype == core.VarDesc.VarType.INT32: + value_name = "int32_values" + values = [int(v) for v in value.flat] + elif dtype == core.VarDesc.VarType.INT64: + value_name = "int64_values" + values = [int(v) for v in value.flat] + else: + raise TypeError( + "When assign a numpy.ndarray, integer or float to a paddle.Tensor, " + "the data type of the paddle.Tensor must be bool, float32, int32 or int64, but " + "received %s." % convert_dtype(dtype)) + attrs[value_name] = values + attrs["shape"] = shape + + elif isinstance(value, Variable): + inputs["ValueTensor"] = value + else: + raise TypeError( + "Only support to assign an integer, float, numpy.ndarray or " + "paddle.Tensor to a paddle.Tensor, but received {}".format( + type(value))) + + cur_block = default_main_program().current_block() + cur_block.append_op( + type="set_value", inputs=inputs, outputs={'Out': var}, attrs=attrs) + + return var + + +# the item is a tensor of bool +def set_value_for_bool_tensor(var, item, value): + + # TODO(zyfncg): Now scatter_nd_add only support float32 and float64 tensor, + # so in the current version we also only support float32 and float64 tensor, + # this problem will be fixed in the future. 
+ if var.dtype != core.VarDesc.VarType.FP32 and var.dtype != core.VarDesc.VarType.FP64: + raise TypeError("Only support float and double tensor for bool index, " + "but received {}.".format(var.dtype)) + + if len(item.shape) > len(var.shape): + raise IndexError("The dims of bool index doesn't match indexed array, " + "the dims of bool index except to be equal or less " + "than {}, but received {}.".format( + len(var.shape), len(item.shape))) + for i, dim_len in enumerate(item.shape): + if dim_len != var.shape[i]: + raise IndexError( + "The dimension of bool index doesn't match indexed array along " + "dimension {}, the target dimension is {}, but received {}.". + format(i, var.shape[i], dim_len)) + + def idx_not_empty(var, item, value): + from .framework import Variable + from .layers import assign + from .layers.nn import where + from ..tensor import gather_nd, scatter_nd_add + + if not isinstance(value, Variable): + value = assign(value).cast(var.dtype) + + idx = where(item) + gather_val = gather_nd(var, idx) + gather_val_new = value - gather_val + out = scatter_nd_add(var, idx, gather_val_new) + var[:] = out + + from .layers.control_flow import cond + # If all the bool index is False, just do nothing + cond(item.any(), lambda: idx_not_empty(var, item, value)) + + return var