diff --git a/cpu_context/CMakeLists.txt b/cpu_context/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..cfa11abc8d90d2fba71537a23635d22549644ad3 --- /dev/null +++ b/cpu_context/CMakeLists.txt @@ -0,0 +1,213 @@ +cmake_minimum_required(VERSION 3.14.1) +project(cpu_kernels_context) +get_filename_component(TOP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.." ABSOLUTE) +set(BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(proto_src_files + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/proto/cpu_attr.proto + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/proto/cpu_node_def.proto + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/proto/cpu_tensor.proto + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/proto/cpu_tensor_shape.proto +) +protobuf_generate(aicpu PROTO_SRCS PROTO_HDRS ${proto_src_files}) +set (local_context_src_files + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/node_def.cc + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/node_def_impl.cc + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/tensor.cc + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/tensor_impl.cc + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/tensor_shape.cc + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/tensor_shape_impl.cc + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/attr_value.cc + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto/attr_value_impl.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/device.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/context.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/device_cpu_kernel.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/cpu_kernel_register.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/cpu_kernel_utils.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/host_sharder.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/device_sharder.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/eigen_threadpool.cc + ${CMAKE_CURRENT_SOURCE_DIR}/common/cpu_kernel_cache.cc + ${PROTO_SRCS} +) + +set(local_context_stub_files + ${CMAKE_CURRENT_SOURCE_DIR}/stub/aicpu_sharder.cc +) +if(BUILD_OPEN_PROJECT) + set(_proto_include "${PROTO_BINARY_DIR}/aicpu") + + set(local_context_inc_path + ${CMAKE_CURRENT_SOURCE_DIR} + 
${CMAKE_CURRENT_SOURCE_DIR}/common + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto + ${CMAKE_CURRENT_SOURCE_DIR}/inc + ${CMAKE_CURRENT_SOURCE_DIR}/stub + #${AICPU_OPP_ENV}/inc + ${_proto_include} + ${Protobuf_INCLUDE} + ${FWKACLLIB_INCLUDE} + ${FWKACLLIB_INCLUDE}/aicpu/common + ${EIGEN_INCLUDE} + ) + + add_library(cpu_kernels_context_static STATIC + ${local_context_src_files} + $<$>:${local_context_stub_files}> + ) + + add_dependencies(cpu_kernels_context_static eigen_headers) + + target_include_directories(cpu_kernels_context_static PRIVATE + ${local_context_inc_path} + ) + + target_compile_definitions(cpu_kernels_context_static PRIVATE + _FORTIFY_SOURCE=2 + google=ascend_private + $<$>:LOG_CPP> + ) + + target_compile_options(cpu_kernels_context_static PRIVATE + -O2 + -std=c++11 + -ftrapv + -fstack-protector-all + -fPIC + $<$:-fvisibility-inlines-hidden> + $<$:-fvisibility=hidden> + ) + + target_link_libraries(cpu_kernels_context_static PRIVATE + $ + ) + + set(cpu_kernels_context_static ${CMAKE_CURRENT_BINARY_DIR}/libcpu_kernels_context_static.a) + + set_target_properties(cpu_kernels_context_static + PROPERTIES + OUTPUT_NAME cpu_kernels_context + ) + if("x${PRODUCT_SIDE}" STREQUAL "xdevice") + set(CMAKE_CXX_COMPILER ${TOOLCHAIN_DIR}/bin/aarch64-target-linux-gnu-g++) + set(CMAKE_C_COMPILER ${TOOLCHAIN_DIR}/bin/aarch64-target-linux-gnu-gcc) + + add_subdirectory(stub) + + set(OPS_AICPU_PATH "${INSTALL_PATH}/aicpu") + cann_install( + TARGET cpu_kernels_context_static + FILES $ + DESTINATION "${OPS_AICPU_PATH}" + ) + endif() + +else() +set(local_context_inc_path + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/common + ${CMAKE_CURRENT_SOURCE_DIR}/cpu_proto + ${TOP_DIR}/inc + ${TOP_DIR}/inc/aicpu + ${TOP_DIR}/inc/aicpu/common + ${TOP_DIR}/inc/aicpu/cpu_kernels + ${TOP_DIR}/inc/aicpu/aicpu_schedule/aicpu_sharder + ${TOP_DIR}/inc/external/aicpu + ${TOP_DIR}/libc_sec/include + ${TOP_DIR}/third_party/protobuf/include + ${TOP_DIR}/third_party/eigen/src/eigen-3.3.7 + 
${TOP_DIR}/out/${product} + ${CMAKE_BINARY_DIR}/proto/aicpu/proto/cpu_proto/proto + ${CMAKE_BINARY_DIR}/proto/aicpu/proto + ${CMAKE_BINARY_DIR}/proto/aicpu +) + +add_library(cpu_kernels_context SHARED + ${local_context_src_files} + ${local_context_stub_files} +) + +add_library(cpu_kernels_context_static STATIC + ${local_context_src_files} + ${local_context_stub_files} +) + +target_link_libraries(cpu_kernels_context PRIVATE + $ + slog + PUBLIC c_sec + ascend_protobuf + -ldl +) + + +##cpu_kernels_context +target_include_directories(cpu_kernels_context PRIVATE + ${local_context_inc_path} +) + +target_compile_definitions(cpu_kernels_context PRIVATE + _FORTIFY_SOURCE=2 + $<$:VISIBILITY> + google=ascend_private +) + +target_compile_options(cpu_kernels_context PRIVATE + -O2 + -std=c++11 + -ftrapv + $<$:-fvisibility-inlines-hidden> + $<$:-fvisibility=hidden> +) + +target_link_options(cpu_kernels_context PRIVATE + -Wl,-z,relro,-z,now + -s + $<$:-Wl,-Bsymbolic -Wl,--exclude-libs,ALL> +) + +target_include_directories(cpu_kernels_context_static PRIVATE + ${local_context_inc_path} +) + +target_compile_definitions(cpu_kernels_context_static PRIVATE + _FORTIFY_SOURCE=2 + google=ascend_private + $<$>:LOG_CPP> +) + +target_compile_options(cpu_kernels_context_static PRIVATE + -O2 + -std=c++11 + -ftrapv + -fstack-protector-all + $<$:-fvisibility-inlines-hidden> + $<$:-fvisibility=hidden> +) + +target_link_libraries(cpu_kernels_context_static PRIVATE + $ + -Wl,-z,relro,-z,now + -s + -ldl + -shared +) + +set(INSTALL_LIBRARY_DIR lib) + +install(TARGETS cpu_kernels_context OPTIONAL + EXPORT cpu_kernels_context-targets + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} +) + +install(TARGETS cpu_kernels_context_static OPTIONAL + EXPORT cpu_kernels_context_static-targets + LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR} +) + +set(cpu_kernels_context_static ${CMAKE_CURRENT_BINARY_DIR}/libcpu_kernels_context_static.a) + +set_target_properties(cpu_kernels_context_static + PROPERTIES + OUTPUT_NAME 
cpu_kernels_context +) +endif() diff --git a/cpu_context/common/context.cc b/cpu_context/common/context.cc new file mode 100644 index 0000000000000000000000000000000000000000..0228aaf921869d7c7346c110bd5c5a1b6d469124 --- /dev/null +++ b/cpu_context/common/context.cc @@ -0,0 +1,133 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cpu_context.h" +#include "aicpu_context.h" +#include "cpu_node_def.h" +#include "device.h" +#include "log.h" +#include "proto/cpu_attr.pb.h" +#include "proto/cpu_node_def.pb.h" +#include "sharder.h" +#include "status.h" + +namespace aicpu { +CpuKernelContext::CpuKernelContext(DeviceType type) { + Device *device = new (std::nothrow) Device(type); + if (device != nullptr) { + device_.reset(device); + } +} + +uint32_t CpuKernelContext::Init(NodeDef *node_def) { + KERNEL_CHECK_NULLPTR(node_def, KERNEL_STATUS_PARAM_INVALID, + "Node def is null.") + op_ = node_def->GetOpType(); + KERNEL_LOG_INFO("Construct the ctx of the op[%s] begin.", op_.c_str()); + for (int32_t i = 0; i < node_def->InputsSize(); i++) { + auto input = node_def->MutableInputs(i); + KERNEL_CHECK_NULLPTR(input, KERNEL_STATUS_PARAM_INVALID, + "Get input[%d] tensor failed in op[%s].", i, op_.c_str()) + inputs_.emplace_back(std::move(input)); + } + + for (int32_t i = 0; i < node_def->OutputsSize(); i++) { + auto output = node_def->MutableOutputs(i); + KERNEL_CHECK_NULLPTR(output, 
KERNEL_STATUS_PARAM_INVALID, + "Get output[%d] tensor failed in op[%s].", i, + op_.c_str()) + outputs_.emplace_back(std::move(output)); + } + + auto attrMap = node_def->Attrs(); + for (auto iter = attrMap.begin(); iter != attrMap.end(); ++iter) { + auto attr_value_ptr = iter->second; + KERNEL_CHECK_NULLPTR(attr_value_ptr, KERNEL_STATUS_PARAM_INVALID, + "Get attr[%s] failed in op[%s].", iter->first.c_str(), + op_.c_str()) + auto ret = + attrs_.insert(std::make_pair(iter->first, std::move(attr_value_ptr))); + if (ret.second != true) { + KERNEL_LOG_ERROR("Insert attr[%s] failed in op[%s].", iter->first.c_str(), + op_.c_str()); + return KERNEL_STATUS_INNER_ERROR; + } + } + + KERNEL_LOG_INFO("Construct the ctx of the op[%s] succcess.", op_.c_str()); + + return KERNEL_STATUS_OK; +} + +/* + * get op type. + * @return string: op type + */ +std::string CpuKernelContext::GetOpType() const { return op_; } + +/* + * get input tensor. + * @return Tensor *: not null->success, null->failed + */ +Tensor *CpuKernelContext::Input(uint32_t index) const { + if (index >= inputs_.size()) { + KERNEL_LOG_WARN("Index[%u] should be less than input tensors size[%zu].", + index, inputs_.size()); + return nullptr; + } + + return inputs_[index].get(); +} + +/* + * get output tensor. + * @return Tensor *: not null->success, null->failed + */ +Tensor *CpuKernelContext::Output(uint32_t index) const { + if (index >= outputs_.size()) { + KERNEL_LOG_WARN("Index[%u] should be less than output tensors size[%zu].", + index, outputs_.size()); + return nullptr; + } + + return outputs_[index].get(); +} + +/* + * get attr. + * @return AttrValue *: not null->success, null->failed + */ +AttrValue *CpuKernelContext::GetAttr(std::string name) const { + auto it = attrs_.find(name); + if (it == attrs_.end()) { + KERNEL_LOG_WARN("Attr[%s] is not exist.", name.c_str()); + return nullptr; + } + + return (it->second).get(); +} + +/* + * get input size. 
+ * @return uint32_t: input size + */ +uint32_t CpuKernelContext::GetInputsSize() const { return inputs_.size(); } + +/* + * get output size. + * @return uint32_t: output size + */ +uint32_t CpuKernelContext::GetOutputsSize() const { return outputs_.size(); } +} // namespace aicpu diff --git a/cpu_context/common/cpu_kernel_cache.cc b/cpu_context/common/cpu_kernel_cache.cc new file mode 100644 index 0000000000000000000000000000000000000000..c83dedf8e7fb75d32ff66574078aeec582596597 --- /dev/null +++ b/cpu_context/common/cpu_kernel_cache.cc @@ -0,0 +1,466 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cpu_kernel_cache.h" + +#include + +#include "cce/aicpu_engine_struct.h" +#include "cpu_kernel.h" +#include "cpu_kernel_register.h" +#include "cpu_kernel_utils.h" +#include "log.h" +#include "status.h" + +using namespace aicpu; + +namespace { +// max io address number limit is 1024 +constexpr uint32_t kMaxIoAddrNumParamLen = 1024; +// max LRU cache number is 256 +constexpr uint32_t kMaxLRUCacheNum = 256; +} // namespace + +namespace aicpu { +/* + * Init kernel cache. + */ +int32_t CpuKernelCache::InitParameter() { + if (!GetSessionFlag()) { + SetCapacity(kMaxLRUCacheNum); + } + return 0; +} + +/* + * update framework output tensor shape. 
+ */ +uint32_t CpuKernelCache::UpdateFWKOutputShape( + bool unknown_shape, const CpuKernelContext &ctx, + std::vector &output_shape_and_type) { + if (unknown_shape) { + for (size_t i = 0; i < ctx.GetOutputsSize(); ++i) { + Tensor *output = ctx.Output(i); + KERNEL_CHECK_NULLPTR(output, KERNEL_STATUS_PARAM_INVALID, + "Get output[%zu] failed.", i) + auto shape = output->GetTensorShape(); + KERNEL_CHECK_NULLPTR(shape, KERNEL_STATUS_PARAM_INVALID, + "Get output[%zu] shape failed.", i) + + for (int32_t index = 0; index < shape->GetDims(); ++index) { + output_shape_and_type[i]->dims[index] = shape->GetDimSize(index); + } + } + } + return KERNEL_STATUS_OK; +} + +/* + * get shape information from framework. + */ +void CpuKernelCache::GetDimsFromShapeAndType( + const FWKAdapter::ShapeAndType *shape_and_type, + std::vector &dims) { + for (uint32_t index = 0; index < FWKAdapter::kMaxShapeDims; ++index) { + // LLONG_MIN for dim end flag + if (shape_and_type->dims[index] == LLONG_MIN) { + break; + } + int64_t dim_value = shape_and_type->dims[index]; + KERNEL_LOG_INFO("Get extend shape[%u] is [%lld]", index, dim_value); + dims.emplace_back(dim_value); + } +} + +/* + * update tensor information. 
+ */ +uint32_t CpuKernelCache::UpdateTensor( + const std::vector &io_addrs, bool unknown_shape, + const std::vector &input_shape_and_type, + const std::vector &output_shape_and_type, + CpuKernelContext &ctx) { + KERNEL_LOG_INFO("Update tensor info begin."); + if (io_addrs.size() != ctx.GetInputsSize() + ctx.GetOutputsSize()) { + KERNEL_LOG_ERROR( + "Addr number[%zu] is not equal to the sum of inputs[%zu] and " + "output[%zu].", + io_addrs.size(), ctx.GetInputsSize(), ctx.GetOutputsSize()); + return KERNEL_STATUS_PARAM_INVALID; + } + + if ((unknown_shape) && + ((input_shape_and_type.size() != ctx.GetInputsSize()) || + (output_shape_and_type.size() != ctx.GetOutputsSize()))) { + KERNEL_LOG_ERROR( + "Input shape_and_type size error, input size[%zu], input " + "shape_and_type " + "size[%zu], output size[%zu], output shape_and_type size[%zu].", + ctx.GetInputsSize(), input_shape_and_type.size(), ctx.GetOutputsSize(), + output_shape_and_type.size()); + return KERNEL_STATUS_PARAM_INVALID; + } + + size_t addr_index = 0; + for (size_t i = 0; i < ctx.GetInputsSize(); ++i, ++addr_index) { + Tensor *input = ctx.Input(i); + KERNEL_CHECK_NULLPTR(input, KERNEL_STATUS_PARAM_INVALID, + "Get input[%zu] failed.", i) + input->SetData(reinterpret_cast( + static_cast(io_addrs[addr_index]))); + + if (unknown_shape) { + std::vector dims; + GetDimsFromShapeAndType(input_shape_and_type[i], dims); + auto shape = input->GetTensorShape(); + KERNEL_CHECK_NULLPTR(shape, KERNEL_STATUS_PARAM_INVALID, + "Get input[%zu] shape failed.", i) + shape->SetDimSizes(dims); + } + + int64_t calc_data_size = input->CalcDataSizeByShape(); + uint64_t data_size = calc_data_size < 0 ? 
0 : calc_data_size; + input->SetDataSize(data_size); + KERNEL_LOG_INFO("Set input[%zu] addr[%llu] success.", i, + io_addrs[addr_index]); + } + + for (size_t i = 0; i < ctx.GetOutputsSize(); i++, addr_index++) { + Tensor *output = ctx.Output(i); + KERNEL_CHECK_NULLPTR(output, KERNEL_STATUS_PARAM_INVALID, + "Get output[%zu] failed.", i) + output->SetData(reinterpret_cast( + static_cast(io_addrs[addr_index]))); + + if (unknown_shape) { + std::vector dims; + GetDimsFromShapeAndType(output_shape_and_type[i], dims); + auto shape = output->GetTensorShape(); + KERNEL_CHECK_NULLPTR(shape, KERNEL_STATUS_PARAM_INVALID, + "Get output[%zu] shape failed.", i) + shape->SetDimSizes(dims); + } + + int64_t calc_data_size = output->CalcDataSizeByShape(); + uint64_t data_size = calc_data_size < 0 ? 0 : calc_data_size; + output->SetDataSize(data_size); + KERNEL_LOG_INFO("Set output[%zu] addr[%llu] success.", i, + io_addrs[addr_index]); + } + KERNEL_LOG_INFO("Update tensor info success."); + return KERNEL_STATUS_OK; +} + +/* + * parse extend tensor shape types information. + */ +uint32_t CpuKernelCache::ParseExtShapeType(const FWKAdapter::ExtInfo *ext_info, + bool &unknown_shape) { + if (ext_info->infoLen != sizeof(int32_t)) { + KERNEL_LOG_ERROR( + "Parse extend shape type failed, as info length must be [%zu], but got " + "[%u].", + sizeof(int32_t), ext_info->infoLen); + return KERNEL_STATUS_PARAM_INVALID; + } + + unknown_shape = true; + KERNEL_LOG_INFO("Kernel has unknown shape."); + return KERNEL_STATUS_OK; +} + +/* + * parse extend tensor shape and types information. 
+ */ +uint32_t CpuKernelCache::ParseExtShapeAndType( + bool unknown_shape, FWKAdapter::ExtInfo *ext_info, + std::vector &shape_and_type) { + if (!unknown_shape) { + return KERNEL_STATUS_OK; + } + uint32_t size = (ext_info->infoLen) / sizeof(FWKAdapter::ShapeAndType); + KERNEL_LOG_INFO("Parse extend shape and type, size[%u].", size); + uint32_t check = (ext_info->infoLen) % sizeof(FWKAdapter::ShapeAndType); + if (check != 0) { + KERNEL_LOG_ERROR( + "Parse extend info length[%u] failed, must be integer multiple of the " + "[%zu].", + ext_info->infoLen, sizeof(FWKAdapter::ShapeAndType)); + return KERNEL_STATUS_PARAM_INVALID; + } + + auto shapes = reinterpret_cast(ext_info->infoMsg); + for (uint32_t index = 0; index < size; ++index) { + shape_and_type.emplace_back(&shapes[index]); + } + return KERNEL_STATUS_OK; +} + +/* + * parse extend session information. + */ +uint32_t CpuKernelCache::ParseExtSessionInfo(FWKAdapter::ExtInfo *ext_info, + uint64_t &kernel_id) { + // no overflow + KERNEL_LOG_INFO("Parse extend session info."); + auto need_len = sizeof(SessionInfo); + if (ext_info->infoLen != need_len) { + KERNEL_LOG_ERROR( + "Parse extend session info failed, as info length must be " + "[%zu], but got [%u].", + sizeof(SessionInfo), ext_info->infoLen); + return KERNEL_STATUS_PARAM_INVALID; + } + + auto session = reinterpret_cast(ext_info->infoMsg); + kernel_id = session->kernelId; + return KERNEL_STATUS_OK; +} + +/* + * get bit status. + */ +bool CpuKernelCache::GetBitStatus(uint64_t num, uint64_t pos) { + return ((num & (1 << pos)) != 0); +} + +/* + * parse bitmap information. 
+ */ +uint32_t CpuKernelCache::ParseExtBitMap(const FWKAdapter::ExtInfo *ext_info, + bool &unknown_shape) { + if (ext_info->infoLen != sizeof(int64_t)) { + KERNEL_LOG_ERROR( + "Parse extend bitmap failed, as info length must be [%zu], but got " + "[%u].", + sizeof(int64_t), ext_info->infoLen); + return KERNEL_STATUS_PARAM_INVALID; + } + + uint64_t bit_map = *(reinterpret_cast(ext_info->infoMsg)); + unknown_shape = (!GetBitStatus(bit_map, 0)); + KERNEL_LOG_INFO("Unknown_shape_ is [%d].", unknown_shape); + return KERNEL_STATUS_OK; +} + +/* + * parse extend information. + */ +uint32_t CpuKernelCache::ParseExtMsg(AicpuParamHead *param_head, + bool &has_session_info, + uint64_t &kernel_id, + bool &unknown_shape, + std::vector &input_shape_and_type, + std::vector &output_shape_and_type) { + KERNEL_LOG_INFO("Parse extend info and update shape begin."); + uint32_t offset = 0; + FWKAdapter::ExtInfo *ext_info = nullptr; + char *extInfo_buf = + reinterpret_cast(static_cast(param_head->extInfoAddr)); + while (offset + sizeof(FWKAdapter::ExtInfo) <= param_head->extInfoLength) { + ext_info = reinterpret_cast(extInfo_buf + offset); + if (ext_info == nullptr) { + KERNEL_LOG_ERROR( + "Extend info is nullptr, extInfo length[%u], extend info addr[%p], " + "offset[%u].", + param_head->extInfoLength, param_head->extInfoAddr, offset); + return KERNEL_STATUS_PARAM_INVALID; + } + + uint32_t ret = KERNEL_STATUS_OK; + switch (ext_info->infoType) { + case FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE: + ret = ParseExtShapeType(ext_info, unknown_shape); + break; + case FWKAdapter::FWK_ADPT_EXT_INPUT_SHAPE: + ret = ParseExtShapeAndType(unknown_shape, ext_info, input_shape_and_type); + break; + case FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE: + ret = ParseExtShapeAndType(unknown_shape, ext_info, output_shape_and_type); + break; + case FWKAdapter::FWK_ADPT_EXT_SESSION_INFO: + has_session_info = true; + ret = ParseExtSessionInfo(ext_info, kernel_id); + break; + case FWKAdapter::FWK_ADPT_EXT_BITMAP: + ret = 
ParseExtBitMap(ext_info, unknown_shape); + break; + default: + KERNEL_LOG_INFO("Ignore infoType[%d], infoLen[%u].", ext_info->infoType, + ext_info->infoLen); + break; + } + + if (ret != KERNEL_STATUS_OK) { + return ret; + } + + // not overflow + offset += FWKAdapter::kExtInfoHeadSize; + offset += ext_info->infoLen; + } + + return KERNEL_STATUS_OK; +} + +/* + * parse io address. + */ +uint32_t CpuKernelCache::ParseIoAddr(AicpuParamHead *param_head, + std::vector &io_addrs, + char *&nodedef, uint32_t &nodedef_len) { + auto param_base = reinterpret_cast(param_head); + char *extend_param_base = param_base + sizeof(AicpuParamHead); + uint32_t extend_param_len = param_head->length - sizeof(AicpuParamHead); + + if (param_head->ioAddrNum > 0) { + if (param_head->ioAddrNum > kMaxIoAddrNumParamLen) { + KERNEL_LOG_ERROR("Param ioAddrNum[%u] is over %u.", param_head->ioAddrNum, + kMaxIoAddrNumParamLen); + return KERNEL_STATUS_PARAM_INVALID; + } + + uint32_t addr_len = param_head->ioAddrNum * sizeof(uint64_t); + if (extend_param_len < addr_len) { + KERNEL_LOG_ERROR( + "Extend param is not enough for io addr, ioAddrNum[%u], " + "extend_param_len[%u].", + param_head->ioAddrNum, extend_param_len); + return KERNEL_STATUS_PARAM_INVALID; + } + + auto io_addr_base = reinterpret_cast(extend_param_base); + for (uint32_t i = 0; i < param_head->ioAddrNum; ++i) { + io_addrs.push_back(io_addr_base[i]); + } + extend_param_base = extend_param_base + addr_len; + extend_param_len -= addr_len; + } + + if (extend_param_len < sizeof(uint32_t)) { + KERNEL_LOG_ERROR( + "Extend param is not enough for addr, needLen[%zu], " + "extend_param_len[%u].", + sizeof(uint32_t), extend_param_len); + return KERNEL_STATUS_PARAM_INVALID; + } + + nodedef_len = *reinterpret_cast(extend_param_base); + extend_param_base += sizeof(uint32_t); + nodedef = extend_param_base; + KERNEL_LOG_INFO("Parse io addr success, io number[%zu], nodedef length[%u].", + io_addrs.size(), nodedef_len); + return KERNEL_STATUS_OK; +} + +/* 
+ * get cpu kernel context from cache + */ +std::shared_ptr CpuKernelCache::GetCpuKernelContext( + bool has_sess_info, uint64_t kernel_id, const char *nodedef, + uint32_t nodedef_len, std::shared_ptr &nodedef_proto) { + std::shared_ptr ctx = nullptr; + KERNEL_LOG_INFO("Get cpu kernel context begin, kernel id[%llu].", kernel_id); + if (has_sess_info) { + CpuCacheData *cache = GetCache(kernel_id); + if (cache != nullptr) { + KERNEL_LOG_INFO("Get kernel from cache success."); + return cache->context; + } + } + + std::string str_data(nodedef, nodedef_len); + nodedef_proto = CpuKernelUtils::CreateNodeDef(); + KERNEL_CHECK_NULLPTR(nodedef_proto , + std::shared_ptr(nullptr), + "Create node def failed.") + if (!nodedef_proto->ParseFromString(str_data)) { + return std::shared_ptr(nullptr); + } + + CpuKernelContext *tmp = new (std::nothrow) CpuKernelContext(DEVICE); + KERNEL_CHECK_NULLPTR(tmp, std::shared_ptr(nullptr), + "Create context failed.") + ctx = std::shared_ptr(tmp); + uint32_t ret = ctx->Init(nodedef_proto.get()); + if (ret != KERNEL_STATUS_OK) { + return std::shared_ptr(nullptr); + } + + if (has_sess_info) { + CpuCacheData *cache_ptr = + new (std::nothrow) CpuCacheData(nodedef_proto, ctx); + KERNEL_CHECK_NULLPTR(cache_ptr, std::shared_ptr(nullptr), + "Create cpu cache data failed.") + std::shared_ptr cache_shared = + std::shared_ptr(cache_ptr); + SetCache(kernel_id, cache_shared); + KERNEL_LOG_INFO("Cache cpu kernel data success, kernel id[%llu].", + kernel_id); + } + KERNEL_LOG_INFO("Get cpu kernel context success, kernel id[%llu].", + kernel_id); + return ctx; +} + +/* + * run kernel. 
+ */ +int32_t CpuKernelCache::RunKernel(void *param) { + AicpuParamHead *param_head = static_cast(param); + std::vector io_addrs; + char *nodedef = nullptr; + uint32_t nodedef_len = 0; + uint32_t ret = ParseIoAddr(param_head, io_addrs, nodedef, nodedef_len); + if (ret != KERNEL_STATUS_OK) { + return -1; + } + + bool has_sess_info = false; + uint64_t kernel_id = 0; + bool unknown_shape = false; + std::vector input_shape_and_type; + std::vector output_shape_and_type; + ret = ParseExtMsg(param_head, has_sess_info, kernel_id, unknown_shape, + input_shape_and_type, output_shape_and_type); + if (ret != KERNEL_STATUS_OK) { + return -1; + } + + std::shared_ptr nodedef_proto = nullptr; + auto ctx = GetCpuKernelContext(has_sess_info, kernel_id, nodedef, nodedef_len, + nodedef_proto); + KERNEL_CHECK_NULLPTR(ctx, KERNEL_STATUS_INNER_ERROR, + "Get cpu kernel context from buff failed.") + + ret = UpdateTensor(io_addrs, unknown_shape, input_shape_and_type, + output_shape_and_type, *ctx); + if (ret != KERNEL_STATUS_OK) { + return -1; + } + + ret = CpuKernelRegister::Instance().RunCpuKernel(*ctx); + if (ret != KERNEL_STATUS_OK) { + return -1; + } + + ret = UpdateFWKOutputShape(unknown_shape, *ctx, output_shape_and_type); + if (ret != KERNEL_STATUS_OK) { + return -1; + } + return 0; +} + +} // namespace aicpu diff --git a/cpu_context/common/cpu_kernel_cache.h b/cpu_context/common/cpu_kernel_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..85c6cc27e42de56103592d072e646ebe58c14c95 --- /dev/null +++ b/cpu_context/common/cpu_kernel_cache.h @@ -0,0 +1,166 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CPU_KERNEL_CACHE_H_ +#define AICPU_CPU_KERNEL_CACHE_H_ + +#include +#include + +#include "aicpu_task_struct.h" +#include "cce/fwk_adpt_struct.h" +#include "cpu_context.h" +#include "cpu_node_def.h" +#include "kernel_cache.h" + +namespace aicpu { +struct CpuCacheData { + std::shared_ptr proto = nullptr; + std::shared_ptr context = nullptr; + CpuCacheData(std::shared_ptr proto, + std::shared_ptr context) + : proto(proto), context(context) {} +}; + +class CpuKernelCache : public KernelCache { + public: + CpuKernelCache() = default; + ~CpuKernelCache() = default; + + /* + * Init kernel cache. + * @return int32_t: 0 indicates success, while the others fail + */ + int32_t InitParameter() override; + + /* + * run kernel. + * @param param: kernel context + * @return int32_t: 0 indicates success, whilWe the others fail + */ + int32_t RunKernel(void *param) override; + + private: + CpuKernelCache(const CpuKernelCache &) = delete; + CpuKernelCache(CpuKernelCache &&) = delete; + CpuKernelCache &operator=(const CpuKernelCache &) = delete; + CpuKernelCache &operator=(CpuKernelCache &&) = delete; + + /* + * update framework output tensor shape. + * @return uint32_t: 0 indicates success, while the others fail + */ + uint32_t UpdateFWKOutputShape( + bool unknown_shape, const CpuKernelContext &ctx, + std::vector &output_shape_and_type); + + /* + * get shape information from framework. 
+ * @param dims: shape information + */ + void GetDimsFromShapeAndType(const FWKAdapter::ShapeAndType *shape_and_type, + std::vector &dims); + + /* + * update tensor information. + * @param ctx: kernel context + * @return uint32_t: 0 indicates success, while the others fail + */ + uint32_t UpdateTensor( + const std::vector &io_addrs, bool unknown_shape, + const std::vector &input_shape_and_type, + const std::vector &output_shape_and_type, + CpuKernelContext &ctx); + + /* + * parse extend tensor shape types information. + * @param ext_info: extend information + * @return uint32_t: 0 indicates success, while the others fail + */ + uint32_t ParseExtShapeType(const FWKAdapter::ExtInfo *ext_info, + bool &unknown_shape); + + /* + * parse extend tensor bitmap information. + * @param ext_info: extend information + * @return uint32_t: 0 indicates success, while the others fail + */ + uint32_t ParseExtBitMap(const FWKAdapter::ExtInfo *ext_info, + bool &unknown_shape); + + /* + * parse extend tensor shape and types information. + * @param ext_info: extend information + * @param shape_and_type: shape and types from extend information + * @return uint32_t: 0 indicates success, while the others fail + */ + uint32_t ParseExtShapeAndType( + bool unknown_shape, FWKAdapter::ExtInfo *ext_info, + std::vector &shape_and_type); + + /* + * parse extend session information. + * @param ext_info: extend information + * @param kernel_id: kernel id from extend information + * @return uint32_t: 0 indicates success, while the others fail + */ + uint32_t ParseExtSessionInfo(FWKAdapter::ExtInfo *ext_info, + uint64_t &kernel_id); + + /* + * parse extend information. 
+ * @param param_head: kernel context + * @param has_session_info: whether has session info in extend info + * @param kernel_id: kernel id + * @return uint32_t: 0 indicates success, while the others fail + */ + uint32_t ParseExtMsg( + AicpuParamHead *param_head, bool &has_session_info, uint64_t &kernel_id, + bool &unknown_shape, + std::vector &input_shape_and_type, + std::vector &output_shape_and_type); + + /* + * parse io address. + * @param param_head: kernel context + * @param io_addrs: kernel inputs and outputs adress + * @param nodedef: kernel node def + * @param nodedef_len: kernel node def length + * @return uint32_t: 0 indicates success, while the others fail + */ + uint32_t ParseIoAddr(AicpuParamHead *param_head, + std::vector &io_addrs, char *&nodedef, + uint32_t &nodedef_len); + + /* + * get cpu kernel context from cache + * @param has_sess_info: whether has session info + * @param kernel_id: kernel id, the key of cache + * @return uint32_t: 0 indicates success, while the others fail + */ + std::shared_ptr GetCpuKernelContext( + bool has_sess_info, uint64_t kernel_id, const char *nodedef, + uint32_t nodedef_len, std::shared_ptr &nodedef_proto); + + /* + * get bit status on pos + * @param num: input number + * @param pos: bit pos + * @return bool: bit is 1 or 0 + */ + bool GetBitStatus(uint64_t num, uint64_t pos); +}; +} // namespace aicpu +#endif // AICPU_CPU_KERNEL_CACHE_H_ diff --git a/cpu_context/common/cpu_kernel_register.cc b/cpu_context/common/cpu_kernel_register.cc new file mode 100644 index 0000000000000000000000000000000000000000..9ef0a6b1a1aad387bcc4e73151b2b8eb5070b54b --- /dev/null +++ b/cpu_context/common/cpu_kernel_register.cc @@ -0,0 +1,134 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cpu_kernel_register.h" + +#include + +#include "aicpu_context.h" +#include "cpu_kernel.h" +#include "log.h" +#include "status.h" + +namespace { +#define TYPE_REGISTAR(type, fun) type##Registerar(type, fun) +// protect creatorMap_ +std::mutex g_mutex; +} // namespace + +namespace aicpu { +/* + * regist kernel. + */ +bool RegistCpuKernel(const std::string &type, const KERNEL_CREATOR_FUN &fun) { + CpuKernelRegister::Registerar TYPE_REGISTAR(type, fun); + return true; +} + +/* + * get instance. + * @return CpuKernelRegister &: CpuKernelRegister instance + */ +CpuKernelRegister &CpuKernelRegister::Instance() { + static CpuKernelRegister instance; + return instance; +} + +/* + * get cpu kernel. + * param opType: the op type of kernel + * @return shared_ptr: cpu kernel ptr + */ +std::shared_ptr CpuKernelRegister::GetCpuKernel( + const std::string &opType) { + std::unique_lock lock(g_mutex); + auto iter = creatorMap_.find(opType); + if (iter != creatorMap_.end()) { + return iter->second(); + } + KERNEL_LOG_WARN("The kernel[%s] is not registered.", opType.c_str()); + return std::shared_ptr(nullptr); +} + +/* + * get all cpu kernel registered op types. + * @return std::vector: all cpu kernel registered op type + */ +std::vector CpuKernelRegister::GetAllRegisteredOpTypes() const { + std::vector ret; + std::unique_lock lock(g_mutex);; + for (auto iter = creatorMap_.begin(); iter != creatorMap_.end(); ++iter) { + ret.push_back(iter->first); + } + + return ret; +} + +/* + * run cpu kernel. 
+ * param ctx: context of kernel + * @return uint32_t: 0->success other->failed + */ +uint32_t CpuKernelRegister::RunCpuKernel(CpuKernelContext &ctx) { + std::string type = ctx.GetOpType(); + KERNEL_LOG_INFO("RunCpuKernel[%s] begin.", type.c_str()); + auto kernel = GetCpuKernel(type); + if (kernel == nullptr) { + return KERNEL_STATUS_INNER_ERROR; + } + if (aicpu::SetThreadLocalCtx != nullptr) { + if (aicpu::SetThreadLocalCtx(aicpu::CONTEXT_KEY_OP_NAME, type) != + aicpu::AICPU_ERROR_NONE) { + KERNEL_LOG_ERROR("Set kernel name[%s] to context failed.", type.c_str()); + return KERNEL_STATUS_INNER_ERROR; + } + } + if (aicpu::SetOpname != nullptr) { + (void)aicpu::SetOpname(type); + } + + auto start = std::chrono::steady_clock::now(); + uint32_t ret = kernel->Compute(ctx); + auto end = std::chrono::steady_clock::now(); + double dr_us=std::chrono::duration(end-start).count(); + KERNEL_LOG_EVENT("RunCpuKernel[%s], run time is [%lf] us.", type.c_str(), dr_us); + if (ret != KERNEL_STATUS_OK) { + return ret; + } + KERNEL_LOG_INFO("RunCpuKernel[%s] success.", type.c_str()); + return KERNEL_STATUS_OK; +} + +CpuKernelRegister::Registerar::Registerar(const std::string &type, + const KERNEL_CREATOR_FUN &fun) { + CpuKernelRegister::Instance().Register(type, fun); +} + +// register creator, this function will call in the constructor +void CpuKernelRegister::Register(const std::string &type, + const KERNEL_CREATOR_FUN &fun) { + std::unique_lock lock(g_mutex); + std::map::iterator iter = + creatorMap_.find(type); + if (iter != creatorMap_.end()) { + KERNEL_LOG_WARN("Register[%s] creator already exist", + type.c_str()); + return; + } + + creatorMap_[type] = fun; + KERNEL_LOG_DEBUG("Kernel[%s] register successfully", type.c_str()); +} +} // namespace aicpu diff --git a/cpu_context/common/cpu_kernel_utils.cc b/cpu_context/common/cpu_kernel_utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..780068331e0d786eda74689ee6005f2a7501369f --- /dev/null +++ 
b/cpu_context/common/cpu_kernel_utils.cc @@ -0,0 +1,231 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cpu_kernel_utils.h" + +#include "attr_value_impl.h" +#include "device.h" +#include "log.h" +#include "node_def_impl.h" +#include "sharder.h" +#include "status.h" +#include "tensor_impl.h" +#include "tensor_shape_impl.h" + +namespace aicpu { +/* + * construct Tensor for memory self-management. + */ +std::shared_ptr CpuKernelUtils::CreateTensor() { + auto proto_ptr = new (std::nothrow) aicpuops::Tensor(); + KERNEL_CHECK_NULLPTR(proto_ptr, std::shared_ptr(nullptr), + "New Tensor proto failed.") + + auto wrapper_ptr = new (std::nothrow) + TensorImpl(proto_ptr, [](aicpuops::Tensor *p) { delete p; }); + if (wrapper_ptr == nullptr) { + KERNEL_LOG_ERROR("New TensorProto failed"); + delete proto_ptr; + return std::shared_ptr(nullptr); + } + + auto class_ptr = new (std::nothrow) Tensor(wrapper_ptr); + if (class_ptr == nullptr) { + KERNEL_LOG_ERROR("New Tensor failed"); + delete wrapper_ptr; + return std::shared_ptr(nullptr); + } + + return std::shared_ptr(class_ptr); +} + +std::shared_ptr CpuKernelUtils::CreateTensor(TensorImpl *tensor) { + KERNEL_CHECK_NULLPTR(tensor, std::shared_ptr(nullptr), + "Tensor is null.") + auto class_ptr = new (std::nothrow) Tensor(tensor); + KERNEL_CHECK_NULLPTR(class_ptr, std::shared_ptr(nullptr), + "New Tensor failed.") + return std::shared_ptr(class_ptr); +} + 
+/* + * get tensor impl. + */ +std::shared_ptr CpuKernelUtils::GetImpl(const Tensor *tensor) { + return tensor->impl_; +} + +/* + * get tensor name. + */ +std::string CpuKernelUtils::GetTensorName(const Tensor *tensor) { + auto impl = GetImpl(tensor); + KERNEL_CHECK_NULLPTR(impl, std::string(), "Get Tensor impl failed.") + return impl->GetName(); +} + +/* + * set tensor name. + */ +void CpuKernelUtils::SetTensorName(const std::string &name, + std::shared_ptr &tensor) { + KERNEL_LOG_INFO("Set tensor name[%s]", name.c_str()); + auto impl = GetImpl(tensor.get()); + KERNEL_CHECK_NULLPTR_VOID(impl, "Get Tensor impl failed.") + impl->SetName(name); +} + +std::shared_ptr CpuKernelUtils::CreateTensorShape() { + auto proto_ptr = new (std::nothrow) aicpuops::TensorShape(); + KERNEL_CHECK_NULLPTR(proto_ptr, std::shared_ptr(nullptr), + "New TensorShape proto failed.") + + auto wrapper_ptr = new (std::nothrow) + TensorShapeImpl(proto_ptr, [](aicpuops::TensorShape *p) { delete p; }); + if (wrapper_ptr == nullptr) { + KERNEL_LOG_ERROR("new TensorShapeImpl failed"); + delete proto_ptr; + return std::shared_ptr(nullptr); + } + + auto class_ptr = new (std::nothrow) TensorShape(wrapper_ptr); + if (class_ptr == nullptr) { + KERNEL_LOG_ERROR("new TensorShape failed"); + delete wrapper_ptr; + return std::shared_ptr(nullptr); + } + + return std::shared_ptr(class_ptr); +} + +std::shared_ptr CpuKernelUtils::CreateTensorShape( + TensorShapeImpl *tensor_shape) { + KERNEL_CHECK_NULLPTR(tensor_shape, std::shared_ptr(nullptr), + "Tensor shape proto is null.") + auto class_ptr = new (std::nothrow) TensorShape(tensor_shape); + KERNEL_CHECK_NULLPTR(class_ptr, std::shared_ptr(nullptr), + "New TensorShape failed.") + return std::shared_ptr(class_ptr); +} + +/* + * get tensor shape impl. + */ +std::shared_ptr CpuKernelUtils::GetImpl( + const TensorShape *tensor_shape) { + return tensor_shape->impl_; +} + +/* + * construct AttrValue for memory self-management. 
+ */ +std::shared_ptr CpuKernelUtils::CreateAttrValue() { + auto proto_ptr = new (std::nothrow) aicpuops::AttrValue(); + KERNEL_CHECK_NULLPTR(proto_ptr, std::shared_ptr(nullptr), + "New AttrValue proto failed.") + + auto wrapper_ptr = new (std::nothrow) + AttrValueImpl(proto_ptr, [](aicpuops::AttrValue *p) { delete p; }); + if (wrapper_ptr == nullptr) { + KERNEL_LOG_ERROR("new AttrValueImpl failed"); + delete proto_ptr; + return std::shared_ptr(nullptr); + } + + auto class_ptr = new (std::nothrow) AttrValue(wrapper_ptr); + if (class_ptr == nullptr) { + KERNEL_LOG_ERROR("new AttrValue failed"); + delete wrapper_ptr; + return std::shared_ptr(nullptr); + } + + return std::shared_ptr(class_ptr); +} + +std::shared_ptr CpuKernelUtils::CreateAttrValue( + AttrValueImpl *impl) { + KERNEL_CHECK_NULLPTR(impl, std::shared_ptr(nullptr), + "Impl is null.") + auto class_ptr = new (std::nothrow) AttrValue(impl); + KERNEL_CHECK_NULLPTR(class_ptr, std::shared_ptr(nullptr), + "New AttrValue failed.") + return std::shared_ptr(class_ptr); +} + +/* + * get attr value impl. + */ +std::shared_ptr CpuKernelUtils::GetImpl( + const AttrValue *attr_value) { + return attr_value->impl_; +} + +/* + * construct NodeDef for memory self-management. + */ +std::shared_ptr CpuKernelUtils::CreateNodeDef() { + auto proto_ptr = new (std::nothrow) aicpuops::NodeDef(); + KERNEL_CHECK_NULLPTR(proto_ptr, std::shared_ptr(nullptr), + "New NodeDef proto failed.") + + auto wrapper_ptr = new (std::nothrow) + NodeDefImpl(proto_ptr, [](aicpuops::NodeDef *p) { delete p; }); + if (wrapper_ptr == nullptr) { + KERNEL_LOG_ERROR("new NodeDefImpl failed"); + delete proto_ptr; + return std::shared_ptr(nullptr); + } + + auto class_ptr = new (std::nothrow) NodeDef(wrapper_ptr); + if (class_ptr == nullptr) { + KERNEL_LOG_ERROR("new NodeDef failed"); + delete wrapper_ptr; + return std::shared_ptr(nullptr); + } + + return std::shared_ptr(class_ptr); +} + +/* + * ParallelFor shards the "total" units of work. 
+ * @return uint32_t: 0->sucess other->failed + */ +uint32_t CpuKernelUtils::ParallelFor( + const CpuKernelContext &ctx, int64_t total, int64_t perUnitSize, + const std::function &work) { + KERNEL_CHECK_NULLPTR(ctx.device_, KERNEL_STATUS_INNER_ERROR, + "Device is null.") + + const Sharder *sharder = ctx.device_->GetSharder(); + KERNEL_CHECK_NULLPTR(sharder, KERNEL_STATUS_INNER_ERROR, + "Get sharder is null.") + + sharder->ParallelFor(total, perUnitSize, work); + return KERNEL_STATUS_OK; +} + +/* + * Get CPU number + * @return CPU number + */ +uint32_t CpuKernelUtils::GetCPUNum(const CpuKernelContext &ctx) { + KERNEL_CHECK_NULLPTR(ctx.device_, 0, "Device is null.") + + const Sharder *sharder = ctx.device_->GetSharder(); + KERNEL_CHECK_NULLPTR(sharder, 0, "Get sharder is null.") + + return sharder->GetCPUNum(); +} +} // namespace aicpu diff --git a/cpu_context/common/device.cc b/cpu_context/common/device.cc new file mode 100644 index 0000000000000000000000000000000000000000..ca53ec03f139efe2dafa74faaf2780628216ba48 --- /dev/null +++ b/cpu_context/common/device.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "device.h" + +#include + +#include "device_sharder.h" +#include "host_sharder.h" + +namespace aicpu { +Device::Device(DeviceType device) { + device_ = device; + sharder_ = InitSharder(device); +} + +Device::~Device() { + if (sharder_ != nullptr) { + delete sharder_; + } +} + +/* + * get device type. + * @return DeviceType: HOST/DEVICE + */ +DeviceType Device::GetDeviceType() const { return device_; } + +/* + * get sharder. + * @return Sharder *: host or device sharder + */ +const Sharder *Device::GetSharder() const { + if (sharder_ != nullptr) { + return sharder_; + } + return nullptr; +} + +/* + * init sharder. + * param device: type of device + * @return Sharder *: not null->success, null->success + */ +Sharder *Device::InitSharder(DeviceType device_) { + if (device_ == DEVICE) { + return new (std::nothrow) DeviceSharder(device_); + } else { + return new (std::nothrow) HostSharder(device_); + } +} +} // namespace aicpu diff --git a/cpu_context/common/device.h b/cpu_context/common/device.h new file mode 100644 index 0000000000000000000000000000000000000000..f8cbc93ef72c71bf0c23c83609142a4ef5e8e4ee --- /dev/null +++ b/cpu_context/common/device.h @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_COMMON_DEVICE_H_ +#define AICPU_CONTEXT_COMMON_DEVICE_H_ + +#include "sharder.h" + +namespace aicpu { +class Device { + public: + explicit Device(DeviceType device); + + ~Device(); + + /* + * get device type. + * @return DeviceType: HOST/DEVICE + */ + DeviceType GetDeviceType() const; + + /* + * get sharder. + * @return Sharder *: host or device sharder + */ + const Sharder *GetSharder() const; + + private: + Device(const Device &) = delete; + Device(Device &&) = delete; + Device &operator=(const Device &) = delete; + Device &operator=(Device &&) = delete; + + /* + * init sharder. + * param device: type of device + * @return Sharder *: not null->success, null->success + */ + Sharder *InitSharder(DeviceType device); + + private: + DeviceType device_; // type of device + Sharder *sharder_; +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_COMMON_DEVICE_H_ diff --git a/cpu_context/common/device_cpu_kernel.cc b/cpu_context/common/device_cpu_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..8aac163a7616aa01524feee24292c9a23663a492 --- /dev/null +++ b/cpu_context/common/device_cpu_kernel.cc @@ -0,0 +1,125 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "device_cpu_kernel.h" + +#include "aicpu_context.h" +#include "cce/aicpu_engine_struct.h" +#include "cce/fwk_adpt_struct.h" +#include "cpu_kernel_cache.h" +#include "log.h" +#include "session_cache.h" +#include "status.h" + +using namespace aicpu; +namespace { +// max param len limit 10k. +constexpr uint32_t kMaxParamLen = 10240; +// max extend info len limit 20k. +constexpr uint32_t kMaxExtendLen = 20480; +const std::string kContextKeyStreamId = "streamId"; + +uint32_t ParseExtSessionInfo(AicpuParamHead *param_head, + SessionInfo *&session) { + KERNEL_LOG_INFO("Parse extend session info begin."); + uint32_t offset = 0; + FWKAdapter::ExtInfo *ext_info = nullptr; + char *ext_info_buf = + reinterpret_cast(static_cast(param_head->extInfoAddr)); + while (offset + sizeof(FWKAdapter::ExtInfo) <= param_head->extInfoLength) { + ext_info = reinterpret_cast(ext_info_buf + offset); + if (ext_info == nullptr) { + KERNEL_LOG_ERROR( + "Extend info is nullptr, extend info length[%u], extend info " + "offset[%u].", + param_head->extInfoLength, offset); + return KERNEL_STATUS_PARAM_INVALID; + } + + if (ext_info->infoType == FWKAdapter::FWK_ADPT_EXT_SESSION_INFO) { + auto need_len = sizeof(SessionInfo); + if (ext_info->infoLen != need_len) { + KERNEL_LOG_ERROR( + "Parse extend session info failed, as info length must be " + "[%zu], but %u.", + sizeof(SessionInfo), ext_info->infoLen); + return KERNEL_STATUS_PARAM_INVALID; + } + + session = reinterpret_cast(ext_info->infoMsg); + KERNEL_LOG_INFO("Parse extend session info success."); + } + + // not overflow + offset += FWKAdapter::kExtInfoHeadSize; + offset += ext_info->infoLen; + } + + KERNEL_LOG_INFO("Parse extend session info end."); + return KERNEL_STATUS_OK; +} +} // namespace + +extern "C" { +__attribute__((visibility("default"))) uint32_t RunCpuKernel(void *param) { + KERNEL_LOG_INFO("RunCpuKernel C begin"); + if (param == nullptr) { + KERNEL_LOG_ERROR("Param is null."); + return KERNEL_STATUS_PARAM_INVALID; + } + 
+ // parse param_len + AicpuParamHead *param_head = static_cast(param); + if ((param_head->length < sizeof(AicpuParamHead)) || + (param_head->length > kMaxParamLen) || + (param_head->extInfoLength > kMaxExtendLen)) { + KERNEL_LOG_ERROR( + "Param length[%u] not in [%zu, %u] or extend info length[%u] is " + "greater " + "than the limit[%u].", + param_head->length, sizeof(AicpuParamHead), kMaxParamLen, + param_head->extInfoLength, kMaxExtendLen); + return KERNEL_STATUS_PARAM_INVALID; + } + + SessionInfo *session = nullptr; + uint32_t ret = ParseExtSessionInfo(param_head, session); + if (ret != KERNEL_STATUS_OK) { + return ret; + } + + if (session == nullptr) { + KERNEL_LOG_INFO("RunCpuKernel directly."); + CpuKernelCache cache; + cache.Init(false); + return cache.RunKernel(param); + } + + std::string stream_id_value; + auto status = GetThreadLocalCtx(kContextKeyStreamId, stream_id_value); + if (status != AICPU_ERROR_NONE) { + KERNEL_LOG_ERROR("GetThreadLocalCtx failed, ret[%d].", status); + return KERNEL_STATUS_INNER_ERROR; + } + + uint64_t stream_id = atoi(stream_id_value.c_str()); + KERNEL_LOG_INFO( + "RunCpuKernel from cache, stream id[%llu], session id[%llu], session " + "flag[%d].", + stream_id, session->sessionId, session->sessFlag); + return SessionCache::Instance().RunKernel( + param, session->sessionId, stream_id, session->sessFlag); +} +} diff --git a/cpu_context/common/device_cpu_kernel.h b/cpu_context/common/device_cpu_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..f0bb5e4e5eeb837d760404a37c81109dfedb463a --- /dev/null +++ b/cpu_context/common/device_cpu_kernel.h @@ -0,0 +1,22 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CONTEXT_COMMON_DEVICE_CPU_KERNEL_H_ +#define AICPU_CONTEXT_COMMON_DEVICE_CPU_KERNEL_H_ +#include +extern "C" { +uint32_t RunCpuKernel(void *param); +} +#endif // AICPU_CONTEXT_COMMON_DEVICE_CPU_KERNEL_H_ \ No newline at end of file diff --git a/cpu_context/common/device_sharder.cc b/cpu_context/common/device_sharder.cc new file mode 100644 index 0000000000000000000000000000000000000000..2e5512aa827a7bbb38b85381e11f2a4068e011b9 --- /dev/null +++ b/cpu_context/common/device_sharder.cc @@ -0,0 +1,85 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "device_sharder.h" + +#include +#include "log.h" + +namespace { +const char *kSharderPath = "/usr/lib64/libaicpu_sharder.so"; +const char *kParallelForFunc = "ParallelFor"; +const char *kGetCPUNumFunc = "GetCPUNum"; +} // namespace + +namespace aicpu { +DeviceSharder::DeviceSharder(DeviceType device) : Sharder(device) { + sharder_ = dlopen(kSharderPath, RTLD_LAZY | RTLD_GLOBAL); + if (sharder_ == nullptr) { + KERNEL_LOG_WARN("Device sharder dlopen so[%s] failed, error[%s]", + kSharderPath, dlerror()); + parallel_for_ = nullptr; + get_cpu_num_ = nullptr; + } else { + parallel_for_ = + reinterpret_cast(dlsym(sharder_, kParallelForFunc)); + if (parallel_for_ == nullptr) { + KERNEL_LOG_WARN("Get function[%s] address failed, error[%s]", + kParallelForFunc, dlerror()); + } + + get_cpu_num_ = + reinterpret_cast(dlsym(sharder_, kGetCPUNumFunc)); + if (get_cpu_num_ == nullptr) { + KERNEL_LOG_WARN("Get function[%s] address failed, error[%s]", + kGetCPUNumFunc, dlerror()); + } + KERNEL_LOG_INFO("Device sharder dlopen so[%s] success", kSharderPath); + } +} + +DeviceSharder::~DeviceSharder() { + if (sharder_ != nullptr) { + (void) dlclose(sharder_); + } +} + +/* + * ParallelFor shards the "total" units of work. 
+ */ +void DeviceSharder::ParallelFor( + int64_t total, int64_t perUnitSize, + const std::function &work) const { + if (parallel_for_ != nullptr) { + parallel_for_(total, perUnitSize, work); + return; + } + + KERNEL_LOG_WARN("Function[%s] is null", kParallelForFunc); + work(0, total); +} + +/* + * Get CPU number + */ +uint32_t DeviceSharder::GetCPUNum() const { + if (get_cpu_num_ != nullptr) { + return get_cpu_num_(); + } + + KERNEL_LOG_WARN("Function[%s] is null", kGetCPUNumFunc); + return 1; +} +} // namespace aicpu diff --git a/cpu_context/common/device_sharder.h b/cpu_context/common/device_sharder.h new file mode 100644 index 0000000000000000000000000000000000000000..3318a4c49c026cc5a70ef8a1fefcd2ea09fe89ad --- /dev/null +++ b/cpu_context/common/device_sharder.h @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CONTEXT_COMMON_DEVICE_SHARDER_H_ +#define AICPU_CONTEXT_COMMON_DEVICE_SHARDER_H_ +#include "sharder.h" + +namespace aicpu { +typedef void (*ParallelForFunc)( + int64_t total, int64_t perUnitSize, + const std::function &work); +typedef uint32_t (*GetCPUNumFunc)(); +class DeviceSharder : public Sharder { + public: + explicit DeviceSharder(DeviceType device); + + ~DeviceSharder(); + + /* + * ParallelFor shards the "total" units of work. 
+ * @param total: size of total work + * @param perUnitSize: expect size of per unit work + * @param work: process of per unit work + */ + void ParallelFor( + int64_t total, int64_t perUnitSize, + const std::function &work) const override; + + /* + * Get CPU number + * @return CPU number + */ + uint32_t GetCPUNum() const override; + + private: + DeviceSharder(const DeviceSharder &) = delete; + DeviceSharder(DeviceSharder &&) = delete; + DeviceSharder &operator=(const DeviceSharder &) = delete; + DeviceSharder &operator=(DeviceSharder &&) = delete; + + private: + void *sharder_; + ParallelForFunc parallel_for_; + GetCPUNumFunc get_cpu_num_; +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_COMMON_DEVICE_SHARDER_H_ diff --git a/cpu_context/common/eigen_threadpool.cc b/cpu_context/common/eigen_threadpool.cc new file mode 100644 index 0000000000000000000000000000000000000000..b5e7d9f7b035762a520a66c0baff5aa1cd6c84c2 --- /dev/null +++ b/cpu_context/common/eigen_threadpool.cc @@ -0,0 +1,119 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "eigen_threadpool.h" + +#include +#include + +#include "log.h" + +namespace { +const uint32_t kTaskSize = 40000; +const uint32_t kMaxOverShardingFactor = 4; +const uint32_t kTotalCostFactor = 210000; +constexpr uint32_t kMaxTaskSize = kTaskSize * kMaxOverShardingFactor; +} // namespace + +namespace aicpu { +std::mutex EigenThreadPool::mutex_; +bool EigenThreadPool::init_flag_(false); +int32_t EigenThreadPool::core_num_(0); +std::unique_ptr EigenThreadPool::eigen_threadpool_(nullptr); +std::unique_ptr EigenThreadPool::threadpool_device_( + nullptr); + +EigenThreadPool *EigenThreadPool::GetInstance() { + KERNEL_LOG_INFO("EigenThreadPool GetInstance begin"); + { + std::unique_lock lock(mutex_); + if (!init_flag_) { + core_num_ = get_nprocs(); // obtains the number of CPU cores that can be + // used by users. + if (core_num_ <= 0) { + KERNEL_LOG_INFO( + "Get the number of CPU cores that can be used failed, core " + "number[%d]", + core_num_); + return nullptr; + } + eigen_threadpool_.reset(new Eigen::ThreadPool(core_num_)); + threadpool_device_.reset( + new Eigen::ThreadPoolDevice(eigen_threadpool_.get(), core_num_)); + init_flag_ = true; + KERNEL_LOG_INFO("EigenThreadPool init success, core number[%d]", + core_num_); + } + } + + static EigenThreadPool instance; + KERNEL_LOG_INFO("EigenThreadPool GetInstance success"); + return &instance; +} + +void EigenThreadPool::ParallelFor(int64_t total, int64_t per_unit_size, + const SharderWork &work) { + KERNEL_LOG_INFO( + "Eigen threadpool parallel for begin, total[%lld], per_unit_size[%lld]", + total, per_unit_size); + if ((total <= 0) || (work == nullptr) || (per_unit_size <= 0)) { + KERNEL_LOG_ERROR( + "Invalid param: total[%lld] <= 0 or per_unit_size[%lld] <= 0 or work " + "is " + "nullptr", + total, per_unit_size); + return; + } + + int64_t total_check = static_cast(static_cast(total)); + if (total_check != total) { + KERNEL_LOG_ERROR( + "Invalid param: total[%lld], value[%lld] after eigen conversion", 
total, + total_check); + return; + } + + double per_unit_cost = 1.0; + if (per_unit_size >= total) { + // use the current thread to process the task + per_unit_cost = 1.0 * kTaskSize / total; + } else if ((per_unit_size) <= (total / core_num_)) { + // run tasks with the maximum number of threads, maximum = + // kMaxOverShardingFactor * core_num_ + per_unit_cost = (1.0 * kMaxTaskSize * core_num_ / total) > + (1.0 * kTotalCostFactor / total) + ? (1.0 * kMaxTaskSize * core_num_ / total) + : (1.0 * kTotalCostFactor / total); + } else { + // the task is fragmented based on the number of data slices. + per_unit_cost = 1.0 * kMaxTaskSize / per_unit_size; + } + + KERNEL_LOG_INFO("Eigen threadpool parallel for, per_unit_cost[%.6f]", + per_unit_cost); + + threadpool_device_->parallelFor( + total, Eigen::TensorOpCost(0, 0, per_unit_cost), + [&work](Eigen::Index first, Eigen::Index last) { work(first, last); }); + KERNEL_LOG_INFO("Eigen threadpool parallel for success"); +} + +/* + * Get CPU number + */ +uint32_t EigenThreadPool::GetCPUNum() { + return static_cast(core_num_); +} +} // namespace aicpu diff --git a/cpu_context/common/eigen_threadpool.h b/cpu_context/common/eigen_threadpool.h new file mode 100644 index 0000000000000000000000000000000000000000..7257a2648c623a47b4bd097f48118e9cc988ab56 --- /dev/null +++ b/cpu_context/common/eigen_threadpool.h @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CONTEXT_COMMON_EIGEN_THREAD_POOL_H_ +#define AICPU_CONTEXT_COMMON_EIGEN_THREAD_POOL_H_ + +#include +#include +#include +#define EIGEN_USE_THREADS +#include + +namespace aicpu { +using SharderWork = std::function; + +class EigenThreadPool { + public: + static EigenThreadPool *GetInstance(); + + /* + * ParallelFor shards the "total" units of work. + */ + void ParallelFor(int64_t total, int64_t perUnitSize, const SharderWork &work); + + /* + * Get CPU number + * @return CPU number + */ + uint32_t GetCPUNum(); + + private: + EigenThreadPool() = default; + ~EigenThreadPool() = default; + + EigenThreadPool(const EigenThreadPool &) = delete; + EigenThreadPool(EigenThreadPool &&) = delete; + EigenThreadPool &operator=(const EigenThreadPool &) = delete; + EigenThreadPool &operator=(EigenThreadPool &&) = delete; + + private: + static std::mutex mutex_; // protect init_flag_ + static bool init_flag_; // true means initialized + static int32_t + core_num_; // the number of CPU cores that can be used by users + static std::unique_ptr eigen_threadpool_; + static std::unique_ptr threadpool_device_; +}; +}; // namespace aicpu +#endif // AICPU_CONTEXT_COMMON_EIGEN_THREAD_POOL_H_ diff --git a/cpu_context/common/host_sharder.cc b/cpu_context/common/host_sharder.cc new file mode 100644 index 0000000000000000000000000000000000000000..e2186ba38fd2e5546cba35348e5b1f3857e4fa41 --- /dev/null +++ b/cpu_context/common/host_sharder.cc @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "host_sharder.h" + +#include "eigen_threadpool.h" +#include "log.h" + +namespace aicpu { +/* + * ParallelFor shards the "total" units of work. + */ +void HostSharder::ParallelFor( + int64_t total, int64_t perUnitSize, + const std::function &work) const { + EigenThreadPool *threadpool = EigenThreadPool::GetInstance(); + if (threadpool == nullptr) { + KERNEL_LOG_ERROR("Get eigen thread pool failed"); + return; + } + + threadpool->ParallelFor(total, perUnitSize, work); +} + +/* + * Get CPU number + */ +uint32_t HostSharder::GetCPUNum() const { + EigenThreadPool *threadpool = EigenThreadPool::GetInstance(); + if (threadpool == nullptr) { + KERNEL_LOG_ERROR("Get eigen thread pool failed"); + return 0; + } + + return threadpool->GetCPUNum(); +} +} // namespace aicpu diff --git a/cpu_context/common/host_sharder.h b/cpu_context/common/host_sharder.h new file mode 100644 index 0000000000000000000000000000000000000000..a78a2805fdb6d991a755de86fc7bf8d8b0e15514 --- /dev/null +++ b/cpu_context/common/host_sharder.h @@ -0,0 +1,50 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CONTEXT_COMMON_HOST_SHARDER_H_ +#define AICPU_CONTEXT_COMMON_HOST_SHARDER_H_ +#include "sharder.h" + +namespace aicpu { +class HostSharder : public Sharder { + public: + explicit HostSharder(DeviceType device) : Sharder(device){}; + + ~HostSharder() = default; + + /* + * ParallelFor shards the "total" units of work. + * @param total: size of total work + * @param perUnitSize: expect size of per unit work + * @param work: process of per unit work + */ + void ParallelFor( + int64_t total, int64_t perUnitSize, + const std::function &work) const override; + + /* + * Get CPU number + * @return CPU number + */ + uint32_t GetCPUNum() const override; + + private: + HostSharder(const HostSharder &) = delete; + HostSharder(HostSharder &&) = delete; + HostSharder &operator=(const HostSharder &) = delete; + HostSharder &operator=(HostSharder &&) = delete; +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_COMMON_HOST_SHARDER_H_ diff --git a/cpu_context/common/kernel_cache.h b/cpu_context/common/kernel_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..32415dd6448d39d9a023120ba418e9629932eeb5 --- /dev/null +++ b/cpu_context/common/kernel_cache.h @@ -0,0 +1,163 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_COMMON_KERNEL_CACHE_H_ +#define AICPU_CONTEXT_COMMON_KERNEL_CACHE_H_ + +#include + +#include +#include +#include +#include + +#include "log.h" + +namespace aicpu { +template +class KernelCache { + public: + KernelCache() : sess_flag_(false), capacity_(1) {} + virtual ~KernelCache() = default; + + /* + * Init kernel cache. + * @param sess_flag: whether it's a session scene, false need to support LRU + * algorithm + * @return int32_t: 0 indicates success, while the others fail + */ + int32_t Init(bool sess_flag) { + sess_flag_ = sess_flag; + return InitParameter(); + } + + /* + * run kernel. + * @param param: kernel context + * @return int32_t: 0 indicates success, whilWe the others fail + */ + virtual int32_t RunKernel(void *param) = 0; + + /* + * get kernel cache, the lru algorithm is supported in non-session scenarios + * @param key: kernel id + * @return T *: cache content pointer + */ + T *GetCache(uint64_t key) { + KERNEL_LOG_DEBUG("GetCache begin, key[%llu].", key); + T *ret = nullptr; + std::unique_lock lock(kernel_mutex_); + auto it = kernel_cache_iter_.find(key); + if (it != kernel_cache_iter_.end()) { + KERNEL_LOG_DEBUG("GetCache success, key[%llu].", key); + ret = it->second->second.get(); + if (!sess_flag_) { + auto pair_iter = it->second; + std::pair> pair = *pair_iter; + kernel_cache_.erase(pair_iter); + kernel_cache_.push_front(pair); + kernel_cache_iter_[key] = kernel_cache_.begin(); + } + } + return ret; + } + + /* + * set kernel cache, the lru algorithm is supported in non-session scenarios + * @param key: kernel id + * @param value: cache content + */ + void SetCache(uint64_t key, std::shared_ptr value) { + KERNEL_LOG_DEBUG("SetCache begin, key[%llu].", key); + std::unique_lock lock(kernel_mutex_); + auto iter = kernel_cache_iter_.find(key); + if (iter != kernel_cache_iter_.end()) { + KERNEL_LOG_DEBUG("SetCache update cache, key[%llu].", key); + auto pair_iter = iter->second; + pair_iter->second = value; + if 
(!sess_flag_) { + std::pair> pair = *pair_iter; + kernel_cache_.erase(pair_iter); + kernel_cache_.push_front(pair); + kernel_cache_iter_[key] = kernel_cache_.begin(); + } + } else { + std::pair> pair = std::make_pair(key, value); + if ((capacity_ < kernel_cache_.size()) && (!sess_flag_)) { + uint64_t del_key = kernel_cache_.back().first; + KERNEL_LOG_DEBUG( + "SetCache is full, pop last element, capacity[%u], delete " + "key[%llu].", + capacity_, key); + kernel_cache_.pop_back(); + auto del_iter = kernel_cache_iter_.find(del_key); + if (del_iter != kernel_cache_iter_.end()) { + kernel_cache_iter_.erase(del_iter); + } + } + KERNEL_LOG_DEBUG("SetCache success, key[%llu].", key); + kernel_cache_.push_front(pair); + kernel_cache_iter_[key] = kernel_cache_.begin(); + } + } + + /* + * get session flag, true means session scene + * @return bool: whether it's a session scene + */ + bool GetSessionFlag() { return sess_flag_; } + + /* + * get kernel cache capacity + * @return uint32_t: lru capacity + */ + uint32_t GetCapacity() { return capacity_; } + + /* + * set kernel cache capacity + * @param capacity: lru capacity + */ + void SetCapacity(uint32_t capacity) { capacity_ = capacity; } + + /* + * get all kernel cache + * @return std::list>>: all cache, + * pair + */ + std::list>> GetAllKernelCache() { + return kernel_cache_; + } + + protected: + virtual int32_t InitParameter() = 0; + + private: + KernelCache(const KernelCache &) = delete; + KernelCache(KernelCache &&) = delete; + KernelCache &operator=(const KernelCache &) = delete; + KernelCache &operator=(KernelCache &&) = delete; + + bool sess_flag_; // whether it's a session scene, false need to support LRU + uint32_t capacity_; // lru capacity + std::mutex kernel_mutex_; + std::list>> + kernel_cache_; // all kernel cache, key is kernel id + std::unordered_map< + uint64_t, typename std::list>>:: + iterator> // iterator of kernel cahce, key is kernel id + kernel_cache_iter_; +}; +} // namespace aicpu +#endif // 
AICPU_CONTEXT_COMMON_KERNEL_CACHE_H_ diff --git a/cpu_context/common/log.h b/cpu_context/common/log.h new file mode 100644 index 0000000000000000000000000000000000000000..ef7dd4b60ff13edeaed23cf7fdd576baaf611352 --- /dev/null +++ b/cpu_context/common/log.h @@ -0,0 +1,82 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CONTEXT_COMMON_LOG_H_ +#define AICPU_CONTEXT_COMMON_LOG_H_ + +#include +#include +#include + +#include "toolchain/slog.h" + +#define GET_TID() syscall(__NR_gettid) +const char KERNEL_MODULE[] = "AICPU"; + +#ifdef RUN_TEST +#define KERNEL_LOG_DEBUG(fmt, ...) \ + printf("[DEBUG] [%s][%s][%s:%d][tid:%lu]:" fmt "\n", KERNEL_MODULE, \ + __FILE__, __FUNCTION__, __LINE__, GET_TID(), ##__VA_ARGS__) +#define KERNEL_LOG_INFO(fmt, ...) \ + printf("[INFO] [%s][%s][%s:%d][tid:%lu]:" fmt "\n", KERNEL_MODULE, __FILE__, \ + __FUNCTION__, __LINE__, GET_TID(), ##__VA_ARGS__) +#define KERNEL_LOG_WARN(fmt, ...) \ + printf("[WARN] [%s][%s][%s:%d][tid:%lu]:" fmt "\n", KERNEL_MODULE, __FILE__, \ + __FUNCTION__, __LINE__, GET_TID(), ##__VA_ARGS__) +#define KERNEL_LOG_ERROR(fmt, ...) \ + printf("[ERROR] [%s][%s][%s:%d][tid:%lu]:" fmt "\n", KERNEL_MODULE, \ + __FILE__, __FUNCTION__, __LINE__, GET_TID(), ##__VA_ARGS__) +#define KERNEL_LOG_EVENT(fmt, ...) 
\ + printf("[EVENT] [%s][%s][%s:%d][tid:%lu]:" fmt "\n", KERNEL_MODULE, \ + __FILE__, __FUNCTION__, __LINE__, GET_TID(), ##__VA_ARGS__) +#else +#define KERNEL_LOG_DEBUG(fmt, ...) \ + dlog_debug(AICPU, "[%s][%s:%d][tid:%lu]:" fmt, KERNEL_MODULE, __FUNCTION__, \ + __LINE__, GET_TID(), ##__VA_ARGS__) +#define KERNEL_LOG_INFO(fmt, ...) \ + dlog_info(AICPU, "[%s][%s:%d][tid:%lu]:" fmt, KERNEL_MODULE, __FUNCTION__, \ + __LINE__, GET_TID(), ##__VA_ARGS__) +#define KERNEL_LOG_WARN(fmt, ...) \ + dlog_warn(AICPU, "[%s][%s:%d][tid:%lu]:" fmt, KERNEL_MODULE, __FUNCTION__, \ + __LINE__, GET_TID(), ##__VA_ARGS__) +#define KERNEL_LOG_ERROR(fmt, ...) \ + dlog_error(AICPU, "[%s][%s:%d][tid:%lu]:" fmt, KERNEL_MODULE, __FUNCTION__, \ + __LINE__, GET_TID(), ##__VA_ARGS__) +#define KERNEL_LOG_EVENT(fmt, ...) \ + dlog_event(AICPU, "[%s][%s:%d][tid:%lu]:" fmt, KERNEL_MODULE, __FUNCTION__, \ + __LINE__, GET_TID(), ##__VA_ARGS__) +#endif + +#define KERNEL_CHECK_NULLPTR_VOID(value, logText...) \ + if (value == nullptr) { \ + KERNEL_LOG_ERROR(logText); \ + return; \ + } + +#define KERNEL_CHECK_NULLPTR(value, errorCode, logText...) \ + if (value == nullptr) { \ + KERNEL_LOG_ERROR(logText); \ + return errorCode; \ + } + +#define KERNEL_CHECK_ASSIGN_64S_MULTI(A, B, result, errorCode) \ + if ((A) != 0 && (B) != 0 && ((INT64_MAX) / (A)) <= (B)) { \ + KERNEL_LOG_ERROR("Integer reversed multiA: %llu * multiB: %llu", (A), \ + (B)); \ + return errorCode; \ + } \ + (result) = ((A) * (B)); + +#endif // AICPU_CONTEXT_COMMON_LOG_H_ diff --git a/cpu_context/common/session_cache.h b/cpu_context/common/session_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..8e6e0792b598b430b2af647473f886e848bcac2f --- /dev/null +++ b/cpu_context/common/session_cache.h @@ -0,0 +1,110 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CONTEXT_COMMON_SESSION_CACHE_H_ +#define AICPU_CONTEXT_COMMON_SESSION_CACHE_H_ + +#include +#include + +#include "kernel_cache.h" + +namespace aicpu { +template +class SessionCache { + public: + static SessionCache &Instance() { + static SessionCache instance; + return instance; + } + + /* + * run and cache kernel. + * @param param: kernel context + * @param session_id: sesson id + * @param stream_id: stream id + * @param sess_flag: whether it's a session scene, true use session id, false + * use stream id + * @return int32_t: 0 indicates success, while the others fail + */ + template + int32_t RunKernel(void *param, uint64_t session_id, uint64_t stream_id, + bool sess_flag) { + std::shared_ptr> kernel = nullptr; + if (sess_flag) { + KERNEL_LOG_DEBUG("SessionCache KernelCache from session, id[%llu].", + session_id); + std::unique_lock lock(session_mutex_); + int32_t ret = GetOrCreateKernelCache(session_kernel_cache_, session_id, + sess_flag, kernel); + if (ret != 0) { + return ret; + } + } else { + KERNEL_LOG_DEBUG("SessionCache KernelCache from stream, id[%llu].", + stream_id); + std::unique_lock lock(stream_mutex_); + int32_t ret = GetOrCreateKernelCache(stream_kernel_cache_, stream_id, + sess_flag, kernel); + if (ret != 0) { + return ret; + } + } + return kernel->RunKernel(param); + } + + private: + SessionCache() = default; + ~SessionCache() = default; + SessionCache(const SessionCache &) = delete; + SessionCache(SessionCache &&) = delete; + SessionCache &operator=(const SessionCache &) = delete; + SessionCache 
&operator=(SessionCache &&) = delete; + + template + int32_t GetOrCreateKernelCache( + std::map>> &kernel_map, + uint64_t id, bool sess_flag, std::shared_ptr> &kernel) { + auto iter = kernel_map.find(id); + if (iter != kernel_map.end()) { + KERNEL_LOG_DEBUG("Get kernel from cache success, id[%llu].", id); + kernel = iter->second; + } else { + KernelCache *cache = new (std::nothrow) T(); + if (cache == nullptr) { + KERNEL_LOG_DEBUG("Create kernel cache failed, id[%llu].", id); + return -1; + } + kernel = std::shared_ptr>(cache); + int32_t ret = kernel->Init(sess_flag); + if (ret != 0) { + return ret; + } + kernel_map.insert(std::make_pair(id, kernel)); + KERNEL_LOG_DEBUG("Create kernel cache, id[%llu].", id); + } + return 0; + } + + private: + std::mutex stream_mutex_; + std::map>> + stream_kernel_cache_; // key is stream id + std::mutex session_mutex_; + std::map>> + session_kernel_cache_; // key is session id +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_COMMON_SESSION_CACHE_H_ diff --git a/cpu_context/common/sharder.h b/cpu_context/common/sharder.h new file mode 100644 index 0000000000000000000000000000000000000000..85355538bb91f26e04d13fb5a62fefc550ba34b6 --- /dev/null +++ b/cpu_context/common/sharder.h @@ -0,0 +1,55 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_COMMON_SHARDER_H_ +#define AICPU_CONTEXT_COMMON_SHARDER_H_ +#include + +#include "cpu_types.h" + +namespace aicpu { +class Sharder { + public: + explicit Sharder(DeviceType device) : device_(device) {} + + virtual ~Sharder() = default; + + /* + * ParallelFor shards the "total" units of work. + * @param total: size of total work + * @param perUnitSize: expect size of per unit work + * @param work: process of per unit work + */ + virtual void ParallelFor( + int64_t total, int64_t perUnitSize, + const std::function &work) const = 0; + + /* + * Get CPU number + * @return CPU number + */ + virtual uint32_t GetCPUNum() const = 0; + + private: + Sharder(const Sharder &) = delete; + Sharder(Sharder &&) = delete; + Sharder &operator=(const Sharder &) = delete; + Sharder &operator=(Sharder &&) = delete; + + private: + DeviceType device_; // device type, HOST/DEVICE +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_COMMON_SHARDER_H_ diff --git a/cpu_context/common/status.h b/cpu_context/common/status.h new file mode 100644 index 0000000000000000000000000000000000000000..828bc8130ac98f7e0d0ce0e707c80a8063ff87b3 --- /dev/null +++ b/cpu_context/common/status.h @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_COMMON_STATUS_H_ +#define AICPU_CONTEXT_COMMON_STATUS_H_ + +#include + +namespace aicpu { +/* + * status code + */ +enum KernelStatus : uint32_t { + // 0-3 is fixed error code, runtime need interprete 0-3 error codes + KERNEL_STATUS_OK = 0, + KERNEL_STATUS_PARAM_INVALID = 1, + KERNEL_STATUS_INNER_ERROR = 2, + KERNEL_STATUS_TIMEOUT = 3, + KERNEL_STATUS_PROTOBUF_ERROR, + KERNEL_STATUS_SHARDER_ERROR +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_COMMON_STATUS_H_ diff --git a/cpu_context/common/thread_ctx.h b/cpu_context/common/thread_ctx.h new file mode 100644 index 0000000000000000000000000000000000000000..1c985bc8961bde7ceed83254fcc4fc3ff867adba --- /dev/null +++ b/cpu_context/common/thread_ctx.h @@ -0,0 +1,47 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_COMMON_THREAD_CTX_H_ +#define AICPU_CONTEXT_COMMON_THREAD_CTX_H_ +#include "cpu_types.h" +#include "aicpu_context.h" + +namespace aicpu { +class ThreadCtx { + public: + explicit ThreadCtx(DeviceType device) : device_(device) {} + + virtual ~ThreadCtx() = default; + + virtual uint32_t SetThreadCtxInfo(CtxType type, const std::string &key, + const std::string &value) const = 0; + + virtual uint32_t GetThreadCtxInfo(CtxType type, const std::string &key, + std::string &value) const = 0; + + virtual uint32_t RemoveThreadCtxInfo(CtxType type, + const std::string &key) const = 0; + + private: + ThreadCtx(const ThreadCtx &) = delete; + ThreadCtx(ThreadCtx &&) = delete; + ThreadCtx &operator=(const ThreadCtx &) = delete; + ThreadCtx &operator=(ThreadCtx &&) = delete; + + private: + DeviceType device_; // device type, HOST/DEVICE +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_COMMON_THREAD_CTX_H_ diff --git a/cpu_context/cpu_proto/attr_value.cc b/cpu_context/cpu_proto/attr_value.cc new file mode 100644 index 0000000000000000000000000000000000000000..a36ea842b77b4f697430d26a8bf79b9475c73277 --- /dev/null +++ b/cpu_context/cpu_proto/attr_value.cc @@ -0,0 +1,277 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "attr_value_impl.h" +#include "cpu_attr_value.h" + +namespace aicpu { +AttrValue::AttrValue(AttrValueImpl *impl) : impl_(impl) {} + +/* + * get string value of attr. + */ +std::string AttrValue::GetString() const { return impl_->GetString(); } + +/* + * get string list size of attr. + */ +int32_t AttrValue::ListStringSize() const { return impl_->ListStringSize(); } + +/* + * get string list value of attr. + */ +std::vector AttrValue::GetListString() const { + return impl_->GetListString(); +} + +/* + * set string list value to attr. + */ +void AttrValue::SetListString(const std::vector &bytes) { + impl_->SetListString(bytes); +} + +/* + * set string value to attr. + */ +void AttrValue::SetString(const std::string &byte) { impl_->SetString(byte); } + +/* + * attr add string value to list. + */ +void AttrValue::AddListString(const std::string &str) { + impl_->AddListString(str); +} + +/* + * get int value of attr. + */ +int64_t AttrValue::GetInt() const { return impl_->GetInt(); } + +/* + * get int list value of attr. + */ +std::vector AttrValue::GetListInt() const { + return impl_->GetListInt(); +} + +/* + * attr add int value to list. + */ +void AttrValue::AddListInt(int64_t i) { impl_->AddListInt(i); } + +/* + * get int list size of attr. + */ +int32_t AttrValue::ListIntSize() const { return impl_->ListIntSize(); } + +/* + * set int value to attr. + */ +void AttrValue::SetInt(int64_t i) { impl_->SetInt(i); } + +/* + * set int list value to attr. + */ +void AttrValue::SetListInt(const std::vector &list) { + impl_->SetListInt(list); +} + +/* + * get float value of attr. + */ +float AttrValue::GetFloat() const { return impl_->GetFloat(); } + +/* + * get float list value of attr. + */ +std::vector AttrValue::GetListFloat() const { + return impl_->GetListFloat(); +} + +/* + * attr add float value to list. + */ +void AttrValue::AddListFloat(float f) { impl_->AddListFloat(f); } + +/* + * set float value to attr. 
+ */ +void AttrValue::SetFloat(float f) { impl_->SetFloat(f); } + +/* + * get float list size of attr. + */ +int32_t AttrValue::ListFloatSize() const { return impl_->ListFloatSize(); } + +/* + * set float list value to attr. + */ +void AttrValue::SetListFloat(const std::vector &list) { + impl_->SetListFloat(list); +} + +/* + * get bool value of attr. + */ +bool AttrValue::GetBool() const { return impl_->GetBool(); } + +/* + * get bool list value of attr. + */ +std::vector AttrValue::GetListBool() const { + return impl_->GetListBool(); +} + +/* + * attr add bool value to list. + */ +void AttrValue::AddListBool(bool b) { impl_->AddListBool(b); } + +/* + * get bool list size of attr. + */ +int32_t AttrValue::ListBoolSize() const { return impl_->ListBoolSize(); } + +/* + * set bool value to attr. + */ +void AttrValue::SetBool(bool b) { impl_->SetBool(b); } + +/* + * set bool list value to attr. + */ +void AttrValue::SetListBool(const std::vector &list) { + return impl_->SetListBool(list); +} + +/* + * get data type value of attr. + */ +DataType AttrValue::GetDataType() const { return impl_->GetDataType(); } + +/* + * get data type list value of attr. + */ +std::vector AttrValue::GetListDataType() const { + return impl_->GetListDataType(); +} + +/* + * attr add data type value to list. + */ +void AttrValue::AddListDataType(DataType type) { impl_->AddListDataType(type); } + +/* + * get data type list size of attr. + */ +int32_t AttrValue::ListDataTypeSize() const { + return impl_->ListDataTypeSize(); +} + +/* + * set data type value to attr. + */ +void AttrValue::SetDataType(DataType type) { impl_->SetDataType(type); } + +/* + * set data type list value to attr. + */ +void AttrValue::SetListDataType(const std::vector &list) { + impl_->SetListDataType(list); +} + +/* + * set tensor shape value to attr. + */ +bool AttrValue::SetTensorShape(const TensorShape *shape) { + return impl_->SetTensorShape(shape); +} + +/* + * set tensor shape list value to attr. 
+ */ +uint32_t AttrValue::SetListTensorShape(const std::vector &list) { + return impl_->SetListTensorShape(list); +} + +/* + * attr add tensor shape value to list. + */ +std::shared_ptr AttrValue::AddListTensorShape() { + return impl_->AddListTensorShape(); +} + +/* + * get tensor shape value of attr. + */ +std::shared_ptr AttrValue::GetTensorShape() const { + return impl_->GetTensorShape(); +} + +/* + * get tensor shape list value of attr. + */ +std::vector AttrValue::GetListTensorShape() const { + return impl_->GetListTensorShape(); +} + +/* + * get tensor shape list size of attr. + */ +int32_t AttrValue::ListTensorShapeSize() const { + return impl_->ListTensorShapeSize(); +} + +/* + * set tensor value to attr. + */ +bool AttrValue::SetTensor(const Tensor *tensor) { + return impl_->SetTensor(tensor); +} + +/* + * set tensor list value to attr. + */ +uint32_t AttrValue::SetListTensor(const std::vector &list) { + return impl_->SetListTensor(list); +} + +/* + * attr add tensor value to list. + */ +std::shared_ptr AttrValue::AddListTensor() { + return impl_->AddListTensor(); +} + +/* + * get tensor value of attr. + */ +std::shared_ptr AttrValue::GetTensor() const { + return impl_->GetTensor(); +} + +/* + * get tensor list value of attr. + */ +std::vector AttrValue::GetListTensor() const { + return impl_->GetListTensor(); +} + +/* + * get tensor list size of attr. + */ +int32_t AttrValue::ListTensorSize() const { return impl_->ListTensorSize(); } +} // namespace aicpu diff --git a/cpu_context/cpu_proto/attr_value_impl.cc b/cpu_context/cpu_proto/attr_value_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..3a899362ad16a02b14703925a383377d533f5598 --- /dev/null +++ b/cpu_context/cpu_proto/attr_value_impl.cc @@ -0,0 +1,548 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "attr_value_impl.h" + +#include "cpu_kernel_utils.h" +#include "log.h" +#include "tensor_impl.h" +#include "tensor_shape_impl.h" + +namespace aicpu { +/* + * get string value of attr. + */ +std::string AttrValueImpl::GetString() const { return attr_value_->s(); } + +/* + * get string list size of attr. + */ +int32_t AttrValueImpl::ListStringSize() const { + auto array = attr_value_->array(); + return array.s_size(); +} + +/* + * get string list value of attr. + */ +std::vector AttrValueImpl::GetListString() const { + std::vector ret; + auto array = attr_value_->array(); + for (int32_t i = 0; i < array.s_size(); i++) { + ret.emplace_back(array.s(i)); + } + return ret; +} + +/* + * set string list value to attr. + */ +void AttrValueImpl::SetListString(const std::vector &bytes) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + for (const std::string &s : bytes) { + array->add_s(s); + } +} + +/* + * set string value to attr. + */ +void AttrValueImpl::SetString(const std::string &byte) { + attr_value_->set_s(byte); +} + +/* + * attr add string value to list. + */ +void AttrValueImpl::AddListString(const std::string &str) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + array->add_s(str); +} + +/* + * get int value of attr. + */ +int64_t AttrValueImpl::GetInt() const { return attr_value_->i(); } + +/* + * get int list value of attr. 
+ */ +std::vector AttrValueImpl::GetListInt() const { + std::vector ret; + auto array = attr_value_->array(); + for (int32_t i = 0; i < array.i_size(); i++) { + ret.emplace_back(array.i(i)); + } + return ret; +} + +/* + * attr add int value to list. + */ +void AttrValueImpl::AddListInt(int64_t i) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + array->add_i(i); +} + +/* + * get int list size of attr. + */ +int32_t AttrValueImpl::ListIntSize() const { + auto array = attr_value_->array(); + return array.i_size(); +} + +/* + * set int value to attr. + */ +void AttrValueImpl::SetInt(int64_t i) { attr_value_->set_i(i); } + +/* + * set int list value to attr. + */ +void AttrValueImpl::SetListInt(const std::vector &list) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + for (const int64_t &i : list) { + array->add_i(i); + } +} + +/* + * get float value of attr. + */ +float AttrValueImpl::GetFloat() const { return attr_value_->f(); } + +/* + * get float list value of attr. + */ +std::vector AttrValueImpl::GetListFloat() const { + std::vector ret; + auto array = attr_value_->array(); + for (int32_t i = 0; i < array.f_size(); i++) { + ret.emplace_back(array.f(i)); + } + return ret; +} + +/* + * attr add float value to list. + */ +void AttrValueImpl::AddListFloat(float f) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + array->add_f(f); +} + +/* + * set float value to attr. + */ +void AttrValueImpl::SetFloat(float f) { attr_value_->set_f(f); } + +/* + * get float list size of attr. + */ +int32_t AttrValueImpl::ListFloatSize() const { + auto array = attr_value_->array(); + return array.f_size(); +} + +/* + * set float list value to attr. 
+ */ +void AttrValueImpl::SetListFloat(const std::vector &list) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + for (const float &f : list) { + array->add_f(f); + } +} + +/* + * get bool value of attr. + */ +bool AttrValueImpl::GetBool() const { return attr_value_->b(); } + +/* + * get bool list value of attr. + */ +std::vector AttrValueImpl::GetListBool() const { + std::vector ret; + auto array = attr_value_->array(); + for (int32_t i = 0; i < array.b_size(); i++) { + ret.push_back(array.b(i)); + } + return ret; +} + +/* + * attr add bool value to list. + */ +void AttrValueImpl::AddListBool(bool b) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + array->add_b(b); +} + +/* + * get bool list size of attr. + */ +int32_t AttrValueImpl::ListBoolSize() const { + auto array = attr_value_->array(); + return array.b_size(); +} + +/* + * set bool value to attr. + */ +void AttrValueImpl::SetBool(bool b) { attr_value_->set_b(b); } + +/* + * set bool list value to attr. + */ +void AttrValueImpl::SetListBool(const std::vector &list) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + for (const bool &b : list) { + array->add_b(b); + } +} + +/* + * get data type value of attr. + */ +DataType AttrValueImpl::GetDataType() const { + return static_cast(attr_value_->type()); +} + +/* + * get data type list value of attr. + */ +std::vector AttrValueImpl::GetListDataType() const { + std::vector ret; + auto array = attr_value_->array(); + for (int32_t i = 0; i < array.type_size(); i++) { + ret.emplace_back(static_cast(array.type(i))); + } + return ret; +} + +/* + * attr add data type value to list. 
+ */ +void AttrValueImpl::AddListDataType(DataType type) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + array->add_type(type); +} + +/* + * get data type list size of attr. + */ +int32_t AttrValueImpl::ListDataTypeSize() const { + auto array = attr_value_->array(); + return array.type_size(); +} + +/* + * set data type value to attr. + */ +void AttrValueImpl::SetDataType(DataType type) { attr_value_->set_type(type); } + +/* + * set data type list value to attr. + */ +void AttrValueImpl::SetListDataType(const std::vector &list) { + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR_VOID(array, "Protobuf mutable array is nullptr") + for (const DataType &type : list) { + array->add_type(type); + } +} + +/* + * set tensor shape value to attr. + */ +bool AttrValueImpl::SetTensorShape(const TensorShape *shape) { + KERNEL_CHECK_NULLPTR(shape, false, "Shape is null") + + auto tensorShape = attr_value_->mutable_shape(); + KERNEL_CHECK_NULLPTR(tensorShape, false, + "Protobuf mutable tensor shape is null") + auto impl = CpuKernelUtils::GetImpl(shape); + KERNEL_CHECK_NULLPTR(impl, false, "Get impl is null") + auto proto = impl->GetProto(); + KERNEL_CHECK_NULLPTR(proto, false, "Get proto is null") + *tensorShape = *(impl->GetProto()); + return true; +} + +/* + * set tensor shape list value to attr. 
+ */ +uint32_t AttrValueImpl::SetListTensorShape( + const std::vector &list) { + uint32_t ret = 0; + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR(array, ret, "Protobuf mutable array is nullptr") + + for (size_t i = 0; i < list.size(); i++) { + auto tmpShape = array->add_shape(); + if ((list[i] == nullptr) || (tmpShape == nullptr)) { + KERNEL_LOG_ERROR("Shape[%zu] is null or protobuf add shape ret null.", i); + } else { + auto impl = CpuKernelUtils::GetImpl(list[i]); + if ((impl == nullptr) || (impl->GetProto() == nullptr)) { + KERNEL_LOG_ERROR("Get list[%zu] impl or proto is null.", i); + continue; + } + *tmpShape = *(impl->GetProto()); + ret++; + } + } + + return ret; +} + +/* + * attr add tensor shape value to list. + */ +std::shared_ptr AttrValueImpl::AddListTensorShape() { + auto array = attr_value_->mutable_array(); + if (array == nullptr) { + KERNEL_LOG_ERROR("Protobuf mutable array is nullptr."); + return std::shared_ptr(nullptr); + } + + auto shape = array->add_shape(); + if (shape == nullptr) { + KERNEL_LOG_ERROR("Protobuf mutable array add shape is nullptr."); + return std::shared_ptr(nullptr); + } + + TensorShapeImpl *impl = new (std::nothrow) TensorShapeImpl(shape); + if (impl == nullptr) { + KERNEL_LOG_ERROR("Create TensorShapeImpl failed."); + return std::shared_ptr(nullptr); + } + + auto tensorShape = CpuKernelUtils::CreateTensorShape(impl); + if (tensorShape == nullptr) { + delete impl; + } + return tensorShape; +} + +/* + * get tensor shape value of attr. 
+ */ +std::shared_ptr AttrValueImpl::GetTensorShape() const { + auto shape = attr_value_->mutable_shape(); + if (shape == nullptr) { + KERNEL_LOG_ERROR("Protobuf mutable shape is nullptr."); + return std::shared_ptr(nullptr); + } + + TensorShapeImpl *impl = new (std::nothrow) TensorShapeImpl(shape); + if (impl == nullptr) { + KERNEL_LOG_ERROR("Create TensorShapeImpl failed."); + return std::shared_ptr(nullptr); + } + + auto tensorShape = CpuKernelUtils::CreateTensorShape(impl); + if (tensorShape == nullptr) { + delete impl; + } + return tensorShape; +} + +/* + * get tensor shape list value of attr. + */ +std::vector AttrValueImpl::GetListTensorShape() const { + std::vector ret; + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR(array, ret, "Protobuf mutable array is nullptr") + for (int32_t i = 0; i < array->shape_size(); i++) { + auto shape = array->mutable_shape(i); + if (shape == nullptr) { + KERNEL_LOG_ERROR("Protobuf mutable shape[%d] is nullptr.", i); + return std::vector(); + } + + TensorShapeImpl *impl = new (std::nothrow) TensorShapeImpl(shape); + if (impl == nullptr) { + KERNEL_LOG_ERROR("Create TensorShapeImpl[%d] failed.", i); + return std::vector(); + } else { + auto tensorShape = CpuKernelUtils::CreateTensorShape(impl); + if (tensorShape == nullptr) { + delete impl; + return std::vector(); + } + ret.emplace_back(*tensorShape); + } + } + return ret; +} + +/* + * get tensor shape list size of attr. + */ +int32_t AttrValueImpl::ListTensorShapeSize() const { + auto array = attr_value_->array(); + return array.shape_size(); +} + +/* + * set tensor value to attr. 
+ */
+// Copies the given tensor's proto into this attr's tensor field.
+// Returns true on success; every pointer on the path is null-checked.
+bool AttrValueImpl::SetTensor(const Tensor *tensor) {
+  KERNEL_CHECK_NULLPTR(tensor, false, "Tensor is null")
+  auto tensorPtr = attr_value_->mutable_tensor();
+  KERNEL_CHECK_NULLPTR(tensorPtr, false, "Protobuf mutable tensor is nullptr")
+  auto impl = CpuKernelUtils::GetImpl(tensor);
+  KERNEL_CHECK_NULLPTR(impl, false, "Get impl is nullptr")
+  auto proto = impl->GetProto();
+  KERNEL_CHECK_NULLPTR(proto, false, "Get proto is nullptr")
+  *tensorPtr = *(proto);
+  return true;
+}
+
+/*
+ * set tensor list value to attr.
+ * Returns the number of list entries copied successfully (partial success).
+ * NOTE(review): failures here log WARN while the shape-list counterpart
+ * (SetListTensorShape) logs ERROR for the same situations — consider
+ * unifying the severity.
+ */
+uint32_t AttrValueImpl::SetListTensor(const std::vector &list) {
+  uint32_t ret = 0;
+  auto array = attr_value_->mutable_array();
+  KERNEL_CHECK_NULLPTR(array, ret, "Protobuf mutable array is nullptr")
+  for (size_t i = 0; i < list.size(); i++) {
+    auto tensorPtr = array->add_tensor();
+    if ((list[i] == nullptr) || (tensorPtr == nullptr)) {
+      // Bad entry: skipped without incrementing ret.
+      KERNEL_LOG_WARN("Tensor[%zu] is null or protobuf add tensor ret null.",
+                      i);
+    } else {
+      auto impl = CpuKernelUtils::GetImpl(list[i]);
+      if ((impl == nullptr) || (impl->GetProto() == nullptr)) {
+        KERNEL_LOG_WARN("Get list[%zu] impl or proto is null.", i);
+        continue;
+      }
+      // Deep-copy the tensor proto into the freshly added array slot.
+      *tensorPtr = *(impl->GetProto());
+      ret++;
+    }
+  }
+  return ret;
+}
+
+/*
+ * attr add tensor value to list.
+ */
+// Appends a new tensor slot to the attr's array and returns a wrapper over
+// it, or null on any allocation/protobuf failure. The wrapper aliases proto
+// memory owned by attr_value_.
+std::shared_ptr AttrValueImpl::AddListTensor() {
+  auto array = attr_value_->mutable_array();
+  if (array == nullptr) {
+    KERNEL_LOG_ERROR("Protobuf mutable array is nullptr.");
+    return std::shared_ptr(nullptr);
+  }
+
+  auto tensor = array->add_tensor();
+  if (tensor == nullptr) {
+    KERNEL_LOG_ERROR("Protobuf mutable array add tensor is nullptr.");
+    return std::shared_ptr(nullptr);
+  }
+
+  TensorImpl *impl = new (std::nothrow) TensorImpl(tensor);
+  if (impl == nullptr) {
+    KERNEL_LOG_ERROR("Create TensorImpl failed.");
+    return std::shared_ptr(nullptr);
+  }
+
+  auto aicpuTensor = CpuKernelUtils::CreateTensor(impl);
+  if (aicpuTensor == nullptr) {
+    // Wrapper creation failed: impl would otherwise leak.
+    delete impl;
+  }
+  return aicpuTensor;
+}
+
+/*
+ * get tensor value of attr.
+ * Wraps the attr's tensor proto field; returns null on failure. Despite the
+ * const qualifier this calls mutable_tensor(), which creates the field if it
+ * is not yet set.
+ */
+std::shared_ptr AttrValueImpl::GetTensor() const {
+  auto tensor = attr_value_->mutable_tensor();
+  if (tensor == nullptr) {
+    KERNEL_LOG_ERROR("Protobuf mutable tensor is nullptr.");
+    return std::shared_ptr(nullptr);
+  }
+
+  TensorImpl *impl = new (std::nothrow) TensorImpl(tensor);
+  if (impl == nullptr) {
+    KERNEL_LOG_ERROR("Create TensorImpl failed.");
+    return std::shared_ptr(nullptr);
+  }
+
+  auto aicpuTensor = CpuKernelUtils::CreateTensor(impl);
+  if (aicpuTensor == nullptr) {
+    delete impl;
+  }
+  return aicpuTensor;
+}
+
+/*
+ * get tensor list value of attr.
+ */ +std::vector AttrValueImpl::GetListTensor() const { + std::vector ret; + auto array = attr_value_->mutable_array(); + KERNEL_CHECK_NULLPTR(array, ret, "Protobuf mutable array is nullptr") + for (int32_t i = 0; i < array->tensor_size(); i++) { + auto tensor = array->mutable_tensor(i); + if (tensor == nullptr) { + KERNEL_LOG_ERROR("Protobuf mutable tensor is nullptr."); + return std::vector(); + } + + TensorImpl *impl = new (std::nothrow) TensorImpl(tensor); + if (impl == nullptr) { + KERNEL_LOG_ERROR("Create TensorImpl[%d] failed.", i); + return std::vector(); + } else { + auto aicpuTensor = CpuKernelUtils::CreateTensor(impl); + if (aicpuTensor == nullptr) { + delete impl; + return std::vector(); + } + ret.emplace_back(*aicpuTensor); + } + } + return ret; +} + +/* + * get tensor list size of attr. + */ +int32_t AttrValueImpl::ListTensorSize() const { + auto array = attr_value_->array(); + return array.tensor_size(); +} + +/* + * get attr proto. + */ +aicpuops::AttrValue *AttrValueImpl::GetProto() const { + return attr_value_.get(); +} +} // namespace aicpu diff --git a/cpu_context/cpu_proto/attr_value_impl.h b/cpu_context/cpu_proto/attr_value_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..b3e10978bebcb77102eee2f7447d69c71e561d9c --- /dev/null +++ b/cpu_context/cpu_proto/attr_value_impl.h @@ -0,0 +1,309 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_CPU_PROTO_ATTR_VALUE_IMPL_H_ +#define AICPU_CONTEXT_CPU_PROTO_ATTR_VALUE_IMPL_H_ +#include +#include +#include +#include + +#include "cpu_tensor.h" +#include "cpu_tensor_shape.h" +#include "proto/cpu_attr.pb.h" + +namespace aicpu { +class AttrValueImpl { + friend class CpuKernelUtils; + + public: + AttrValueImpl( + aicpuops::AttrValue *attr, + std::function del_func = + [](aicpuops::AttrValue *p) {}) + : attr_value_(attr, del_func) {} + + ~AttrValueImpl() = default; + AttrValueImpl(const AttrValueImpl &) = delete; + AttrValueImpl(AttrValueImpl &&) = delete; + AttrValueImpl &operator=(const AttrValueImpl &) = delete; + AttrValueImpl &operator=(AttrValueImpl &&) = delete; + + /* + * get string value of attr. + * @return string: string value of attr + */ + std::string GetString() const; + + /* + * get string list value of attr. + * @return vector: string list value of attr + */ + std::vector GetListString() const; + + /* + * attr add string value to list. + * @param string: string value need to add to list + */ + void AddListString(const std::string &string); + + /* + * get string list size of attr. + * @return int32_t: string list size of attr + */ + int32_t ListStringSize() const; + + /* + * set string value to attr. + * @param string: string value need to set to attr + */ + void SetString(const std::string &string); + + /* + * set string list value to attr. + * @param vector: string list value need to set to attr + */ + void SetListString(const std::vector &bytes); + + /* + * get int value of attr. + * @return int64_t: int value of attr + */ + int64_t GetInt() const; + + /* + * get int list value of attr. + * @return vector: int list value of attr + */ + std::vector GetListInt() const; + + /* + * attr add int value to list. + * @param i: int value need to add to list + */ + void AddListInt(int64_t i); + + /* + * get int list size of attr. 
+ * @return int32_t: int list size of attr + */ + int32_t ListIntSize() const; + + /* + * set int value to attr. + * @param i: int value need to set to attr + */ + void SetInt(int64_t i); + + /* + * set int list value to attr. + * @param vector: int list value need to set to attr + */ + void SetListInt(const std::vector &i); + + /* + * get float value of attr. + * @return float: float value of attr + */ + float GetFloat() const; + + /* + * get float list value of attr. + * @return vector: float list value of attr + */ + std::vector GetListFloat() const; + + /* + * attr add float value to list. + * @param f: float value need to add to list + */ + void AddListFloat(float f); + + /* + * get float list size of attr. + * @return int32_t: float list size of attr + */ + int32_t ListFloatSize() const; + + /* + * set float value to attr. + * @param f: float value need to set to attr + */ + void SetFloat(float f); + + /* + * set float list value to attr. + * @param vector: float list value need to set to attr + */ + void SetListFloat(const std::vector &f); + + /* + * get bool value of attr. + * @return bool: bool value of attr + */ + bool GetBool() const; + + /* + * get bool list value of attr. + * @return vector: bool list value of attr + */ + std::vector GetListBool() const; + + /* + * attr add bool value to list. + * @param b: bool value need to add to list + */ + void AddListBool(bool b); + + /* + * get bool list size of attr. + * @return int32_t: bool list size of attr + */ + int32_t ListBoolSize() const; + + /* + * set bool value to attr. + * @param b: bool value need to set to attr + */ + void SetBool(bool b); + + /* + * set bool list value to attr. + * @param vector: bool list value need to set to attr + */ + void SetListBool(const std::vector &b); + + /* + * get data type value of attr. + * @return DataType: data type value of attr + */ + DataType GetDataType() const; + + /* + * get data type list value of attr. 
+ * @return vector: data type list value of attr + */ + std::vector GetListDataType() const; + + /* + * attr add data type value to list. + * @param type: data type value need to add to list + */ + void AddListDataType(DataType type); + + /* + * get data type list size of attr. + * @return int32_t: data type list size of attr + */ + int32_t ListDataTypeSize() const; + + /* + * set data type value to attr. + * @param type: data type value need to set to attr + */ + void SetDataType(DataType type); + + /* + * set data type list value to attr. + * @param vector: data type list value need to set to attr + */ + void SetListDataType(const std::vector &type); + + /* + * set tensor shape value to attr. + * @param shape: tensor shape value need to set to attr + * @return bool: true->success false->failed + */ + bool SetTensorShape(const TensorShape *shape); + + /* + * set tensor shape list value to attr. + * @param vector: tensor shape list value need to set to attr + * @return uint32_t: success number + */ + uint32_t SetListTensorShape(const std::vector &shape); + + /* + * attr add tensor shape value to list. + * @return shared_ptr: tensor shape value ptr added to list + */ + std::shared_ptr AddListTensorShape(); + + /* + * get tensor shape value of attr. + * @return TensorShape: tensor shape value of attr + */ + std::shared_ptr GetTensorShape() const; + + /* + * get tensor shape list value of attr. + * @return vector: tensor shape list value of attr + */ + std::vector GetListTensorShape() const; + + /* + * get tensor shape list size of attr. + * @return int32_t: tensor shape list size of attr + */ + int32_t ListTensorShapeSize() const; + + /* + * set tensor value to attr. + * @param tensor: tensor value need to set to attr + * @return bool: true->success false->failed + */ + bool SetTensor(const Tensor *tensor); + + /* + * set tensor list value to attr. 
+ * @param vector: tensor list value need to set to attr + * @return uint32_t: success number + */ + uint32_t SetListTensor(const std::vector &tensor); + + /* + * attr add tensor value to list. + * @return shared_ptr: tensor value ptr added to list + */ + std::shared_ptr AddListTensor(); + + /* + * get tensor value of attr. + * @return Tensor: tensor value of attr + */ + std::shared_ptr GetTensor() const; + + /* + * get tensor list value of attr. + * @return vector: tensor list value of attr + */ + std::vector GetListTensor() const; + + /* + * get tensor list size of attr. + * @return int32_t: tensor list size of attr + */ + int32_t ListTensorSize() const; + + /* + * get attr proto. + */ + aicpuops::AttrValue *GetProto() const; + + private: + std::shared_ptr attr_value_{nullptr}; +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_CPU_PROTO_ATTR_VALUE_IMPL_H_ diff --git a/cpu_context/cpu_proto/node_def.cc b/cpu_context/cpu_proto/node_def.cc new file mode 100644 index 0000000000000000000000000000000000000000..4887982525fac72ccbfba174a3a3fb472fd1f615 --- /dev/null +++ b/cpu_context/cpu_proto/node_def.cc @@ -0,0 +1,94 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cpu_node_def.h" +#include "node_def_impl.h" + +namespace aicpu { +NodeDef::NodeDef(NodeDefImpl *impl) : impl_(impl) {} + +/* + * parse parameter from string. 
+ */
+// NodeDef is a thin pimpl facade: every method below forwards verbatim to the
+// owned NodeDefImpl with no extra logic. impl_ is assumed non-null (set by the
+// constructor above) — none of these wrappers re-check it.
+bool NodeDef::ParseFromString(const std::string &str) {
+  return impl_->ParseFromString(str);
+}
+
+/*
+ * serialize string to node def.
+ */
+bool NodeDef::SerializeToString(std::string &str) const {
+  return impl_->SerializeToString(str);
+}
+
+/*
+ * set op type to node def.
+ */
+void NodeDef::SetOpType(const std::string &op) { impl_->SetOpType(op); }
+
+/*
+ * get op type of node def.
+ */
+std::string NodeDef::GetOpType() const { return impl_->GetOpType(); }
+
+/*
+ * add input tensor to node def.
+ */
+std::shared_ptr NodeDef::AddInputs() { return impl_->AddInputs(); }
+
+/*
+ * add output tensor to node def.
+ */
+std::shared_ptr NodeDef::AddOutputs() { return impl_->AddOutputs(); }
+
+/*
+ * add attr to node def.
+ */
+bool NodeDef::AddAttrs(const std::string &name, const AttrValue *attr) {
+  return impl_->AddAttrs(name, attr);
+}
+
+/*
+ * get input tensor size of node def.
+ */
+int32_t NodeDef::InputsSize() const { return impl_->InputsSize(); }
+
+/*
+ * get output tensor size of node def.
+ */
+int32_t NodeDef::OutputsSize() const { return impl_->OutputsSize(); }
+
+/*
+ * get input tensor of node def.
+ */
+std::shared_ptr NodeDef::MutableInputs(int32_t index) const {
+  return impl_->MutableInputs(index);
+}
+
+/*
+ * get output tensor of node def.
+ */
+std::shared_ptr NodeDef::MutableOutputs(int32_t index) const {
+  return impl_->MutableOutputs(index);
+}
+
+/*
+ * get attr of node def.
+ */ +std::unordered_map > NodeDef::Attrs() + const { + return impl_->Attrs(); +} +} // namespace aicpu diff --git a/cpu_context/cpu_proto/node_def_impl.cc b/cpu_context/cpu_proto/node_def_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..b42df9e789b8605ea8fdb2a33b96af879715f1f5 --- /dev/null +++ b/cpu_context/cpu_proto/node_def_impl.cc @@ -0,0 +1,227 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "node_def_impl.h" + +#include "attr_value_impl.h" +#include "cpu_kernel_utils.h" +#include "log.h" +#include "status.h" +#include "tensor_impl.h" + +namespace aicpu { +/* + * parse parameter from string. + */ +bool NodeDefImpl::ParseFromString(const std::string &str) { + if (!nodedef_->ParseFromString(str)) { + KERNEL_LOG_ERROR("ParseFromString failed"); + return false; + } + + return true; +} + +/* + * serialize string to node def. + */ +bool NodeDefImpl::SerializeToString(std::string &str) const { + if (!nodedef_->SerializeToString(&str)) { + KERNEL_LOG_ERROR("SerializeToString failed"); + return false; + } + + return true; +} + +/* + * set op type to node def. + */ +void NodeDefImpl::SetOpType(const std::string &op) { nodedef_->set_op(op); } + +/* + * get op type of node def. + */ +std::string NodeDefImpl::GetOpType() const { return nodedef_->op(); } + +/* + * add input tensor to node def. 
+ */
+// Appends a new input tensor slot to the node def proto and returns a wrapper
+// over it, or null on any allocation/protobuf failure. The wrapper aliases
+// proto memory owned by nodedef_.
+std::shared_ptr NodeDefImpl::AddInputs() {
+  auto tensor = nodedef_->add_inputs();
+  if (tensor == nullptr) {
+    KERNEL_LOG_ERROR("Protobuf node def add tensor is nullptr.");
+    return std::shared_ptr(nullptr);
+  }
+
+  TensorImpl *impl = new (std::nothrow) TensorImpl(tensor);
+  if (impl == nullptr) {
+    KERNEL_LOG_ERROR("Create TensorImpl failed.");
+    return std::shared_ptr(nullptr);
+  }
+
+  auto aicpu_tensor = CpuKernelUtils::CreateTensor(impl);
+  if (aicpu_tensor == nullptr) {
+    // Wrapper creation failed: impl would otherwise leak.
+    delete impl;
+  }
+  return aicpu_tensor;
+}
+
+/*
+ * add output tensor to node def.
+ * Same contract as AddInputs, operating on the outputs field.
+ */
+std::shared_ptr NodeDefImpl::AddOutputs() {
+  auto tensor = nodedef_->add_outputs();
+  if (tensor == nullptr) {
+    KERNEL_LOG_ERROR("Protobuf node def add tensor is nullptr.");
+    return std::shared_ptr(nullptr);
+  }
+
+  TensorImpl *impl = new (std::nothrow) TensorImpl(tensor);
+  if (impl == nullptr) {
+    KERNEL_LOG_ERROR("Create TensorImpl failed.");
+    return std::shared_ptr(nullptr);
+  }
+
+  auto aicpu_tensor = CpuKernelUtils::CreateTensor(impl);
+  if (aicpu_tensor == nullptr) {
+    delete impl;
+  }
+  return aicpu_tensor;
+}
+
+/*
+ * add attr to node def.
+ * Copies the attr's proto into the node def's attrs map under the given name.
+ * Returns false if the attr is null, any proto pointer is null, or the name
+ * already exists in the map (protobuf Map::insert does not overwrite).
+ */
+bool NodeDefImpl::AddAttrs(const std::string &name, const AttrValue *attr) {
+  if (attr == nullptr) {
+    KERNEL_LOG_ERROR("Attr is null.");
+    return false;
+  }
+
+  auto attrs = nodedef_->mutable_attrs();
+  KERNEL_CHECK_NULLPTR(attrs, false, "Protobuf mutable attrs is null")
+  auto impl = CpuKernelUtils::GetImpl(attr);
+  // Fix: guard impl and its proto before dereferencing. The previous code
+  // called impl->GetProto() unchecked, unlike sibling setters such as
+  // AttrValueImpl::SetTensor which null-check both.
+  KERNEL_CHECK_NULLPTR(impl, false, "Get attr impl is null")
+  auto proto = impl->GetProto();
+  KERNEL_CHECK_NULLPTR(proto, false, "Get attr proto is null")
+  auto pair = attrs->insert(
+      google::protobuf::Map::value_type(
+          name, *proto));
+  if (!pair.second) {
+    KERNEL_LOG_ERROR("Nodedef insert attr %s to nodeDef failed.", name.c_str());
+    return false;
+  }
+  return true;
+}
+
+/*
+ * get input tensor size of node def.
+ */
+int32_t NodeDefImpl::InputsSize() const { return nodedef_->inputs_size(); }
+
+/*
+ * get output tensor size of node def.
+ */
+int32_t NodeDefImpl::OutputsSize() const { return nodedef_->outputs_size(); }
+
+/*
+ * get input tensor of node def.
+ */
+// Returns a wrapper over the input tensor proto at the given index, or null
+// if the index is out of range or any allocation fails. The wrapper aliases
+// proto memory owned by nodedef_.
+std::shared_ptr NodeDefImpl::MutableInputs(int32_t index) const {
+  if ((index >= InputsSize()) || (index < 0)) {
+    // Fix: error message typo "noe" -> "not".
+    KERNEL_LOG_ERROR(
+        "Index[%d] should be less than input tensors size[%d] and not less "
+        "than 0.",
+        index, InputsSize());
+    return std::shared_ptr(nullptr);
+  }
+
+  auto tensor = nodedef_->mutable_inputs(index);
+  if (tensor == nullptr) {
+    KERNEL_LOG_ERROR("Protobuf node def mutable inputs[%d] tensor is nullptr.",
+                     index);
+    return std::shared_ptr(nullptr);
+  }
+
+  TensorImpl *impl = new (std::nothrow) TensorImpl(tensor);
+  if (impl == nullptr) {
+    KERNEL_LOG_ERROR("Create TensorImpl failed.");
+    return std::shared_ptr(nullptr);
+  }
+
+  auto aicpu_tensor = CpuKernelUtils::CreateTensor(impl);
+  if (aicpu_tensor == nullptr) {
+    // Wrapper creation failed: impl would otherwise leak.
+    delete impl;
+  }
+  return aicpu_tensor;
+}
+
+/*
+ * get output tensor of node def.
+ * Same contract as MutableInputs, operating on the outputs field.
+ */
+std::shared_ptr NodeDefImpl::MutableOutputs(int32_t index) const {
+  if ((index >= OutputsSize()) || (index < 0)) {
+    // Fix: error message typo "noe" -> "not".
+    KERNEL_LOG_ERROR(
+        "Index[%d] should be less than output tensors size[%d] and not less "
+        "than 0.",
+        index, OutputsSize());
+    return std::shared_ptr(nullptr);
+  }
+
+  auto tensor = nodedef_->mutable_outputs(index);
+  if (tensor == nullptr) {
+    KERNEL_LOG_ERROR("Protobuf node def mutable outputs[%d] tensor is nullptr.",
+                     index);
+    return std::shared_ptr(nullptr);
+  }
+
+  TensorImpl *impl = new (std::nothrow) TensorImpl(tensor);
+  if (impl == nullptr) {
+    KERNEL_LOG_ERROR("Create TensorImpl failed.");
+    return std::shared_ptr(nullptr);
+  }
+
+  auto aicpu_tensor = CpuKernelUtils::CreateTensor(impl);
+  if (aicpu_tensor == nullptr) {
+    delete impl;
+  }
+  return aicpu_tensor;
+}
+
+/*
+ * get attr of node def.
+ */ +std::unordered_map> NodeDefImpl::Attrs() + const { + std::unordered_map> ret; + auto attrs_map = nodedef_->mutable_attrs(); + KERNEL_CHECK_NULLPTR(attrs_map, ret, "Protobuf mutable attrs is null") + + for (auto it = attrs_map->begin(); it != attrs_map->end(); ++it) { + aicpuops::AttrValue *attr = &(it->second); + AttrValueImpl *impl = new (std::nothrow) AttrValueImpl(attr); + if (impl == nullptr) { + KERNEL_LOG_WARN("Create AttrValueImpl failed."); + } + + auto attr_value = CpuKernelUtils::CreateAttrValue(impl); + if (attr_value == nullptr) { + KERNEL_LOG_WARN("Create CreateAttrValue failed."); + delete impl; + } + (void)ret.insert(std::make_pair(it->first, attr_value)); + } + + return ret; +} +} // namespace aicpu diff --git a/cpu_context/cpu_proto/node_def_impl.h b/cpu_context/cpu_proto/node_def_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..959e3e67f328bc922ac9359da69b1a0eefb3a891 --- /dev/null +++ b/cpu_context/cpu_proto/node_def_impl.h @@ -0,0 +1,125 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_CPU_PROTO_NODE_DEF_IMPL_H_ +#define AICPU_CONTEXT_CPU_PROTO_NODE_DEF_IMPL_H_ +#include +#include +#include +#include + +#include "cpu_attr_value.h" +#include "cpu_tensor.h" +#include "proto/cpu_node_def.pb.h" + +namespace aicpu { +class NodeDefImpl { + friend class CpuKernelUtils; + + public: + NodeDefImpl( + aicpuops::NodeDef *nodedef, + std::function del_func = + [](aicpuops::NodeDef *p) {}) + : nodedef_(nodedef, del_func) {} + + ~NodeDefImpl() = default; + NodeDefImpl(const NodeDefImpl &) = delete; + NodeDefImpl(NodeDefImpl &&) = delete; + NodeDefImpl &operator=(const NodeDefImpl &) = delete; + NodeDefImpl &operator=(NodeDefImpl &&) = delete; + + /* + * parse parameter from string. + * @return bool: true->success, false->failed + */ + bool ParseFromString(const std::string &str); + + /* + * serialize string to node def. + * @return bool: true->success, false->failed + */ + bool SerializeToString(std::string &str) const; + + /* + * set op type to node def. + * @param op: op type + */ + void SetOpType(const std::string &op); + + /* + * get op type of node def. + * @return string: op type + */ + std::string GetOpType() const; + + /* + * add input tensor to node def. + * @return shared_ptr: not null->success, null->failed + */ + std::shared_ptr AddInputs(); + + /* + * add output tensor to node def. + * @return shared_ptr: not null->success, null->failed + */ + std::shared_ptr AddOutputs(); + + /* + * add attr to node def. + * @param name: attr name + * @param attr: attr need to add + * @return bool: true->success, false->failed + */ + bool AddAttrs(const std::string &name, const AttrValue *attr); + + /* + * get input tensor size of node def. + * @return int32_t: input tensor size of node def + */ + int32_t InputsSize() const; + + /* + * get output tensor size of node def. + * @return int32_t: input tensor size of node def + */ + int32_t OutputsSize() const; + + /* + * get input tensor of node def. 
+ * @param index: index of input tensor + * @return shared_ptr: input tensor ptr of node def + */ + std::shared_ptr MutableInputs(int32_t index) const; + + /* + * get output tensor of node def. + * @param index: index of output tensor + * @return shared_ptr: output tensor ptr of node def + */ + std::shared_ptr MutableOutputs(int32_t index) const; + + /* + * get attr of node def. + * @return std::unordered_map>: attrs + * of node def + */ + std::unordered_map > Attrs() const; + + private: + std::shared_ptr nodedef_{nullptr}; +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_CPU_PROTO_NODE_DEF_IMPL_H_ diff --git a/cpu_context/cpu_proto/proto/cpu_attr.proto b/cpu_context/cpu_proto/proto/cpu_attr.proto new file mode 100644 index 0000000000000000000000000000000000000000..cbad96511f7410b8b164a6b7fff2af71a5ce1e0a --- /dev/null +++ b/cpu_context/cpu_proto/proto/cpu_attr.proto @@ -0,0 +1,28 @@ +syntax = "proto3"; +package aicpuops; +import "cpu_tensor.proto"; +import "cpu_tensor_shape.proto"; + +message AttrValue { + + message ArrayValue { + repeated bytes s = 2; //"array(string)" + repeated int64 i = 3 [ packed = true ]; //"array(int)" + repeated float f = 4 [ packed = true ]; //"array(float)" + repeated bool b = 5 [ packed = true ]; //"array(bool)" + repeated int32 type = 6 [ packed = true ]; //"array(type)" + repeated TensorShape shape = 7; //"array(shape)" + repeated Tensor tensor = 8; //"array(tensor)" + } + + oneof value { + ArrayValue array = 1; + bytes s = 2; //"string" + int64 i = 3; //"int" + float f = 4; //"float" + bool b = 5; //"bool" + int32 type = 6; //"type" + TensorShape shape = 7; //"shape" + Tensor tensor = 8; //"tensor" + } +} diff --git a/cpu_context/cpu_proto/proto/cpu_node_def.proto b/cpu_context/cpu_proto/proto/cpu_node_def.proto new file mode 100644 index 0000000000000000000000000000000000000000..e11e25c68925afa2e37d7e4aaa39432e4890492c --- /dev/null +++ b/cpu_context/cpu_proto/proto/cpu_node_def.proto @@ -0,0 +1,18 @@ +syntax = "proto3"; +package 
aicpuops; +import "cpu_attr.proto"; +import "cpu_tensor.proto"; + +message DynamicIdx { + int32 idx = 1; + int32 num = 2; +} + +message NodeDef { + string op = 2; + map attrs = 3; + repeated Tensor inputs = 4; + repeated Tensor outputs = 5; + map dym_inputs = 6; + map dym_outputs = 7; +} diff --git a/cpu_context/cpu_proto/proto/cpu_tensor.proto b/cpu_context/cpu_proto/proto/cpu_tensor.proto new file mode 100644 index 0000000000000000000000000000000000000000..248245010f17f398acb064b52240f09616ce7e2d --- /dev/null +++ b/cpu_context/cpu_proto/proto/cpu_tensor.proto @@ -0,0 +1,21 @@ +syntax = "proto3"; + +option cc_enable_arenas = true; +import "cpu_tensor_shape.proto"; +package aicpuops; + +message Tensor { + + // tensor shape info + TensorShape tensor_shape = 1; + + // tensor content data type + int32 tensor_type = 2; + + // tensor memory device + // data located memory device , "DDR" "HBM" OR "NONE" + string mem_device = 3; + string name = 4; + uint64 data_ptr = 5; + uint64 data_size = 6; +} diff --git a/cpu_context/cpu_proto/proto/cpu_tensor_shape.proto b/cpu_context/cpu_proto/proto/cpu_tensor_shape.proto new file mode 100644 index 0000000000000000000000000000000000000000..8e0a7dd249ac831af43abcb7c45ed99827efc415 --- /dev/null +++ b/cpu_context/cpu_proto/proto/cpu_tensor_shape.proto @@ -0,0 +1,20 @@ +syntax = "proto3"; +package aicpuops; + +message TensorShape { + // One dimension of the tensor. + message Dim { + // size must >=0 + int64 size = 1; + }; + + // group dim info + repeated Dim dim = 2; + + // If true, the number of dimensions in the shape is unknown. + // If true, "dim.size()" must be 0. 
+ bool unknown_rank = 3; + + // data format "NHWC" "NCHW" "NC1HWC0" OR "NONE" + int32 data_format = 4; +}; diff --git a/cpu_context/cpu_proto/tensor.cc b/cpu_context/cpu_proto/tensor.cc new file mode 100644 index 0000000000000000000000000000000000000000..5885ddaf10d295100e50641194441bda75570b0c --- /dev/null +++ b/cpu_context/cpu_proto/tensor.cc @@ -0,0 +1,77 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cpu_tensor.h" +#include "tensor_impl.h" + +namespace aicpu { +Tensor::Tensor(TensorImpl *impl) : impl_(impl) {} + +/* + * get tensor shape value of tensor. + */ +std::shared_ptr Tensor::GetTensorShape() const { + return impl_->GetTensorShape(); +} + +/* + * set tensor shape value to tensor. + */ +bool Tensor::SetTensorShape(const TensorShape *shape) { + return impl_->SetTensorShape(shape); +} + +/* + * get data type value of tensor. + */ +DataType Tensor::GetDataType() const { return impl_->GetDataType(); } + +/* + * set data type value to tensor. + */ +void Tensor::SetDataType(DataType type) { impl_->SetDataType(type); } + +/* + * get data ptr of tensor. + */ +void *Tensor::GetData() const { return impl_->GetData(); } + +/* + * set data ptr to tensor. + */ +void Tensor::SetData(void *addr) { impl_->SetData(addr); } + +/* + * get data size of tensor. + */ +uint64_t Tensor::GetDataSize() const { return impl_->GetDataSize(); } + +/* + * set data size to tensor. 
+ */ +void Tensor::SetDataSize(uint64_t size) { impl_->SetDataSize(size); } + +/* + * calculate data size by tensor shape. + */ +int64_t Tensor::CalcDataSizeByShape() const { + return impl_->CalcDataSizeByShape(); +} + +/* + * get data elements number. + */ +int64_t Tensor::NumElements() const { return impl_->NumElements(); } +} // namespace aicpu \ No newline at end of file diff --git a/cpu_context/cpu_proto/tensor_impl.cc b/cpu_context/cpu_proto/tensor_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..127decb02e4ba6dac07c133615a6f5025d6a679f --- /dev/null +++ b/cpu_context/cpu_proto/tensor_impl.cc @@ -0,0 +1,146 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "tensor_impl.h" + +#include "cpu_kernel_utils.h" +#include "cpu_types.h" +#include "log.h" +#include "proto/cpu_tensor_shape.pb.h" +#include "tensor_shape_impl.h" + +namespace aicpu { +/* + * get tensor shape value of tensor. 
+ */
+// Wraps the tensor proto's shape field; returns null on failure. Despite the
+// const qualifier this calls mutable_tensor_shape(), which creates the field
+// if it is not yet set. The wrapper aliases proto memory owned by tensor_.
+std::shared_ptr TensorImpl::GetTensorShape() const {
+  aicpuops::TensorShape *tensor_shape = tensor_->mutable_tensor_shape();
+  if (tensor_shape == nullptr) {
+    KERNEL_LOG_ERROR("Protobuf mutable tensor shape is null.");
+    return std::shared_ptr(nullptr);
+  }
+
+  TensorShapeImpl *impl = new (std::nothrow) TensorShapeImpl(tensor_shape);
+  if (impl == nullptr) {
+    KERNEL_LOG_ERROR("Create TensorShapeImpl failed.");
+    return std::shared_ptr(nullptr);
+  }
+
+  auto aicpu_shape = CpuKernelUtils::CreateTensorShape(impl);
+  if (aicpu_shape == nullptr) {
+    // Wrapper creation failed: impl would otherwise leak.
+    delete impl;
+  }
+  return aicpu_shape;
+}
+
+/*
+ * set tensor shape value to tensor.
+ * Deep-copies the given shape's proto into this tensor's shape field.
+ */
+bool TensorImpl::SetTensorShape(const TensorShape *shape) {
+  KERNEL_CHECK_NULLPTR(shape, false, "Tensor shape is null")
+
+  aicpuops::TensorShape *tensor_shape = tensor_->mutable_tensor_shape();
+  KERNEL_CHECK_NULLPTR(tensor_shape, false,
+                       "Protobuf mutable tensor shape is null")
+  auto impl = CpuKernelUtils::GetImpl(shape);
+  KERNEL_CHECK_NULLPTR(impl, false, "Get impl is null")
+
+  auto proto = impl->GetProto();
+  KERNEL_CHECK_NULLPTR(proto, false, "Get proto is null")
+
+  *tensor_shape = *(proto);
+  return true;
+}
+
+/*
+ * get data type value of tensor.
+ * The proto stores the type as int32 (tensor_type); cast to DataType here.
+ */
+DataType TensorImpl::GetDataType() const {
+  return static_cast(tensor_->tensor_type());
+}
+
+/*
+ * set data type value to tensor.
+ */
+void TensorImpl::SetDataType(DataType type) { tensor_->set_tensor_type(type); }
+
+/*
+ * get data ptr of tensor.
+ * The proto persists the address as a uint64 (data_ptr); it is round-tripped
+ * back to a raw pointer here — valid only within the same address space.
+ */
+void *TensorImpl::GetData() const {
+  return reinterpret_cast(static_cast(tensor_->data_ptr()));
+}
+
+/*
+ * set data ptr to tensor.
+ * Stores the raw address as an integer in the proto; no ownership is taken.
+ */
+void TensorImpl::SetData(void *addr) {
+  tensor_->set_data_ptr(
+      static_cast(reinterpret_cast(addr)));
+}
+
+/*
+ * get data size of tensor.
+ */
+uint64_t TensorImpl::GetDataSize() const { return tensor_->data_size(); }
+
+/*
+ * set data size to tensor.
+ */
+void TensorImpl::SetDataSize(uint64_t size) { tensor_->set_data_size(size); }
+
+/*
+ * get name of tensor.
+ */
+std::string TensorImpl::GetName() const { return tensor_->name(); }
+
+/*
+ * set name of tensor.
+ */
+void TensorImpl::SetName(const std::string &name) { tensor_->set_name(name); }
+
+/*
+ * calculate data size by tensor shape.
+ * Returns element count * element size in bytes, or -1 if either factor is
+ * negative or the multiplication would overflow (guarded by the
+ * KERNEL_CHECK_ASSIGN_64S_MULTI macro).
+ * NOTE(review): "%lld" with an int64_t argument is not portable on LP64
+ * platforms — consider PRId64 from <cinttypes>; confirm project convention.
+ */
+int64_t TensorImpl::CalcDataSizeByShape() const {
+  int64_t data_size = NumElements();
+  int32_t element_size =
+      GetSizeByDataType(static_cast(GetDataType()));
+  if ((data_size < 0) || (element_size < 0)) {
+    KERNEL_LOG_WARN(
+        "Get tensor element number[%lld] or element type size[%d] less than 0.",
+        data_size, element_size);
+    return -1;
+  }
+
+  KERNEL_CHECK_ASSIGN_64S_MULTI(data_size, element_size, data_size, -1);
+  return data_size;
+}
+
+/*
+ * get data elements number.
+ * Delegates to the shape wrapper; -1 signals an unknown/unretrievable shape.
+ */
+int64_t TensorImpl::NumElements() const {
+  auto shape = GetTensorShape();
+  if (shape == nullptr) {
+    KERNEL_LOG_ERROR("Get tensor shape failed.");
+    return -1;
+  }
+
+  return shape->NumElements();
+}
+
+// Raw accessor for the underlying proto message (not an owning pointer).
+aicpuops::Tensor *TensorImpl::GetProto() const { return tensor_.get(); }
+} // namespace aicpu
\ No newline at end of file
diff --git a/cpu_context/cpu_proto/tensor_impl.h b/cpu_context/cpu_proto/tensor_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..134cf47e0c373bc4c704de646083be1389bfd62c
--- /dev/null
+++ b/cpu_context/cpu_proto/tensor_impl.h
@@ -0,0 +1,123 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef AICPU_CONTEXT_CPU_PROTO_TENSOR_IMPL_H_ +#define AICPU_CONTEXT_CPU_PROTO_TENSOR_IMPL_H_ +#include +#include + +#include "cpu_tensor_shape.h" +#include "proto/cpu_tensor.pb.h" + +namespace aicpu { +class TensorImpl { + friend class CpuKernelUtils; + + public: + TensorImpl( + aicpuops::Tensor *tensor, + std::function delFunc = + [](aicpuops::Tensor *p) {}) + : tensor_(tensor, delFunc) {} + + ~TensorImpl() = default; + TensorImpl(const TensorImpl &) = delete; + TensorImpl(TensorImpl &&) = delete; + TensorImpl &operator=(const TensorImpl &) = delete; + TensorImpl &operator=(TensorImpl &&) = delete; + + /* + * set tensor shape value to tensor. + * @param shape: tensor shape value need to set to tensor + * @return bool: true->success, false->failed + */ + bool SetTensorShape(const TensorShape *shape); + + /* + * get tensor shape value of tensor. + * @return std::shared_ptr: tensor shape value of tensor + */ + std::shared_ptr GetTensorShape() const; + + /* + * set data type value to tensor. + * @param type: data type value need to set to tensor + */ + void SetDataType(DataType type); + + /* + * get data type value of tensor. + * @return DataType: data type value of tensor + */ + DataType GetDataType() const; + + /* + * set data ptr to tensor. + * @param addr: tensor data ptr + */ + void SetData(void *addr); + + /* + * get data ptr of tensor. + * @return void *: tensor data ptr + */ + void *GetData() const; + + /* + * set data size to tensor. + * @param size: tensor data size + */ + void SetDataSize(uint64_t size); + + /* + * get data size of tensor. + * @return uint64_t: tensor data size + */ + uint64_t GetDataSize() const; + + /* + * get name of tensor. + * @return std::string: tensor name + */ + std::string GetName() const; + + /* + * set name of tensor. + * @param name: tensor name + */ + void SetName(const std::string &name); + + /* + * calculate data size by tensor shape. 
+ * @return success->not less than 0, failed->less than 0 + */ + int64_t CalcDataSizeByShape() const; + + /* + * get data elements number. + * @return success->not less than 0, unknown->less than 0 + */ + int64_t NumElements() const; + + /* + * get tensor proto. + */ + aicpuops::Tensor *GetProto() const; + + private: + std::shared_ptr<aicpuops::Tensor> tensor_{nullptr}; +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_CPU_PROTO_TENSOR_IMPL_H_ diff --git a/cpu_context/cpu_proto/tensor_shape.cc b/cpu_context/cpu_proto/tensor_shape.cc new file mode 100644 index 0000000000000000000000000000000000000000..f36402546d19a14b1ffc877c003b072a186b915b --- /dev/null +++ b/cpu_context/cpu_proto/tensor_shape.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cpu_tensor_shape.h" +#include "tensor_shape_impl.h" + +namespace aicpu { +TensorShape::TensorShape(TensorShapeImpl *impl) : impl_(impl) {} + +/* + * get dims value of tensor shape. + */ +std::vector<int64_t> TensorShape::GetDimSizes() const { + return impl_->GetDimSizes(); +} + +/* + * set dims value to tensor shape. + */ +void TensorShape::SetDimSizes(const std::vector<int64_t> &dims) { + impl_->SetDimSizes(dims); +} + +/* + * get format value of tensor shape. + */ +Format TensorShape::GetFormat() const { return impl_->GetFormat(); } + +/* + * set format value to tensor shape.
+ */ +void TensorShape::SetFormat(Format format) { impl_->SetFormat(format); } + +/* + * get unknown rank value of tensor shape. + */ +bool TensorShape::GetUnknownRank() const { return impl_->GetUnknownRank(); } + +/* + * set unknown rank value to tensor shape. + */ +void TensorShape::SetUnknownRank(bool unknownRank) { + impl_->SetUnknownRank(unknownRank); +} + +/* + * get dims size of tensor shape. + */ +int32_t TensorShape::GetDims() const { return impl_->GetDims(); } + +/* + * get dim value of tensor shape index dim. + */ +int64_t TensorShape::GetDimSize(int32_t index) const { + return impl_->GetDimSize(index); +} + +/* + * get data elements number. + */ +int64_t TensorShape::NumElements() const { return impl_->NumElements(); } +} // namespace aicpu \ No newline at end of file diff --git a/cpu_context/cpu_proto/tensor_shape_impl.cc b/cpu_context/cpu_proto/tensor_shape_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..f7246e2fa004563e57aeed7da1c3c125b503e124 --- /dev/null +++ b/cpu_context/cpu_proto/tensor_shape_impl.cc @@ -0,0 +1,116 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "tensor_shape_impl.h" + +#include "log.h" + +namespace aicpu { +/* + * get dims value of tensor shape. 
+ */ +std::vector<int64_t> TensorShapeImpl::GetDimSizes() const { + std::vector<int64_t> ret; + for (int32_t i = 0; i < tensor_shape_->dim_size(); i++) { + ret.emplace_back(tensor_shape_->dim(i).size()); + } + return ret; +} + +/* + * set dims value to tensor shape. + */ +void TensorShapeImpl::SetDimSizes(const std::vector<int64_t> &dims) { + tensor_shape_->clear_dim(); + for (const auto &dim : dims) { + aicpuops::TensorShape_Dim *aicpu_dims = tensor_shape_->add_dim(); + KERNEL_CHECK_NULLPTR_VOID(aicpu_dims, "Protobuf add dim is null") + aicpu_dims->set_size(dim); + } +} + +/* + * get format value of tensor shape. + */ +Format TensorShapeImpl::GetFormat() const { + return static_cast<Format>(tensor_shape_->data_format()); +} + +/* + * set format value to tensor shape. + */ +void TensorShapeImpl::SetFormat(Format format) { + tensor_shape_->set_data_format(format); +} + +/* + * get unknown rank value of tensor shape. + */ +bool TensorShapeImpl::GetUnknownRank() const { + return tensor_shape_->unknown_rank(); +} + +/* + * set unknown rank value to tensor shape. + */ +void TensorShapeImpl::SetUnknownRank(bool unknown_rank) { + tensor_shape_->set_unknown_rank(unknown_rank); +} + +/* + * get dims size of tensor shape. + */ +int32_t TensorShapeImpl::GetDims() const { return tensor_shape_->dim_size(); } + +/* + * get dim value of tensor shape index dim. + */ +int64_t TensorShapeImpl::GetDimSize(int32_t index) const { + if ((index >= GetDims()) || (index < 0)) { + KERNEL_LOG_ERROR( + "Dim index[%d] must be not less than 0 and not greater than dims " + "size[%d]", + index, GetDims()); + return 0; + } + + return tensor_shape_->dim(index).size(); +} + +/* + * get data elements number.
+ */ +int64_t TensorShapeImpl::NumElements() const { + int64_t num_elements = 1; + for (int32_t i = 0; i < tensor_shape_->dim_size(); i++) { + int64_t dim_size = tensor_shape_->dim(i).size(); + if (dim_size < 0) { + return -1; + } + + KERNEL_CHECK_ASSIGN_64S_MULTI(num_elements, dim_size, num_elements, -1); + } + return num_elements; +} + +/* + * get tensor proto. + * @return shared_ptr:tensor shape proto ptr + */ + +aicpuops::TensorShape *TensorShapeImpl::GetProto() const { + return tensor_shape_.get(); +} +} // namespace aicpu \ No newline at end of file diff --git a/cpu_context/cpu_proto/tensor_shape_impl.h b/cpu_context/cpu_proto/tensor_shape_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..ea16b2fda7001fd784558978eae0a61696b3e456 --- /dev/null +++ b/cpu_context/cpu_proto/tensor_shape_impl.h @@ -0,0 +1,106 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_CPU_PROTO_TENSOR_SHAPE_IMPL_H_ +#define AICPU_CONTEXT_CPU_PROTO_TENSOR_SHAPE_IMPL_H_ +#include +#include +#include + +#include "cpu_types.h" +#include "proto/cpu_tensor_shape.pb.h" + +namespace aicpu { +class TensorShapeImpl { + friend class CpuKernelUtils; + + public: + TensorShapeImpl( + aicpuops::TensorShape *shape, + std::function del_func = + [](aicpuops::TensorShape *p) {}) + : tensor_shape_(shape, del_func) {} + + ~TensorShapeImpl() = default; + TensorShapeImpl(const TensorShapeImpl &) = delete; + TensorShapeImpl(TensorShapeImpl &&) = delete; + TensorShapeImpl &operator=(const TensorShapeImpl &) = delete; + TensorShapeImpl &operator=(TensorShapeImpl &&) = delete; + + /* + * set format value to tensor shape. + * @param format: format value need to set to tensor shape + */ + void SetFormat(Format format); + + /* + * get format value of tensor shape. + * @return Format: format value of tensor shape + */ + Format GetFormat() const; + + /* + * get unknown rank value of tensor shape. + * @return bool: unknown rank value of tensor shape + */ + bool GetUnknownRank() const; + + /* + * set unknown rank value to tensor shape. + * @param unknown_rank: unknown rank value need to set to tensor shape + */ + void SetUnknownRank(bool unknown_rank); + + /* + * set dims value to tensor shape. + * @param dims: dims value need to set to tensor shape + */ + void SetDimSizes(const std::vector &dims); + + /* + * get dims value of tensor shape. + * @return int32_t: dims value of tensor shape + */ + std::vector GetDimSizes() const; + + /* + * get dim value of tensor shape index dim. + * @param index: index dim of tensor shape + * @return int64_t: dim value of tensor shape index dim + */ + int64_t GetDimSize(int32_t index) const; + + /* + * get dims size of tensor shape. + * @return int32_t: dims size of tensor shape + */ + int32_t GetDims() const; + + /* + * get data elements number. 
+ * @return success->not less than 0, unknown->less than 0 + */ + int64_t NumElements() const; + + /* + * get tensor shape proto. + */ + aicpuops::TensorShape *GetProto() const; + + private: + std::shared_ptr tensor_shape_{nullptr}; +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_CPU_PROTO_TENSOR_SHAPE_IMPL_H_ diff --git a/cpu_context/inc/cpu_attr_value.h b/cpu_context/inc/cpu_attr_value.h new file mode 100644 index 0000000000000000000000000000000000000000..a4d68e9d6bb8de72261ea9ac5b79ec6ee387761a --- /dev/null +++ b/cpu_context/inc/cpu_attr_value.h @@ -0,0 +1,291 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + * Description: api of attr + */ + +#ifndef CPU_KERNEL_ATTR_VALUE_H +#define CPU_KERNEL_ATTR_VALUE_H +#include +#include + +#include "cpu_tensor.h" +#include "cpu_tensor_shape.h" + +namespace aicpu { +class AttrValueImpl; +class AICPU_VISIBILITY AttrValue { + friend class CpuKernelUtils; + +public: + AttrValue() = delete; + ~AttrValue() = default; + + AttrValue(const AttrValue &) = delete; + AttrValue(AttrValue &&) = delete; + AttrValue &operator=(const AttrValue &) = delete; + AttrValue &operator=(AttrValue &&) = delete; + + /* + * get string value of attr. + * @return string: string value of attr + */ + std::string GetString() const; + + /* + * get string list value of attr. + * @return vector: string list value of attr + */ + std::vector GetListString() const; + + /* + * attr add string value to list. + * @param string: string value need to add to list + */ + void AddListString(const std::string &string); + + /* + * get string list size of attr. + * @return int32_t: string list size of attr + */ + int32_t ListStringSize() const; + + /* + * set string value to attr. + * @param string: string value need to set to attr + */ + void SetString(const std::string &string); + + /* + * set string list value to attr. 
+ * @param vector: string list value need to set to attr + */ + void SetListString(const std::vector &bytes); + + /* + * get int value of attr. + * @return int64_t: int value of attr + */ + int64_t GetInt() const; + + /* + * get int list value of attr. + * @return vector: int list value of attr + */ + std::vector GetListInt() const; + + /* + * attr add int value to list. + * @param i: int value need to add to list + */ + void AddListInt(int64_t i); + + /* + * get int list size of attr. + * @return int32_t: int list size of attr + */ + int32_t ListIntSize() const; + + /* + * set int value to attr. + * @param i: int value need to set to attr + */ + void SetInt(int64_t i); + + /* + * set int list value to attr. + * @param vector: int list value need to set to attr + */ + void SetListInt(const std::vector &i); + + /* + * get float value of attr. + * @return float: float value of attr + */ + float GetFloat() const; + + /* + * get float list value of attr. + * @return vector: float list value of attr + */ + std::vector GetListFloat() const; + + /* + * attr add float value to list. + * @param f: float value need to add to list + */ + void AddListFloat(float f); + + /* + * get float list size of attr. + * @return int32_t: float list size of attr + */ + int32_t ListFloatSize() const; + + /* + * set float value to attr. + * @param f: float value need to set to attr + */ + void SetFloat(float f); + + /* + * set float list value to attr. + * @param vector: float list value need to set to attr + */ + void SetListFloat(const std::vector &f); + + /* + * get bool value of attr. + * @return bool: bool value of attr + */ + bool GetBool() const; + + /* + * get bool list value of attr. + * @return vector: bool list value of attr + */ + std::vector GetListBool() const; + + /* + * attr add bool value to list. + * @param b: bool value need to add to list + */ + void AddListBool(bool b); + + /* + * get bool list size of attr. 
+ * @return int32_t: bool list size of attr + */ + int32_t ListBoolSize() const; + + /* + * set bool value to attr. + * @param b: bool value need to set to attr + */ + void SetBool(bool b); + + /* + * set bool list value to attr. + * @param vector: bool list value need to set to attr + */ + void SetListBool(const std::vector &b); + + /* + * get data type value of attr. + * @return DataType: data type value of attr + */ + DataType GetDataType() const; + + /* + * get data type list value of attr. + * @return vector: data type list value of attr + */ + std::vector GetListDataType() const; + + /* + * attr add data type value to list. + * @param type: data type value need to add to list + */ + void AddListDataType(DataType type); + + /* + * get data type list size of attr. + * @return int32_t: data type list size of attr + */ + int32_t ListDataTypeSize() const; + + /* + * set data type value to attr. + * @param type: data type value need to set to attr + */ + void SetDataType(DataType type); + + /* + * set data type list value to attr. + * @param vector: data type list value need to set to attr + */ + void SetListDataType(const std::vector &type); + + /* + * set tensor shape value to attr. + * @param shape: tensor shape value need to set to attr + * @return bool: true->success false->failed + */ + bool SetTensorShape(const TensorShape *shape); + + /* + * set tensor shape list value to attr. + * @param vector: tensor shape list value need to set to attr + * @return uint32_t: success number + */ + uint32_t SetListTensorShape(const std::vector &shape); + + /* + * attr add tensor shape value to list. + * @return shared_ptr: tensor shape value ptr added to list + */ + std::shared_ptr AddListTensorShape(); + + /* + * get tensor shape value of attr. + * @return TensorShape: tensor shape value of attr + */ + std::shared_ptr GetTensorShape() const; + + /* + * get tensor shape list value of attr. 
+ * @return vector: tensor shape list value of attr + */ + std::vector GetListTensorShape() const; + + /* + * get tensor shape list size of attr. + * @return int32_t: tensor shape list size of attr + */ + int32_t ListTensorShapeSize() const; + + /* + * set tensor value to attr. + * @param tensor: tensor value need to set to attr + * @return bool: true->success false->failed + */ + bool SetTensor(const Tensor *tensor); + + /* + * set tensor list value to attr. + * @param vector: tensor list value need to set to attr + * @return uint32_t: success number + */ + uint32_t SetListTensor(const std::vector &tensor); + + /* + * attr add tensor value to list. + * @return shared_ptr: tensor value ptr added to list + */ + std::shared_ptr AddListTensor(); + + /* + * get tensor value of attr. + * @return Tensor: tensor value of attr + */ + std::shared_ptr GetTensor() const; + + /* + * get tensor list value of attr. + * @return vector: tensor list value of attr + */ + std::vector GetListTensor() const; + + /* + * get tensor list size of attr. + * @return int32_t: tensor list size of attr + */ + int32_t ListTensorSize() const; + +private: + explicit AttrValue(AttrValueImpl *impl); + +private: + std::shared_ptr impl_ { nullptr }; +}; +} // namespace aicpu +#endif // CPU_KERNEL_ATTR_VALUE_H diff --git a/cpu_context/inc/cpu_context.h b/cpu_context/inc/cpu_context.h new file mode 100644 index 0000000000000000000000000000000000000000..6ba94520be6ca198183da0484cb68c1c7778b840 --- /dev/null +++ b/cpu_context/inc/cpu_context.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. 
+ * Description: api of context + */ + +#ifndef CPU_KERNELS_CONTEXT_H +#define CPU_KERNELS_CONTEXT_H +#include +#include +#include + +#include "cpu_types.h" +#include "cpu_tensor.h" +#include "cpu_attr_value.h" + +namespace aicpu { +class Device; +class NodeDef; +class AICPU_VISIBILITY CpuKernelContext { + friend class CpuKernelUtils; + +public: + explicit CpuKernelContext(DeviceType type); + CpuKernelContext() = delete; + ~CpuKernelContext() = default; + CpuKernelContext(const CpuKernelContext &) = delete; + CpuKernelContext(CpuKernelContext &&) = delete; + CpuKernelContext &operator = (const CpuKernelContext &) = delete; + CpuKernelContext &operator = (CpuKernelContext &&) = delete; + + uint32_t Init(NodeDef *nodeDef); + + /* + * get op type. + * @return string: op type + */ + std::string GetOpType() const; + + /* + * get input tensor. + * @return Tensor *: not null->success, null->failed + */ + Tensor *Input(uint32_t index) const; + + /* + * get output tensor. + * @return Tensor *: not null->success, null->failed + */ + Tensor *Output(uint32_t index) const; + + /* + * get attr. + * @return AttrValue *: not null->success, null->failed + */ + AttrValue *GetAttr(std::string name) const; + + /* + * get input size. + * @return uint32_t: input size + */ + uint32_t GetInputsSize() const; + + /* + * get output size. + * @return uint32_t: output size + */ + uint32_t GetOutputsSize() const; + +private: + std::string op_; // op type + std::vector > inputs_; // input tensor list + std::vector > outputs_; // out tensor list + std::unordered_map > attrs_; // attr list + std::shared_ptr device_ { nullptr }; +}; +} // namespace aicpu +#endif // CPU_KERNELS_CONTEXT_H diff --git a/cpu_context/inc/cpu_kernel.h b/cpu_context/inc/cpu_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..3ee5f042c6fea7dd573fd3b3671c98c0563f618a --- /dev/null +++ b/cpu_context/inc/cpu_kernel.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020. 
All rights reserved. + * Description: api of cpu kernel + */ + +#ifndef CPU_KERNEL_H +#define CPU_KERNEL_H + +#include +#include "cpu_context.h" + +namespace aicpu { +class AICPU_VISIBILITY CpuKernel { +public: + virtual uint32_t Compute(CpuKernelContext &ctx) = 0; + + virtual ~CpuKernel() {} +}; + +using KERNEL_CREATOR_FUN = std::function(void)>; + +AICPU_VISIBILITY bool RegistCpuKernel(const std::string &type, const KERNEL_CREATOR_FUN &fun); + +template static inline std::shared_ptr MakeShared(Args &&... args) +{ + typedef typename std::remove_const::type T_nc; + std::shared_ptr ret(new (std::nothrow) T_nc(std::forward(args)...)); + return ret; +} + +#define REGISTER_CPU_KERNEL(type, clazz) std::shared_ptr Creator_##type##_Kernel() \ + { \ + std::shared_ptr ptr = nullptr; \ + ptr = MakeShared(); \ + return ptr; \ + } \ + bool g_##type##_Kernel_Creator __attribute__((unused)) = RegistCpuKernel(type, Creator_##type##_Kernel) +} +#endif // CPU_KERNEL_H diff --git a/cpu_context/inc/cpu_kernel_register.h b/cpu_context/inc/cpu_kernel_register.h new file mode 100644 index 0000000000000000000000000000000000000000..25c7e01f3dcaec6aed58ac8dd119e4dcc6727b3f --- /dev/null +++ b/cpu_context/inc/cpu_kernel_register.h @@ -0,0 +1,83 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef AICPU_CONTEXT_INC_REGISTAR_H_ +#define AICPU_CONTEXT_INC_REGISTAR_H_ + +#include +#include + +#include "cpu_context.h" +#include "cpu_kernel.h" + +namespace aicpu { +class AICPU_VISIBILITY CpuKernelRegister { + public: + /* + * get instance. + * @return CpuKernelRegister &: CpuKernelRegister instance + */ + static CpuKernelRegister &Instance(); + + /* + * get cpu kernel. + * param op_type: the op type of kernel + * @return shared_ptr: cpu kernel ptr + */ + std::shared_ptr GetCpuKernel(const std::string &op_type); + + /* + * get all cpu kernel registered op types. + * @return std::vector: all cpu kernel registered op type + */ + std::vector GetAllRegisteredOpTypes() const; + + /* + * run cpu kernel. + * param ctx: context of kernel + * @return uint32_t: 0->success other->failed + */ + uint32_t RunCpuKernel(CpuKernelContext &ctx); + + // CpuKernel registration function to register different types of kernel to + // the factory + class Registerar { + public: + Registerar(const std::string &type, const KERNEL_CREATOR_FUN &fun); + ~Registerar() = default; + + Registerar(const Registerar &) = delete; + Registerar(Registerar &&) = delete; + Registerar &operator=(const Registerar &) = delete; + Registerar &operator=(Registerar &&) = delete; + }; + + protected: + CpuKernelRegister() = default; + ~CpuKernelRegister() = default; + + CpuKernelRegister(const CpuKernelRegister &) = delete; + CpuKernelRegister(CpuKernelRegister &&) = delete; + CpuKernelRegister &operator=(const CpuKernelRegister &) = delete; + CpuKernelRegister &operator=(CpuKernelRegister &&) = delete; + + // register creator, this function will call in the constructor + void Register(const std::string &type, const KERNEL_CREATOR_FUN &fun); + + private: + std::map creatorMap_; // kernel map +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_INC_REGISTAR_H_ diff --git a/cpu_context/inc/cpu_kernel_utils.h b/cpu_context/inc/cpu_kernel_utils.h new file mode 100644 index 
0000000000000000000000000000000000000000..512ecc8b85f605b877127ce7ab3a7543989e1103 --- /dev/null +++ b/cpu_context/inc/cpu_kernel_utils.h @@ -0,0 +1,121 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CONTEXT_INC_UTILS_H_ +#define AICPU_CONTEXT_INC_UTILS_H_ +#include +#include + +#include "cpu_attr_value.h" +#include "cpu_context.h" +#include "cpu_node_def.h" +#include "cpu_tensor.h" + +namespace aicpu { +class AICPU_VISIBILITY CpuKernelUtils { + public: + /* + * create Tensor. + * @return std::shared_ptr: Tensor ptr + */ + static std::shared_ptr CreateTensor(); + + /* + * create Tensor. + * @param tensor: Tensor impl + * @return std::shared_ptr: Tensor ptr + */ + static std::shared_ptr CreateTensor(TensorImpl *tensor); + + /* + * get tensor impl. + */ + static std::shared_ptr GetImpl(const Tensor *tensor); + + /* + * get tensor name. + */ + static std::string GetTensorName(const Tensor *tensor); + + /* + * set tensor name. + */ + static void SetTensorName(const std::string &name, std::shared_ptr &tensor); + + /* + * create Tensor shape. + * @return std::shared_ptr: TensorShape ptr + */ + static std::shared_ptr CreateTensorShape(); + + /* + * create Tensor Shape. + * @param tensorShape: Tensor shape impl + * @return std::shared_ptr: TensorShape ptr + */ + static std::shared_ptr CreateTensorShape( + TensorShapeImpl *tensorShape); + + /* + * get tensor shape impl. 
+ */ + static std::shared_ptr GetImpl( + const TensorShape *tensorShape); + + /* + * create attr value. + * @return std::shared_ptr: attr value ptr + */ + static std::shared_ptr CreateAttrValue(); + + /* + * create attr value. + * @param attr_value: attr value impl + * @return std::shared_ptr: attr value ptr + */ + static std::shared_ptr CreateAttrValue(AttrValueImpl *attr_value); + + /* + * get attr value impl. + */ + static std::shared_ptr GetImpl(const AttrValue *attr_value); + + /* + * create node def. + * @return std::shared_ptr: node def ptr + */ + static std::shared_ptr CreateNodeDef(); + + /* + * ParallelFor shards the "total" units of work. + * @param ctx: context info of kernel + * @param total: size of total work + * @param per_unit_size: expect size of per unit work + * @param work: process of per unit work + * @return uint32_t: 0->sucess other->failed + */ + static uint32_t ParallelFor( + const CpuKernelContext &ctx, int64_t total, int64_t per_unit_size, + const std::function &work); + + /* + * Get CPU number + * @param ctx: context info of kernel + * @return CPU number + */ + static uint32_t GetCPUNum(const CpuKernelContext &ctx); +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_INC_UTILS_H_ diff --git a/cpu_context/inc/cpu_node_def.h b/cpu_context/inc/cpu_node_def.h new file mode 100644 index 0000000000000000000000000000000000000000..7ca60ed5db86d26c868f27a2b4509e2fec6494a1 --- /dev/null +++ b/cpu_context/inc/cpu_node_def.h @@ -0,0 +1,118 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef AICPU_CONTEXT_INC_NODE_DEF_H_ +#define AICPU_CONTEXT_INC_NODE_DEF_H_ +#include +#include +#include + +#include "cpu_attr_value.h" +#include "cpu_tensor.h" + +namespace aicpu { +class NodeDefImpl; +class AICPU_VISIBILITY NodeDef { + friend class CpuKernelUtils; + + public: + NodeDef() = delete; + ~NodeDef() = default; + + /* + * parse parameter from string. + * @return bool: true->success, false->failed + */ + bool ParseFromString(const std::string &str); + + /* + * serialize string to node def. + * @return bool: true->success, false->failed + */ + bool SerializeToString(std::string &str) const; + + /* + * set op type to node def. + * @param op: op type + */ + void SetOpType(const std::string &op); + + /* + * get op type of node def. + * @return string: op type + */ + std::string GetOpType() const; + + /* + * add input tensor to node def. + * @return shared_ptr: not null->success, null->failed + */ + std::shared_ptr AddInputs(); + + /* + * add output tensor to node def. + * @return shared_ptr: not null->success, null->failed + */ + std::shared_ptr AddOutputs(); + + /* + * add attr to node def. + * @param name: attr name + * @param attr: attr need to add + * @return bool: true->success, false->failed + */ + bool AddAttrs(const std::string &name, const AttrValue *attr); + + /* + * get input tensor size of node def. + * @return int32_t: input tensor size of node def + */ + int32_t InputsSize() const; + + /* + * get output tensor size of node def. + * @return int32_t: input tensor size of node def + */ + int32_t OutputsSize() const; + + /* + * get input tensor of node def. + * @param index: index of input tensor + * @return shared_ptr: input tensor ptr of node def + */ + std::shared_ptr MutableInputs(int32_t index) const; + + /* + * get output tensor of node def. 
+ * @param index: index of output tensor + * @return shared_ptr: output tensor ptr of node def + */ + std::shared_ptr MutableOutputs(int32_t index) const; + + /* + * get attr of node def. + * @return unordered_map>: attrs of + * node def + */ + std::unordered_map > Attrs() const; + + private: + NodeDef(NodeDefImpl *impl); + + private: + std::shared_ptr impl_{nullptr}; +}; +} // namespace aicpu +#endif // AICPU_CONTEXT_INC_NODE_DEF_H_ diff --git a/cpu_context/inc/cpu_tensor.h b/cpu_context/inc/cpu_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..686dd3e5f686e950d9394b95125b1393609c2395 --- /dev/null +++ b/cpu_context/inc/cpu_tensor.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + * Description: api of tensor + */ + +#ifndef CPU_KERNEL_TENSOR_H +#define CPU_KERNEL_TENSOR_H +#include + +#include "cpu_tensor_shape.h" + +namespace aicpu { +class TensorImpl; +class AICPU_VISIBILITY Tensor { + friend class CpuKernelUtils; + +public: + Tensor() = delete; + ~Tensor() = default; + + /* + * set tensor shape value to tensor. + * @param shape: tensor shape value need to set to tensor + * @return bool: true->success, false->failed + */ + bool SetTensorShape(const TensorShape *shape); + + /* + * get tensor shape value of tensor. + * @return std::shared_ptr: tensor shape value of tensor + */ + std::shared_ptr GetTensorShape() const; + + /* + * set data type value to tensor. + * @param type: data type value need to set to tensor + */ + void SetDataType(DataType type); + + /* + * get data type value of tensor. + * @return DataType: data type value of tensor + */ + DataType GetDataType() const; + + /* + * set data ptr to tensor. + * @param addr: tensor data ptr + */ + void SetData(void *addr); + + /* + * get data ptr of tensor. + * @return void *: tensor data ptr + */ + void *GetData() const; + + /* + * set data size to tensor. 
+ * @param size: tensor data size + */ + void SetDataSize(uint64_t size); + + /* + * get data size of tensor. + * @return uint64_t: tensor data size + */ + uint64_t GetDataSize() const; + + /* + * calculate data size by tensor shape. + * @return success->not less than 0, failed->less than 0 + */ + int64_t CalcDataSizeByShape() const; + + /* + * get data elements number. + * @return success->not less than 0, unknown->less than 0 + */ + int64_t NumElements() const; + +private: + explicit Tensor(TensorImpl *impl); + +private: + std::shared_ptr impl_ { nullptr }; +}; +} // namespace aicpu +#endif // CPU_KERNEL_TENSOR_H diff --git a/cpu_context/inc/cpu_tensor_shape.h b/cpu_context/inc/cpu_tensor_shape.h new file mode 100644 index 0000000000000000000000000000000000000000..effcc9d1a4d470658177c6e4466802a5fb33cb32 --- /dev/null +++ b/cpu_context/inc/cpu_tensor_shape.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. + * Description: api of tensor shape + */ + +#ifndef CPU_KERNEL_TENSOR_SHAPE_H +#define CPU_KERNEL_TENSOR_SHAPE_H +#include +#include + +#include "cpu_types.h" + +namespace aicpu { +#ifdef VISIBILITY +#define AICPU_VISIBILITY __attribute__((visibility("default"))) +#else +#define AICPU_VISIBILITY +#endif + +class TensorShapeImpl; +class AICPU_VISIBILITY TensorShape { + friend class CpuKernelUtils; + +public: + TensorShape() = delete; + ~TensorShape() = default; + + /* + * set format value to tensor shape. + * @param format: format value need to set to tensor shape + */ + void SetFormat(Format format); + + /* + * get format value of tensor shape. + * @return Format: format value of tensor shape + */ + Format GetFormat() const; + + /* + * get unknown rank value of tensor shape. + * @return bool: unknown rank value of tensor shape + */ + bool GetUnknownRank() const; + + /* + * set unknown rank value to tensor shape. 
+ * @param unknownRank: unknown rank value need to set to tensor shape + */ + void SetUnknownRank(bool unknownRank); + + /* + * set dims value to tensor shape. + * @param dims: dims value need to set to tensor shape + */ + void SetDimSizes(const std::vector &dims); + + /* + * get dims value of tensor shape. + * @return int32_t: dims value of tensor shape + */ + std::vector GetDimSizes() const; + + /* + * get dim value of tensor shape index dim. + * @param index: index dim of tensor shape + * @return int64_t: dim value of tensor shape index dim + */ + int64_t GetDimSize(int32_t index) const; + + /* + * get dims size of tensor shape. + * @return int32_t: dims size of tensor shape + */ + int32_t GetDims() const; + + /* + * get data elements number. + * @return success->not less than 0, unknown->less than 0 + */ + int64_t NumElements() const; + +private: + explicit TensorShape(TensorShapeImpl *tensorShape); + +private: + std::shared_ptr impl_ { nullptr }; +}; +} // namespace aicpu +#endif // CPU_KERNEL_TENSOR_SHAPE_H diff --git a/cpu_context/inc/cpu_types.h b/cpu_context/inc/cpu_types.h new file mode 100644 index 0000000000000000000000000000000000000000..829ccea592f640ebaae2069e6cc740527b19b3c2 --- /dev/null +++ b/cpu_context/inc/cpu_types.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. 
+ * Description: api of types + */ + +#ifndef CPU_KERNEL_TYPES_H +#define CPU_KERNEL_TYPES_H + +#include + +namespace aicpu { +#ifdef VISIBILITY +#define AICPU_VISIBILITY __attribute__((visibility("default"))) +#else +#define AICPU_VISIBILITY +#endif + +enum DataType { + DT_FLOAT = 0, // float type + DT_FLOAT16 = 1, // fp16 type + DT_INT8 = 2, // int8 type + DT_INT16 = 6, // int16 type + DT_UINT16 = 7, // uint16 type + DT_UINT8 = 4, // uint8 type + DT_INT32 = 3, // + DT_INT64 = 9, // int64 type + DT_UINT32 = 8, // unsigned int32 + DT_UINT64 = 10, // unsigned int64 + DT_BOOL = 12, // bool type + DT_DOUBLE = 11, // double type + DT_STRING = 13, // string type + DT_DUAL_SUB_INT8 = 14, // dual output int8 type + DT_DUAL_SUB_UINT8 = 15, // dual output uint8 type + DT_COMPLEX64 = 16, // complex64 type + DT_COMPLEX128 = 17, // complex128 type + DT_QINT8 = 18, // qint8 type + DT_QINT16 = 19, // qint16 type + DT_QINT32 = 20, // qint32 type + DT_QUINT8 = 21, // quint8 type + DT_QUINT16 = 22, // quint16 type + DT_RESOURCE = 23, // resource type + DT_STRING_REF = 24, // string ref type + DT_DUAL = 25, // dual output type + DT_UNDEFINED // Used to indicate a DataType field has not been set. 
+}; + +AICPU_VISIBILITY inline int GetSizeByDataType(DataType dataType) +{ + const std::map sizeMap = { + { DT_FLOAT, 4 }, { DT_FLOAT16, 2 }, { DT_INT8, 1 }, { DT_INT16, 2 }, + { DT_UINT16, 2 }, { DT_UINT8, 1 }, { DT_INT32, 4 }, { DT_INT64, 8 }, + { DT_UINT32, 4 }, { DT_UINT64, 8 }, { DT_BOOL, 1 }, { DT_DOUBLE, 8 }, + { DT_STRING, -1 }, { DT_DUAL_SUB_INT8, 1 }, { DT_DUAL_SUB_UINT8, 1 }, { DT_COMPLEX64, 8 }, + { DT_COMPLEX128, 16 }, { DT_QINT8, 1 }, { DT_QINT16, 2 }, { DT_QINT32, 4 }, + { DT_QUINT8, 1 }, { DT_QUINT16, 2 }, { DT_RESOURCE, -1 }, { DT_STRING_REF, -1 }, + { DT_DUAL, 5 } + }; + auto iter = sizeMap.find(dataType); + if (iter == sizeMap.end()) { + return -1; + } + return iter->second; +} + +enum Format { + FORMAT_NCHW = 0, // NCHW + FORMAT_NHWC, // NHWC + FORMAT_ND, // Nd Tensor + FORMAT_NC1HWC0, // NC1HWC0 + FORMAT_FRACTAL_Z, // FRACTAL_Z + FORMAT_NC1C0HWPAD, + FORMAT_NHWC1C0, + FORMAT_FSR_NCHW, + FORMAT_FRACTAL_DECONV, + FORMAT_C1HWNC0, + FORMAT_FRACTAL_DECONV_TRANSPOSE, + FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS, + FORMAT_NC1HWC0_C04, // NC1HWC0, C0 =4 + FORMAT_FRACTAL_Z_C04, // FRACZ, C0 =4 + FORMAT_CHWN, + FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS, + FORMAT_HWCN, + FORMAT_NC1KHKWHWC0, // KH,KW kernel h& kernel w maxpooling max output format + FORMAT_BN_WEIGHT, + FORMAT_FILTER_HWCK, // filter input tensor format + FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20, + FORMAT_HASHTABLE_LOOKUP_KEYS, + FORMAT_HASHTABLE_LOOKUP_VALUE, + FORMAT_HASHTABLE_LOOKUP_OUTPUT, + FORMAT_HASHTABLE_LOOKUP_HITS = 24, + FORMAT_C1HWNCoC0, + FORMAT_MD, + FORMAT_NDHWC, + FORMAT_FRACTAL_ZZ, + FORMAT_FRACTAL_NZ, + FORMAT_NCDHW, + FORMAT_DHWCN, // 3D filter input tensor format + FORMAT_NDC1HWC0, + FORMAT_FRACTAL_Z_3D, + FORMAT_CN, + FORMAT_NC, + FORMAT_DHWNC, + FORMAT_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format + FORMAT_FRACTAL_ZN_LSTM, + FORMAT_FRACTAL_Z_G, + FORMAT_RESERVED, + FORMAT_ALL, + FORMAT_NULL +}; + +enum DeviceType { + HOST, + DEVICE +}; +} // namespace aicpu 
+#endif // CPU_KERNEL_TYPES_H diff --git a/cpu_context/module.mk b/cpu_context/module.mk new file mode 100644 index 0000000000000000000000000000000000000000..ab72e1f79659beea1908aabc480ac1c194f722d1 --- /dev/null +++ b/cpu_context/module.mk @@ -0,0 +1,106 @@ +LOCAL_PATH := $(call my-dir) + +local_context_src_files := cpu_proto/proto/cpu_attr.proto \ + cpu_proto/proto/cpu_node_def.proto \ + cpu_proto/proto/cpu_tensor.proto \ + cpu_proto/proto/cpu_tensor_shape.proto \ + cpu_proto/node_def.cc \ + cpu_proto/node_def_impl.cc \ + cpu_proto/tensor.cc \ + cpu_proto/tensor_impl.cc \ + cpu_proto/tensor_shape.cc \ + cpu_proto/tensor_shape_impl.cc \ + cpu_proto/attr_value.cc \ + cpu_proto/attr_value_impl.cc \ + common/device.cc \ + common/context.cc \ + common/device_cpu_kernel.cc \ + common/cpu_kernel_register.cc \ + common/cpu_kernel_utils.cc \ + common/host_sharder.cc \ + common/device_sharder.cc \ + common/eigen_threadpool.cc \ + common/cpu_kernel_cache.cc \ + +local_context_stub_files := stub/aicpu_sharder.cc \ + +local_context_inc_path := $(LOCAL_PATH) \ + $(LOCAL_PATH)/common \ + $(LOCAL_PATH)/cpu_proto \ + $(TOPDIR)inc \ + $(TOPDIR)inc/aicpu \ + $(TOPDIR)inc/aicpu/aicpu_schedule/aicpu_sharder \ + $(TOPDIR)inc/aicpu/common \ + $(TOPDIR)inc/aicpu/cpu_kernels \ + $(TOPDIR)inc/external/aicpu \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)third_party/protobuf/include \ + ${TOPDIR}third_party/eigen/src/eigen-3.3.7 \ + ${TOPDIR}out/${product} + +# built shared libs for device +include $(CLEAR_VARS) + +LOCAL_MODULE := libcpu_kernels_context + +LOCAL_SRC_FILES := $(local_context_src_files) \ + $(local_context_stub_files) +LOCAL_C_INCLUDES := $(local_context_inc_path) + +LOCAL_CFLAGS += -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2 -ftrapv -std=c++11 -Dgoogle=ascend_private +LOCAL_LDFLAGS += -Wl,-z,relro,-z,now -s -ldl -shared +LOCAL_SHARED_LIBRARIES := libslog libc_sec libascend_protobuf + +include $(BUILD_SHARED_LIBRARY) + +# built shared lib for host +include $(CLEAR_VARS) + 
+LOCAL_MODULE := libcpu_kernels_context + +LOCAL_SRC_FILES := $(local_context_src_files) \ + $(local_context_stub_files) +LOCAL_C_INCLUDES := $(local_context_inc_path) + +LOCAL_CFLAGS += -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2 -ftrapv -DVISIBILITY -std=c++11 -Dgoogle=ascend_private +LOCAL_CFLAGS += -fvisibility-inlines-hidden +LOCAL_CFLAGS += -fvisibility=hidden + +LOCAL_LDFLAGS += -Wl,-z,relro,-z,now -s -ldl -shared +LOCAL_LDFLAGS += -Wl,-Bsymbolic -Wl,--exclude-libs,ALL + +LOCAL_SHARED_LIBRARIES := libslog libc_sec libascend_protobuf + +include $(BUILD_HOST_SHARED_LIBRARY) + +# built static lib for host +include $(CLEAR_VARS) + +LOCAL_MODULE := libcpu_kernels_context + +LOCAL_SRC_FILES := $(local_context_src_files) \ + $(local_context_stub_files) +LOCAL_C_INCLUDES := $(local_context_inc_path) + +LOCAL_CFLAGS += -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2 -ftrapv -DVISIBILITY -std=c++11 -Dgoogle=ascend_private + +LOCAL_LDFLAGS += -Wl,-z,relro,-z,now -s -ldl -shared +LOCAL_UNINSTALLABLE_MODULE := false + +include $(BUILD_HOST_STATIC_LIBRARY) + +#built static lib for device +include $(CLEAR_VARS) + +LOCAL_MODULE := libcpu_kernels_context + +LOCAL_SRC_FILES := $(local_context_src_files) \ + $(local_context_stub_files) +LOCAL_C_INCLUDES := $(local_context_inc_path) + +LOCAL_CFLAGS += -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2 -ftrapv -DVISIBILITY -std=c++11 -Dgoogle=ascend_private + +LOCAL_LDFLAGS += -Wl,-z,relro,-z,now -s -ldl -shared +LOCAL_UNINSTALLABLE_MODULE := false + +include $(BUILD_STATIC_LIBRARY) diff --git a/cpu_context/stub/CMakeLists.txt b/cpu_context/stub/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3565838c981f9a286623fda9adb2e7c7a4fcec4c --- /dev/null +++ b/cpu_context/stub/CMakeLists.txt @@ -0,0 +1,16 @@ +if("x${PRODUCT_SIDE}" STREQUAL "xdevice") + if (MINRC) + set(CMAKE_CXX_COMPILER /usr/bin/aarch64-linux-gnu-g++) + set(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc) + else() + set(CMAKE_CXX_COMPILER 
${TOOLCHAIN_DIR}/bin/aarch64-target-linux-gnu-g++) + set(CMAKE_C_COMPILER ${TOOLCHAIN_DIR}/bin/aarch64-target-linux-gnu-gcc) + endif() + add_library(aicpu_sharder SHARED + ./aicpu_sharder.cc + ) + + target_compile_options(aicpu_sharder PRIVATE + -std=c++11 + ) +endif() \ No newline at end of file diff --git a/cpu_context/stub/aicpu_context.h b/cpu_context/stub/aicpu_context.h new file mode 100644 index 0000000000000000000000000000000000000000..f6ff7d72efc68024001270e8fc827adf177d2c10 --- /dev/null +++ b/cpu_context/stub/aicpu_context.h @@ -0,0 +1,65 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef OPS_BUILT_IN_AICPU_CONTEXT_STUB_AICPU_CONTEXT_H_ +#define OPS_BUILT_IN_AICPU_CONTEXT_STUB_AICPU_CONTEXT_H_ + +#include + +#include +#include + +namespace aicpu { +typedef struct { + uint32_t deviceId; + uint32_t tsId; + pid_t hostPid; +} aicpuContext_t; + +typedef enum { + AICPU_ERROR_NONE = 0, + AICPU_ERROR_FAILED = 1, +} status_t; + +enum CtxType : int32_t { + CTX_DEFAULT = 0, + CTX_PROF, + CTX_DEBUG +}; + +const std::string CONTEXT_KEY_OP_NAME = "opname"; + +status_t aicpuSetContext(aicpuContext_t *ctx); + +status_t aicpuGetContext(aicpuContext_t *ctx); + +status_t InitTaskMonitorContext(uint32_t aicpuCoreCnt); + +status_t SetAicpuThreadIndex(uint32_t threadIndex); + +status_t __attribute__((weak)) SetOpname(const std::string &opname); + +status_t GetOpname(uint32_t threadIndex, std::string &opname); + +status_t __attribute__((weak)) +SetThreadLocalCtx(const std::string &key, const std::string &value); + +status_t __attribute__((weak)) +GetThreadLocalCtx(const std::string &key, std::string &value); + +status_t RemoveThreadLocalCtx(const std::string &key); +} // namespace aicpu +#endif \ No newline at end of file diff --git a/cpu_context/stub/aicpu_sharder.cc b/cpu_context/stub/aicpu_sharder.cc new file mode 100644 index 0000000000000000000000000000000000000000..552fca0e8b4b9ea524d801dbb4fdf94fa4b276e0 --- /dev/null +++ b/cpu_context/stub/aicpu_sharder.cc @@ -0,0 +1,24 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "aicpu_sharder.h" + +#include "aicpu_context.h" + +namespace aicpu { +status_t __attribute__((weak)) GetThreadLocalCtx(const std::string &key, std::string &value) { + return AICPU_ERROR_NONE; +} +} // namespace aicpu diff --git a/cpu_context/stub/aicpu_sharder.h b/cpu_context/stub/aicpu_sharder.h new file mode 100644 index 0000000000000000000000000000000000000000..c7d054ec52009aa08e229cd8ba07fa7e4a678dc1 --- /dev/null +++ b/cpu_context/stub/aicpu_sharder.h @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef OPS_BUILT_IN_AICPU_CONTEXT_STUB_AICPU_SHARDER_H_ +#define OPS_BUILT_IN_AICPU_CONTEXT_STUB_AICPU_SHARDER_H_ + +#include +#include + +namespace aicpu { +using Closure = std::function; +using ClosureBool = std::function; +using RunnerBool = std::function; +using SharderWork = std::function; + +class SharderNonBlock { + public: + static SharderNonBlock &GetInstance(); + + void Register(const RunnerBool &schedule, const ClosureBool &doTask, + uint32_t cpuCoreNum); + void ParallelFor(int64_t total, int64_t perUnitSize, const SharderWork &work); + + void ParallelForHash(int64_t total, int64_t cpuNums, const SharderWork &work); + + void Schedule(const Closure &closure); + + uint32_t GetCPUNum(); + + private: + SharderNonBlock(); + ~SharderNonBlock() = default; + + SharderNonBlock(const SharderNonBlock &) = delete; + SharderNonBlock &operator=(const SharderNonBlock &) = delete; + SharderNonBlock(SharderNonBlock &&) = delete; + SharderNonBlock &operator=(SharderNonBlock &&) = delete; + + bool Enqueue(const Closure &closure); + inline int64_t CeilMultiple(int64_t x, int64_t base); + + private: + RunnerBool schedule_; + ClosureBool doTask_; + uint32_t cpuCoreNum_; +}; +} // namespace aicpu +#endif \ No newline at end of file