diff --git a/drivers/ub/urma/hw/udma/udma_cmd.c b/drivers/ub/urma/hw/udma/udma_cmd.c index 6e4c66af053763c928877f90412e125bd5aa7ff4..0e3c18c81e25c98aac2a9512b6dbd5c122babdc5 100644 --- a/drivers/ub/urma/hw/udma/udma_cmd.c +++ b/drivers/ub/urma/hw/udma/udma_cmd.c @@ -6,7 +6,11 @@ #include #include #include +#include "udma_eid.h" #include "udma_cmd.h" +#include "udma_jfc.h" +#include "udma_jfr.h" +#include "udma_jetty.h" bool debug_switch = true; @@ -71,6 +75,19 @@ void udma_free_cmd_mailbox(struct udma_dev *dev, kfree(mailbox); } +static void udma_set_mb_flag_or_fd(uint8_t op, struct udma_mbx_op_match *match, + void *buf) +{ + struct udma_jetty_ctx *jfs_ctx; + + if (op == UDMA_CMD_QUERY_JFS_CONTEXT) { + jfs_ctx = (struct udma_jetty_ctx *)buf; + jfs_ctx->flush_cqe_done = 1; + jfs_ctx->state = 1; + jfs_ctx->flush_ssn_vld = 1; + } +} + static bool udma_op_ignore_eagain(uint8_t op, void *buf) { struct udma_mbx_op_match matches[] = { @@ -100,8 +117,10 @@ static bool udma_op_ignore_eagain(uint8_t op, void *buf) uint32_t i; for (i = 0; i < ARRAY_SIZE(matches); i++) { - if (op == matches[i].op) + if (op == matches[i].op) { + udma_set_mb_flag_or_fd(op, &matches[i], buf); return matches[i].ignore_ret; + } } return false; diff --git a/drivers/ub/urma/hw/udma/udma_common.c b/drivers/ub/urma/hw/udma/udma_common.c index 375ed4826f6accb090682455ea356c079ce3a895..07d57a5ce96b2bd87d3ab2644b7daffc5ac4b4bb 100644 --- a/drivers/ub/urma/hw/udma/udma_common.c +++ b/drivers/ub/urma/hw/udma/udma_common.c @@ -430,6 +430,28 @@ void udma_init_udma_table_mutex(struct xarray *table, struct mutex *udma_mutex) mutex_init(udma_mutex); } +void udma_destroy_npu_cb_table(struct udma_dev *dev) +{ + struct udma_ctrlq_event_nb *nb = NULL; + unsigned long index = 0; + + mutex_lock(&dev->npu_nb_mutex); + if (!xa_empty(&dev->npu_nb_table)) { + xa_for_each(&dev->npu_nb_table, index, nb) { + ubase_ctrlq_unregister_crq_event(dev->comdev.adev, + UBASE_CTRLQ_SER_TYPE_DEV_REGISTER, + nb->opcode); + __xa_erase(&dev->npu_nb_table, index); + kfree(nb); + nb = NULL; + } + } + + mutex_unlock(&dev->npu_nb_mutex); + xa_destroy(&dev->npu_nb_table); + mutex_destroy(&dev->npu_nb_mutex); +} + void udma_destroy_udma_table(struct udma_dev *dev, struct udma_table *table, const char *table_name) { @@ -539,7 +561,7 @@ static void udma_unpin_k_addr(struct ubcore_umem *umem) udma_umem_release(umem, true); } -int udma_k_alloc_buf(struct udma_dev *udma_dev, size_t memory_size, +int udma_alloc_normal_buf(struct udma_dev *udma_dev, size_t memory_size, struct udma_buf *buf) { size_t aligned_memory_size; @@ -571,7 +593,7 @@ int udma_k_alloc_buf(struct udma_dev *udma_dev, size_t memory_size, return 0; } -void udma_k_free_buf(struct udma_dev *udma_dev, size_t memory_size, +void udma_free_normal_buf(struct udma_dev *udma_dev, size_t memory_size, struct udma_buf *buf) { udma_unpin_k_addr(buf->umem); @@ -581,6 +603,153 @@ void udma_k_free_buf(struct udma_dev *udma_dev, size_t memory_size, buf->addr = 0; } +static struct udma_hugepage_priv * +udma_alloc_hugepage_priv(struct udma_dev *dev, uint32_t len) +{ + struct udma_hugepage_priv *priv; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + priv->va_len = ALIGN(len, UDMA_HUGEPAGE_SIZE); + if (priv->va_len >> UDMA_HUGEPAGE_SHIFT > dev->total_hugepage_num) { + dev_err(dev->dev, "insufficient resources for mmap.\n"); + goto err_vmalloc_huge; + } + + priv->left_va_len = priv->va_len; + priv->va_base = vmalloc_huge(priv->va_len, GFP_KERNEL); + if (!priv->va_base) { + dev_err(dev->dev, "failed 
to vmalloc_huge, size=%u.", priv->va_len); + goto err_vmalloc_huge; + } + memset(priv->va_base, 0, priv->va_len); + + priv->umem = udma_pin_k_addr(&dev->ub_dev, (uint64_t)priv->va_base, priv->va_len); + if (IS_ERR(priv->umem)) { + dev_err(dev->dev, "pin kernel buf failed.\n"); + goto err_pin; + } + + refcount_set(&priv->refcnt, 1); + list_add(&priv->list, &dev->hugepage_list); + dev->total_hugepage_num -= priv->va_len >> UDMA_HUGEPAGE_SHIFT; + + if (dfx_switch) + dev_info_ratelimited(dev->dev, "map_hugepage, 2m_page_num=%u.\n", + priv->va_len >> UDMA_HUGEPAGE_SHIFT); + return priv; + +err_pin: + vfree(priv->va_base); +err_vmalloc_huge: + kfree(priv); + + return NULL; +} + +static struct udma_hugepage * +udma_alloc_hugepage(struct udma_dev *dev, uint32_t len) +{ + struct udma_hugepage_priv *priv = NULL; + struct udma_hugepage *hugepage; + bool b_reuse = false; + + hugepage = kzalloc(sizeof(*hugepage), GFP_KERNEL); + if (!hugepage) + return NULL; + + mutex_lock(&dev->hugepage_lock); + if (!list_empty(&dev->hugepage_list)) { + priv = list_first_entry(&dev->hugepage_list, struct udma_hugepage_priv, list); + b_reuse = len <= priv->left_va_len; + } + + if (b_reuse) { + refcount_inc(&priv->refcnt); + } else { + priv = udma_alloc_hugepage_priv(dev, len); + if (!priv) { + mutex_unlock(&dev->hugepage_lock); + kfree(hugepage); + return NULL; + } + } + + hugepage->va_start = priv->va_base + priv->left_va_offset; + hugepage->va_len = len; + hugepage->priv = priv; + priv->left_va_offset += len; + priv->left_va_len -= len; + mutex_unlock(&dev->hugepage_lock); + + if (dfx_switch) + dev_info_ratelimited(dev->dev, "occupy_hugepage, 4k_page_num=%u.\n", + hugepage->va_len >> UDMA_HW_PAGE_SHIFT); + return hugepage; +} + +static void udma_free_hugepage(struct udma_dev *dev, struct udma_hugepage *hugepage) +{ + struct udma_hugepage_priv *priv = hugepage->priv; + + if (dfx_switch) + dev_info_ratelimited(dev->dev, "return_hugepage, 4k_page_num=%u.\n", + hugepage->va_len >> UDMA_HW_PAGE_SHIFT); + mutex_lock(&dev->hugepage_lock); + if (refcount_dec_and_test(&priv->refcnt)) { + if (dfx_switch) + dev_info_ratelimited(dev->dev, "unmap_hugepage, 2m_page_num=%u.\n", + priv->va_len >> UDMA_HUGEPAGE_SHIFT); + list_del(&priv->list); + dev->total_hugepage_num += priv->va_len >> UDMA_HUGEPAGE_SHIFT; + + udma_unpin_k_addr(priv->umem); + vfree(priv->va_base); + kfree(priv); + } else { + memset(hugepage->va_start, 0, hugepage->va_len); + } + mutex_unlock(&dev->hugepage_lock); + kfree(hugepage); +} + +int udma_k_alloc_buf(struct udma_dev *dev, struct udma_buf *buf) +{ + uint32_t size = buf->entry_size * buf->entry_cnt; + uint32_t hugepage_size; + int ret = 0; + + if (ubase_adev_prealloc_supported(dev->comdev.adev)) { + hugepage_size = ALIGN(size, UDMA_HW_PAGE_SIZE); + buf->hugepage = udma_alloc_hugepage(dev, hugepage_size); + if (buf->hugepage) { + buf->kva = buf->hugepage->va_start; + buf->addr = (uint64_t)buf->kva; + buf->is_hugepage = true; + } else { + dev_warn(dev->dev, + "failed to alloc hugepage buf, switch to alloc normal buf."); + ret = udma_alloc_normal_buf(dev, size, buf); + } + } else { + ret = udma_alloc_normal_buf(dev, size, buf); + } + + return ret; +} + +void udma_k_free_buf(struct udma_dev *dev, struct udma_buf *buf) +{ + uint32_t size = buf->entry_cnt * buf->entry_size; + + if (buf->is_hugepage) + udma_free_hugepage(dev, buf->hugepage); + else + udma_free_normal_buf(dev, size, buf); +} + void *udma_alloc_iova(struct udma_dev *udma_dev, size_t memory_size, dma_addr_t *addr) { struct iova_slot *slot; @@ -628,6 
+797,35 @@ void udma_free_iova(struct udma_dev *udma_dev, size_t memory_size, void *kva_or_
 	dma_free_iova(slot);
 }
 
+int udma_query_ue_idx(struct ubcore_device *ubcore_dev, struct ubcore_devid *devid,
+		      uint16_t *ue_idx)
+{
+	struct udma_dev *dev = to_udma_dev(ubcore_dev);
+	struct udma_ue_index_cmd cmd = {};
+	struct ubase_cmd_buf out;
+	struct ubase_cmd_buf in;
+	int ret;
+
+	if (!devid) {
+		dev_err(dev->dev, "failed to query ue idx, devid is NULL.\n");
+		return -EINVAL;
+	}
+
+	(void)memcpy(cmd.guid, devid->raw, sizeof(devid->raw));
+
+	udma_fill_buf(&in, UDMA_CMD_QUERY_UE_INDEX, true, sizeof(cmd), &cmd);
+	udma_fill_buf(&out, UDMA_CMD_QUERY_UE_INDEX, true, sizeof(cmd), &cmd);
+
+	ret = ubase_cmd_send_inout(dev->comdev.adev, &in, &out);
+	if (ret) {
+		dev_err(dev->dev, "failed to query ue idx, ret = %d.\n", ret);
+		return ret;
+	}
+	*ue_idx = cmd.ue_idx;
+
+	return 0;
+}
+
 void udma_dfx_ctx_print(struct udma_dev *udev, const char *name, uint32_t id,
 			uint32_t len, uint32_t *ctx)
 {
@@ -650,3 +848,27 @@ void udma_swap_endian(uint8_t arr[], uint8_t res[], uint32_t res_size)
 	for (i = 0; i < res_size; i++)
 		res[i] = arr[res_size - i - 1];
 }
+
+void udma_init_hugepage(struct udma_dev *dev)
+{
+	INIT_LIST_HEAD(&dev->hugepage_list);
+	mutex_init(&dev->hugepage_lock);
+}
+
+void udma_destroy_hugepage(struct udma_dev *dev)
+{
+	struct udma_hugepage_priv *priv;
+	struct udma_hugepage_priv *tmp;
+
+	mutex_lock(&dev->hugepage_lock);
+	list_for_each_entry_safe(priv, tmp, &dev->hugepage_list, list) {
+		dev_info(dev->dev, "unmap_hugepage, 2m_page_num=%u.\n",
+			 priv->va_len >> UDMA_HUGEPAGE_SHIFT);
+		list_del(&priv->list);
+		udma_unpin_k_addr(priv->umem);
+		vfree(priv->va_base);
+		kfree(priv);
+	}
+	mutex_unlock(&dev->hugepage_lock);
+	mutex_destroy(&dev->hugepage_lock);
+}
diff --git a/drivers/ub/urma/hw/udma/udma_common.h b/drivers/ub/urma/hw/udma/udma_common.h
index b1b129ee4449c2e42c972eda80acc2b8eb2a7d91..dee92a4186d357a0e19b1d22e2dba697b917dade 100644
--- a/drivers/ub/urma/hw/udma/udma_common.h
+++ b/drivers/ub/urma/hw/udma/udma_common.h
@@ -5,10 +5,14 @@ #define __UDMA_COMM_H__
 
 #include
+#include
 #include
 
 #include "udma_ctx.h"
 #include "udma_dev.h"
 
+#define TP_ACK_UDP_SPORT_H_OFFSET 8
+#define UDMA_TPHANDLE_TPID_SHIFT 0xFFFFFF
+
 struct udma_jetty_grp {
 	struct ubcore_jetty_group ubcore_jetty_grp;
 	uint32_t start_jetty_id;
@@ -42,6 +46,7 @@ struct udma_jetty_queue {
 	uint32_t lock_free; /* Support kernel mode lock-free mode */
 	uint32_t ta_timeout; /* ms */
 	enum ubcore_jetty_state state;
+	struct udma_context *udma_ctx;
 	bool non_pin;
 	struct udma_jetty_grp *jetty_grp;
 	enum udma_jetty_type jetty_type;
@@ -66,10 +71,254 @@ struct udma_umem_param {
 	bool is_kernel;
 };
 
+struct udma_ue_index_cmd {
+	uint16_t ue_idx;
+	uint8_t rsv[2];
+	uint8_t guid[16];
+};
+
+struct udma_tp_ctx {
+	/* Byte4 */
+	uint32_t version : 1;
+	uint32_t tp_mode : 1;
+	uint32_t trt : 1;
+	uint32_t wqe_bb_shift : 4;
+	uint32_t oor_en : 1;
+	uint32_t tempid : 6;
+	uint32_t portn : 6;
+	uint32_t rsvd1 : 12;
+	/* Byte8 */
+	uint32_t wqe_ba_l;
+	/* Byte12 */
+	uint32_t wqe_ba_h : 20;
+	uint32_t udp_srcport_range : 4;
+	uint32_t cng_alg_sel : 3;
+	uint32_t lbi : 1;
+	uint32_t rsvd4 : 1;
+	uint32_t vlan_en : 1;
+	uint32_t mtu : 2;
+	/* Byte16 */
+	uint32_t route_addr_idx : 20;
+	uint32_t rsvd6 : 12;
+	/* Byte20 */
+	u32 tpn_vtpn : 24;
+	u32 rsvd7 : 8;
+	/* Byte24 to Byte28 */
+	u32 rsvd8[2];
+	/* Byte 32 */
+	u32 seid_idx : 16;
+	u32 sjetty_l : 16;
+	/* Byte 36 */
+	u32 sjetty_h : 4;
+	u32 tp_wqe_token_id : 20;
+	u32 tp_wqe_position : 1;
+	u32 rsv9_l : 7;
+	/* Byte 40 */
+	u32 rsvd9_h : 6;
+	u32 taack_tpn : 24;
+ u32 rsvd10 : 2; + /* Byte 44 */ + u32 spray_en : 1; + u32 sr_en : 1; + u32 ack_freq_mode : 1; + u32 route_type : 2; + u32 vl : 4; + u32 dscp : 6; + u32 switch_mp_en : 1; + u32 at_times : 5; + u32 retry_num_init : 3; + u32 at : 5; + u32 rsvd13 : 3; + /* Byte 48 */ + u32 on_flight_size : 16; + u32 hpln : 8; + u32 fl_l : 8; + /* Byte 52 */ + u32 fl_h : 12; + u32 dtpn : 20; + /* Byte 56 */ + u32 rc_tpn : 24; + u32 rc_vl : 4; + u32 tpg_vld : 1; + u32 reorder_cap : 3; + /* Byte 60 */ + u32 reorder_q_shift : 4; + u32 reorder_q_addr_l : 28; + /* Byte 64 */ + u32 reorder_q_addr_h : 24; + u32 tpg_l : 8; + /* Byte 68 */ + u32 tpg_h : 12; + u32 jettyn : 20; + /* Byte 72 */ + u32 dyn_timeout_mode : 1; + u32 base_time : 23; + u32 rsvd15 : 8; + /* Byte 76 */ + u32 tpack_psn : 24; + u32 tpack_rspst : 3; + u32 tpack_rspinfo : 5; + /* Byte 80 */ + u32 tpack_msn : 24; + u32 ack_udp_srcport_l : 8; + /* Byte 84 */ + u32 ack_udp_srcport_h : 8; + u32 max_rcv_psn : 24; + /* Byte 88 */ + u32 scc_token : 19; + u32 poll_db_wait_do : 1; + u32 msg_rty_lp_flg : 1; + u32 retry_cnt : 3; + u32 sq_invld_flg : 1; + u32 wait_ack_timeout : 1; + u32 tx_rtt_caling : 1; + u32 cnp_tx_flag : 1; + u32 sq_db_doing : 1; + u32 tpack_doing : 1; + u32 sack_wait_do : 1; + u32 tpack_wait_do : 1; + /* Byte 92 */ + u16 post_max_idx; + u16 wqe_max_bb_idx; + /* Byte 96 */ + u16 wqe_bb_pi; + u16 wqe_bb_ci; + /* Byte 100 */ + u16 data_udp_srcport; + u16 wqe_msn; + /* Byte 104 */ + u32 cur_req_psn : 24; + u32 tx_ack_psn_err : 1; + u32 poll_db_type : 2; + u32 tx_ack_flg : 1; + u32 tx_sq_err_flg : 1; + u32 scc_retry_type : 2; + u32 flush_cqe_wait_do : 1; + /* Byte 108 */ + u32 wqe_max_psn : 24; + u32 ssc_token_l : 4; + u32 rsvd16 : 4; + /* Byte 112 */ + u32 tx_sq_timer; + /* Byte 116 */ + u32 rtt_timestamp_psn : 24; + u32 rsvd17 : 8; + /* Byte 120 */ + u32 rtt_timestamp : 24; + u32 cnp_timer_l : 8; + /* Byte 124 */ + u32 cnp_timer_h : 16; + u32 max_reorder_id : 16; + /* Byte 128 */ + u16 cur_reorder_id; + u16 wqe_max_msn; + /* Byte 132 */ + u16 post_bb_pi; + u16 post_bb_ci; + /* Byte 136 */ + u32 lr_ae_ind : 1; + u32 rx_cqe_cnt : 16; + u32 reorder_q_si : 13; + u32 rq_err_type_l : 2; + /* Byte 140 */ + u32 rq_err_type_h : 3; + u32 rsvd18 : 2; + u32 rsvd19 : 27; + /* Byte 144 */ + u32 req_seq; + /* Byte 148 */ + uint32_t req_ce_seq; + /* Byte 152 */ + u32 req_cmp_lrb_indx : 12; + u32 req_lrb_indx : 12; + u32 req_lrb_indx_vld : 1; + u32 rx_req_psn_err : 1; + u32 rx_req_last_optype : 3; + u32 rx_req_fake_flg : 1; + u32 rsvd20 : 2; + /* Byte 156 */ + uint16_t jfr_wqe_idx; + uint16_t rx_req_epsn_l; + /* Byte 160 */ + uint32_t rx_req_epsn_h : 8; + uint32_t rx_req_reduce_code : 8; + uint32_t rx_req_msn_l : 16; + /* Byte 164 */ + uint32_t rx_req_msn_h : 8; + uint32_t jfr_wqe_rnr : 1; + uint32_t jfr_wqe_rnr_timer : 5; + uint32_t rsvd21 : 2; + uint32_t jfr_wqe_cnt : 16; + /* Byte 168 */ + uint32_t max_reorder_q_idx : 13; + uint32_t rsvd22 : 3; + uint32_t reorder_q_ei : 13; + uint32_t rx_req_last_elr_flg : 1; + uint32_t rx_req_last_elr_err_type_l : 2; + /* Byte172 */ + uint32_t rx_req_last_elr_err_type_h : 3; + uint32_t rx_req_last_op : 1; + uint32_t jfrx_jetty : 1; + uint32_t jfrx_jfcn_l : 16; + uint32_t jfrx_jfcn_h : 4; + uint32_t jfrx_jfrn_l : 7; + /* Byte176 */ + u32 jfrx_jfrn_h1 : 9; + u32 jfrx_jfrn_h2 : 4; + u32 rq_timer_l : 19; + /* Byte180 */ + u32 rq_timer_h : 13; + u32 rq_at : 5; + u32 wait_cqe_timeout : 1; + u32 rsvd23 : 13; + /* Byte184 */ + u32 rx_sq_timer; + /* Byte188 */ + u32 tp_st : 3; + u32 rsvd24 : 4; + u32 ls_ae_ind : 1; + u32 
retry_msg_psn : 24; + /* Byte192 */ + u32 retry_msg_fpsn : 24; + u32 rsvd25 : 8; + /* Byte196 */ + u16 retry_wqebb_idx; + u16 retry_msg_msn; + /* Byte200 */ + u32 ack_rcv_seq; + /* Byte204 */ + u32 rtt : 24; + u32 dup_sack_cnt : 8; + /* Byte208 */ + u32 sack_max_rcv_psn : 24; + u32 rsvd26 : 7; + u32 rx_ack_flg : 1; + /* Byte212 */ + u32 rx_ack_msn : 16; + u32 sack_lrb_indx : 12; + u32 rx_fake_flg : 1; + u32 rx_rtt_caling : 1; + u32 rx_ack_psn_err : 1; + u32 sack_lrb_indx_vld : 1; + /* Byte216 */ + u32 rx_ack_epsn : 24; + u32 rsvd27 : 8; + /* Byte220 */ + u32 max_retry_psn : 24; + u32 retry_reorder_id_l : 8; + /* Byte224 */ + u32 retry_reorder_id_h : 8; + u32 rsvd28 : 8; + u32 rsvd29 : 16; + /* Byte228 to Byte256 */ + u32 scc_data[8]; +}; + struct ubcore_umem *udma_umem_get(struct udma_umem_param *param); void udma_umem_release(struct ubcore_umem *umem, bool is_kernel); void udma_init_udma_table(struct udma_table *table, uint32_t max, uint32_t min); void udma_init_udma_table_mutex(struct xarray *table, struct mutex *udma_mutex); +void udma_destroy_npu_cb_table(struct udma_dev *dev); void udma_destroy_udma_table(struct udma_dev *dev, struct udma_table *table, const char *table_name); void udma_destroy_eid_table(struct udma_dev *udma_dev); @@ -77,12 +326,20 @@ void udma_dfx_store_id(struct udma_dev *udma_dev, struct udma_dfx_entity *entity uint32_t id, const char *name); void udma_dfx_delete_id(struct udma_dev *udma_dev, struct udma_dfx_entity *entity, uint32_t id); -int udma_k_alloc_buf(struct udma_dev *udma_dev, size_t memory_size, struct udma_buf *buf); -void udma_k_free_buf(struct udma_dev *udma_dev, size_t memory_size, struct udma_buf *buf); +int udma_alloc_normal_buf(struct udma_dev *udma_dev, size_t memory_size, struct udma_buf *buf); +void udma_free_normal_buf(struct udma_dev *udma_dev, size_t memory_size, struct udma_buf *buf); +int udma_k_alloc_buf(struct udma_dev *dev, struct udma_buf *buf); +void udma_k_free_buf(struct udma_dev *dev, struct udma_buf *buf); void *udma_alloc_iova(struct udma_dev *udma_dev, size_t memory_size, dma_addr_t *addr); void udma_free_iova(struct udma_dev *udma_dev, size_t memory_size, void *kva_or_slot, dma_addr_t addr); +static inline void udma_write64(struct udma_dev *udma_dev, + uint64_t *val, void __iomem *dest) +{ + writeq(*val, dest); +} + static inline void udma_alloc_kernel_db(struct udma_dev *dev, struct udma_jetty_queue *queue) { @@ -91,6 +348,13 @@ static inline void udma_alloc_kernel_db(struct udma_dev *dev, queue->db_addr = queue->dwqe_addr + UDMA_DOORBELL_OFFSET; } +static inline void *get_buf_entry(struct udma_buf *buf, uint32_t n) +{ + uint32_t entry_index = n & (buf->entry_cnt - 1); + + return (char *)buf->kva + (entry_index * buf->entry_size); +} + static inline uint8_t to_ta_timeout(uint32_t err_timeout) { #define TA_TIMEOUT_DIVISOR 8 @@ -102,8 +366,13 @@ static inline uint64_t udma_cal_npages(uint64_t va, uint64_t len) return (ALIGN(va + len, PAGE_SIZE) - ALIGN_DOWN(va, PAGE_SIZE)) / PAGE_SIZE; } +int udma_query_ue_idx(struct ubcore_device *ub_dev, struct ubcore_devid *devid, + uint16_t *ue_idx); void udma_dfx_ctx_print(struct udma_dev *udev, const char *name, uint32_t id, uint32_t len, uint32_t *ctx); void udma_swap_endian(uint8_t arr[], uint8_t res[], uint32_t res_size); +void udma_init_hugepage(struct udma_dev *dev); +void udma_destroy_hugepage(struct udma_dev *dev); + #endif /* __UDMA_COMM_H__ */ diff --git a/drivers/ub/urma/hw/udma/udma_ctl.c b/drivers/ub/urma/hw/udma/udma_ctl.c index 
8b709dc10a2036409941b0439e8dbb234db4cbe8..0dafb84d352d2c01e22b92d681398ab08f23e89a 100644 --- a/drivers/ub/urma/hw/udma/udma_ctl.c +++ b/drivers/ub/urma/hw/udma/udma_ctl.c @@ -11,6 +11,7 @@ #include #include "udma_cmd.h" #include "udma_jetty.h" +#include "udma_segment.h" #include "udma_jfs.h" #include "udma_jfc.h" #include "udma_db.h" @@ -18,6 +19,1102 @@ #include #include "udma_def.h" +const char *udma_cqe_aux_info_type_str[] = { + "TPP2TQEM_WR_CNT", + "DEVICE_RAS_STATUS_2", + "RXDMA_WR_PAYL_AXI_ERR", + "RXDMA_HEAD_SPLIT_ERR_FLAG0", + "RXDMA_HEAD_SPLIT_ERR_FLAG1", + "RXDMA_HEAD_SPLIT_ERR_FLAG2", + "RXDMA_HEAD_SPLIT_ERR_FLAG3", + "TP_RCP_INNER_ALM", + "TWP_AE_DFX", + "PA_OUT_PKT_ERR_CNT", + "TP_DAM_AXI_ALARM", + "TP_DAM_VFT_BT_ALARM", + "TP_EUM_AXI_ALARM", + "TP_EUM_VFT_BT_ALARM", + "TP_TPMM_AXI_ALARM", + "TP_TPMM_VFT_BT_ALARM", + "TP_TPGCM_AXI_ALARM", + "TP_TPGCM_VFT_BT_ALARM", + "TWP_ALM", + "TP_RWP_INNER_ALM", + "TWP_DFX21", + "LQC_TA_RNR_TANACK_CNT", + "FVT", + "RQMT0", + "RQMT1", + "RQMT2", + "RQMT3", + "RQMT4", + "RQMT5", + "RQMT6", + "RQMT7", + "RQMT8", + "RQMT9", + "RQMT10", + "RQMT11", + "RQMT12", + "RQMT13", + "RQMT14", + "RQMT15", + "PROC_ERROR_ALM", + "LQC_TA_TIMEOUT_TAACK_CNT", + "TP_RRP_ERR_FLG_0", +}; + +const char *udma_ae_aux_info_type_str[] = { + "TP_RRP_FLUSH_TIMER_PKT_CNT", + "TPP_DFX5", + "TWP_AE_DFX", + "TP_RRP_ERR_FLG_0", + "TP_RRP_ERR_FLG_1", + "TP_RWP_INNER_ALM", + "TP_RCP_INNER_ALM", + "LQC_TA_TQEP_WQE_ERR", + "LQC_TA_CQM_CQE_INNER_ALARM", +}; + +static int udma_get_sq_buf_ex(struct udma_dev *dev, struct udma_jetty_queue *sq, + struct udma_jfs_cfg_ex *cfg_ex) +{ + struct ubcore_jfs_cfg *jfs_cfg; + uint32_t wqe_bb_depth; + uint32_t sqe_bb_cnt; + uint32_t size; + + jfs_cfg = &cfg_ex->base_cfg; + + if (!jfs_cfg->flag.bs.lock_free) + spin_lock_init(&sq->lock); + sq->max_inline_size = jfs_cfg->max_inline_data; + sq->max_sge_num = jfs_cfg->max_sge; + sq->tid = dev->tid; + sq->lock_free = jfs_cfg->flag.bs.lock_free; + + sqe_bb_cnt = sq_cal_wqebb_num(SQE_WRITE_NOTIFY_CTL_LEN, jfs_cfg->max_sge); + if (sqe_bb_cnt > MAX_WQEBB_NUM) + sqe_bb_cnt = MAX_WQEBB_NUM; + sq->sqe_bb_cnt = sqe_bb_cnt; + + wqe_bb_depth = roundup_pow_of_two(sqe_bb_cnt * jfs_cfg->depth); + sq->buf.entry_size = UDMA_JFS_WQEBB_SIZE; + size = ALIGN(wqe_bb_depth * sq->buf.entry_size, UDMA_HW_PAGE_SIZE); + sq->buf.entry_cnt = size >> WQE_BB_SIZE_SHIFT; + + if (size != cfg_ex->cstm_cfg.sq.buff_size) { + dev_err(dev->dev, "buff size is wrong, buf size = %u.\n", size); + return -EINVAL; + } + + if (cfg_ex->cstm_cfg.sq.buff == 0) { + dev_err(dev->dev, "cstm_cfg sq buff is wrong.\n"); + return -EINVAL; + } + + sq->buf.addr = (dma_addr_t)(uintptr_t)phys_to_virt((uint64_t) + (uintptr_t)cfg_ex->cstm_cfg.sq.buff); + if (sq->buf.addr == 0) { + dev_err(dev->dev, "sq buff addr is wrong.\n"); + return -EINVAL; + } + + sq->buf.kva = (void *)(uintptr_t)sq->buf.addr; + + sq->wrid = kcalloc(1, sq->buf.entry_cnt * sizeof(uint64_t), GFP_KERNEL); + if (!sq->wrid) { + sq->buf.kva = NULL; + sq->buf.addr = 0; + dev_err(dev->dev, + "failed to alloc wrid for jfs id = %u when entry cnt = %u.\n", + sq->id, sq->buf.entry_cnt); + return -ENOMEM; + } + + udma_alloc_kernel_db(dev, sq); + sq->kva_curr = sq->buf.kva; + + sq->trans_mode = jfs_cfg->trans_mode; + + return 0; +} + +static int udma_get_jfs_buf_ex(struct udma_dev *dev, struct udma_jfs *jfs, + struct udma_jfs_cfg_ex *cfg_ex) +{ + int ret; + + jfs->jfs_addr = (uintptr_t)&jfs->sq; + + ret = udma_get_sq_buf_ex(dev, &jfs->sq, cfg_ex); + if (ret) + dev_err(dev->dev, + "failed to get sq 
buf in jfs process, ret = %d.\n", ret); + + return ret; +} + +static struct ubcore_jfs *udma_create_jfs_ex(struct ubcore_device *ub_dev, + struct udma_jfs_cfg_ex *cfg_ex) +{ + struct ubcore_jfs_cfg *cfg = &cfg_ex->base_cfg; + struct udma_dev *dev = to_udma_dev(ub_dev); + struct ubase_mbx_attr attr = {}; + struct udma_jetty_ctx ctx = {}; + struct udma_jfs *jfs; + int ret; + + ret = udma_verify_jfs_param(dev, cfg, true); + if (ret) + return NULL; + + jfs = kcalloc(1, sizeof(*jfs), GFP_KERNEL); + if (!jfs) + return NULL; + + dev_info(dev->dev, "start alloc id!\n"); + ret = udma_alloc_jetty_id(dev, &jfs->sq.id, &dev->caps.jetty); + if (ret) { + dev_err(dev->dev, "alloc JFS id failed, ret = %d.\n", ret); + goto err_alloc_jfsn; + } + jfs->ubcore_jfs.jfs_id.id = jfs->sq.id; + jfs->ubcore_jfs.jfs_cfg = *cfg; + jfs->ubcore_jfs.ub_dev = ub_dev; + jfs->ubcore_jfs.uctx = NULL; + jfs->ubcore_jfs.jfae_handler = cfg_ex->jfae_handler; + jfs->mode = UDMA_KERNEL_STARS_JFS_TYPE; + + ret = xa_err(xa_store(&dev->jetty_table.xa, jfs->sq.id, &jfs->sq, GFP_KERNEL)); + if (ret) { + dev_err(dev->dev, "store jfs sq(%u) failed, ret = %d.\n", + jfs->sq.id, ret); + goto err_store_jfs_sq; + } + + dev_info(dev->dev, "start get stars jfs buf!\n"); + ret = udma_get_jfs_buf_ex(dev, jfs, cfg_ex); + if (ret) + goto err_alloc_jfs_id; + + udma_set_query_flush_time(&jfs->sq, cfg->err_timeout); + jfs->sq.state = UBCORE_JETTY_STATE_READY; + udma_init_jfsc(dev, cfg, jfs, &ctx); + attr.tag = jfs->sq.id; + attr.op = UDMA_CMD_CREATE_JFS_CONTEXT; + ret = post_mailbox_update_ctx(dev, &ctx, sizeof(ctx), &attr); + if (ret) { + dev_err(dev->dev, "failed to upgrade JFSC, ret = %d.\n", ret); + goto err_update_ctx; + } + + refcount_set(&jfs->ae_refcount, 1); + init_completion(&jfs->ae_comp); + + if (dfx_switch) + udma_dfx_store_jfs_id(dev, jfs); + + dev_info(dev->dev, "create stars jfs success!\n"); + + return &jfs->ubcore_jfs; + +err_update_ctx: + kfree(jfs->sq.wrid); +err_alloc_jfs_id: + xa_erase(&dev->jetty_table.xa, jfs->sq.id); +err_store_jfs_sq: + udma_adv_id_free(&dev->jetty_table.bitmap_table, jfs->sq.id, false); +err_alloc_jfsn: + kfree(jfs); + return NULL; +} + +static int udma_create_jfs_ops_ex(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct udma_jfs_cfg_ex cfg_ex; + struct ubcore_jfs *jfs; + + if (udma_check_base_param(in->addr, in->len, sizeof(struct udma_jfs_cfg_ex)) || + udma_check_base_param(out->addr, out->len, sizeof(struct ubcore_jfs *))) { + dev_err(udev->dev, "param invalid in create jfs, in_len = %u, out_len = %u.\n", + in->len, out->len); + return -EINVAL; + } + + memcpy(&cfg_ex, (void *)(uintptr_t)in->addr, sizeof(struct udma_jfs_cfg_ex)); + + jfs = udma_create_jfs_ex(dev, &cfg_ex); + if (jfs == NULL) + return -EFAULT; + + memcpy((void *)(uintptr_t)out->addr, &jfs, sizeof(struct ubcore_jfs *)); + + return 0; +} + +static int udma_delete_jfs_ops_ex(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct ubcore_jfs *jfs; + + if (udma_check_base_param(in->addr, in->len, sizeof(struct ubcore_jfs *))) { + dev_err(udev->dev, "parameter invalid in delete jfs, len = %u.\n", + in->len); + return -EFAULT; + } + memcpy(&jfs, (void *)(uintptr_t)in->addr, sizeof(struct ubcore_jfs *)); + if (jfs == NULL) + return -EINVAL; + + if (udma_destroy_jfs(jfs)) + return -EFAULT; + + return 0; 
+} + +static int udma_get_jfc_buf_ex(struct udma_dev *dev, + struct udma_jfc *jfc, + struct udma_jfc_cfg_ex *cfg_ex) +{ + uint32_t size; + int ret = 0; + + if (!jfc->lock_free) + spin_lock_init(&jfc->lock); + jfc->buf.entry_size = dev->caps.cqe_size; + jfc->tid = dev->tid; + size = jfc->buf.entry_size * jfc->buf.entry_cnt; + + if (size != cfg_ex->cstm_cfg.cq.buff_size) { + dev_err(dev->dev, "cqe buff size is wrong, buf size = %u.\n", size); + return -EINVAL; + } + + jfc->buf.addr = (dma_addr_t)(uintptr_t)cfg_ex->cstm_cfg.cq.buff; + + if (jfc->buf.addr == 0) { + dev_err(dev->dev, "cq buff addr is wrong.\n"); + return -EINVAL; + } + + jfc->buf.kva = (void *)(uintptr_t)jfc->buf.addr; + + ret = udma_alloc_sw_db(dev, &jfc->db, UDMA_JFC_TYPE_DB); + if (ret) { + dev_err(dev->dev, "failed to alloc sw db for jfc(%u).\n", jfc->jfcn); + return -ENOMEM; + } + + return ret; +} + +static struct ubcore_jfc *udma_create_jfc_ex(struct ubcore_device *ubcore_dev, + struct udma_jfc_cfg_ex *cfg_ex) +{ + struct udma_dev *dev = to_udma_dev(ubcore_dev); + struct ubcore_jfc_cfg *cfg = &cfg_ex->base_cfg; + unsigned long flags_store; + unsigned long flags_erase; + struct udma_jfc *jfc; + int ret; + + jfc = kzalloc(sizeof(struct udma_jfc), GFP_KERNEL); + if (!jfc) + return NULL; + + jfc->arm_sn = 1; + jfc->buf.entry_cnt = cfg->depth ? roundup_pow_of_two(cfg->depth) : cfg->depth; + + ret = udma_check_jfc_cfg(dev, jfc, &cfg_ex->base_cfg); + if (ret) + goto err_check_cfg; + + ret = udma_id_alloc_auto_grow(dev, &dev->jfc_table.ida_table, &jfc->jfcn); + if (ret) + goto err_alloc_jfc_id; + + udma_init_jfc_param(cfg, jfc); + jfc->base.ub_dev = ubcore_dev; + jfc->base.uctx = NULL; + jfc->base.jfae_handler = cfg_ex->jfae_handler; + jfc->base.jfce_handler = cfg_ex->jfce_handler; + jfc->mode = UDMA_KERNEL_STARS_JFC_TYPE; + + xa_lock_irqsave(&dev->jfc_table.xa, flags_store); + ret = xa_err(__xa_store(&dev->jfc_table.xa, jfc->jfcn, jfc, GFP_ATOMIC)); + xa_unlock_irqrestore(&dev->jfc_table.xa, flags_store); + if (ret) { + dev_err(dev->dev, + "failed to stored jfc id to jfc_table, jfcn: %u.\n", + jfc->jfcn); + goto err_store_jfcn; + } + + ret = udma_get_jfc_buf_ex(dev, jfc, cfg_ex); + if (ret) + goto err_get_jfc_buf; + + ret = udma_post_create_jfc_mbox(dev, jfc); + if (ret) + goto err_alloc_cqc; + + refcount_set(&jfc->event_refcount, 1); + + init_completion(&jfc->event_comp); + + if (dfx_switch) + udma_dfx_store_id(dev, &dev->dfx_info->jfc, jfc->jfcn, "jfc"); + + return &jfc->base; + +err_alloc_cqc: + udma_free_sw_db(dev, &jfc->db); +err_get_jfc_buf: + xa_lock_irqsave(&dev->jfc_table.xa, flags_erase); + __xa_erase(&dev->jfc_table.xa, jfc->jfcn); + xa_unlock_irqrestore(&dev->jfc_table.xa, flags_erase); +err_store_jfcn: + udma_id_free(&dev->jfc_table.ida_table, jfc->jfcn); +err_alloc_jfc_id: +err_check_cfg: + kfree(jfc); + return NULL; +} + +static int udma_create_jfc_ops_ex(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct udma_jfc_cfg_ex cfg_ex; + struct ubcore_jfc *jfc; + + if (udma_check_base_param(in->addr, in->len, sizeof(struct udma_jfc_cfg_ex)) || + udma_check_base_param(out->addr, out->len, sizeof(struct ubcore_jfc *))) { + dev_err(udev->dev, "input parameter invalid in create jfc, in_len = %u, out_len = %u.\n", + in->len, out->len); + return -EINVAL; + } + + memcpy(&cfg_ex, (void *)(uintptr_t)in->addr, + min(in->len, sizeof(struct udma_jfc_cfg_ex))); + + jfc = udma_create_jfc_ex(dev, &cfg_ex); + 
if (jfc == NULL) + return -EFAULT; + + memcpy((void *)(uintptr_t)out->addr, &jfc, sizeof(struct ubcore_jfc *)); + + return 0; +} + +static int udma_delete_jfc_ops_ex(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct ubcore_jfc *jfc; + + if (udma_check_base_param(in->addr, in->len, sizeof(struct ubcore_jfc *))) { + dev_err(udev->dev, "parameter invalid in delete jfc, len = %u.\n", + in->len); + return -EINVAL; + } + + memcpy(&jfc, (void *)(uintptr_t)in->addr, + min(in->len, sizeof(struct ubcore_jfc *))); + if (jfc == NULL) + return -EINVAL; + + if (udma_destroy_jfc(jfc)) + return -EFAULT; + + return 0; +} + +static int udma_set_cqe_ex(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct udma_ex_jfc_addr *jfc_addr; + struct udma_set_cqe_ex cqe_ex; + uint32_t cq_depth; + + if (udma_check_base_param(in->addr, in->len, sizeof(struct udma_set_cqe_ex))) { + dev_err(udev->dev, "parameter invalid in set cqe, len = %u.\n", + in->len); + return -EINVAL; + } + + memcpy(&cqe_ex, (void *)(uintptr_t)in->addr, + min(in->len, sizeof(struct udma_set_cqe_ex))); + + if (cqe_ex.jfc_type != UDMA_STARS_JFC_TYPE && + cqe_ex.jfc_type != UDMA_CCU_JFC_TYPE) { + dev_err(udev->dev, "invalid jfc type, mode = %u.\n", cqe_ex.jfc_type); + return -EINVAL; + } + + if (cqe_ex.addr == 0) { + dev_err(udev->dev, "cq addr is wrong in set cqe.\n"); + return -EINVAL; + } + + cq_depth = cqe_ex.len / udev->caps.cqe_size; + if (cq_depth < UDMA_JFC_DEPTH_MIN || cq_depth > udev->caps.jfc.depth || + (cqe_ex.len % udev->caps.cqe_size) != 0 || + cq_depth != roundup_pow_of_two(cq_depth)) { + dev_err(udev->dev, "cq buff size is wrong in set cqe, size = %u.\n", + cqe_ex.len); + return -EINVAL; + } + + jfc_addr = &udev->cq_addr_array[cqe_ex.jfc_type]; + jfc_addr->cq_addr = cqe_ex.addr; + jfc_addr->cq_len = cqe_ex.len; + + return 0; +} + +static int udma_query_ue_info_ex(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct udma_ue_info_ex info = {}; + + if (udma_check_base_param(out->addr, out->len, sizeof(struct udma_ue_info_ex))) { + dev_err(udev->dev, "parameter invalid in query ue, len = %u.\n", + out->len); + return -EINVAL; + } + + info.chip_id = udev->chip_id; + info.die_id = udev->die_id; + info.dwqe_addr = udev->db_base + JETTY_DSQE_OFFSET; + info.db_base_addr = info.dwqe_addr + UDMA_DOORBELL_OFFSET; + info.ue_id = udev->ue_id; + info.register_base_addr = udev->db_base; + info.offset_len = PAGE_SIZE; + + memcpy((void *)(uintptr_t)out->addr, &info, sizeof(struct udma_ue_info_ex)); + + return 0; +} + +static int udma_ctrlq_query_tp_sport(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_tp_sport_out tp_sport_out = {}; + struct udma_tp_sport_in tp_sport_in = {}; + struct udma_dev *udev = to_udma_dev(dev); + struct ubase_cmd_mailbox *mailbox = NULL; + struct ubase_mbx_attr mbox_attr = {}; + struct udma_tp_ctx *tpc; + + if (udma_check_base_param(out->addr, out->len, sizeof(struct udma_tp_sport_out)) || + udma_check_base_param(in->addr, in->len, sizeof(struct udma_tp_sport_in))) { + dev_err(udev->dev, "parameter invalid in query tp sport, in_len = %u, out_len = %u.\n", + 
in->len, out->len); + return -EINVAL; + } + + if (udev->is_ue) { + dev_err(udev->dev, "ue is not supported.\n"); + return -EINVAL; + } + + memcpy(&tp_sport_in, (void *)(uintptr_t)in->addr, sizeof(struct udma_tp_sport_in)); + + mbox_attr.tag = tp_sport_in.tpn; + mbox_attr.op = UDMA_CMD_QUERY_TP_CONTEXT; + mailbox = udma_mailbox_query_ctx(udev, &mbox_attr); + if (!mailbox) + return -ENOMEM; + + tpc = (struct udma_tp_ctx *)mailbox->buf; + + tp_sport_out.ack_udp_srcport = tpc->ack_udp_srcport_h << TP_ACK_UDP_SPORT_H_OFFSET | + tpc->ack_udp_srcport_l; + tp_sport_out.data_udp_srcport = tpc->data_udp_srcport; + + memcpy((void *)(uintptr_t)out->addr, &tp_sport_out, out->len); + + udma_free_cmd_mailbox(udev, mailbox); + + return 0; +} + +static void dump_cqe_client_loc_len_err_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + enum udma_cqe_aux_info_type type[] = { + TPP2TQEM_WR_CNT, + DEVICE_RAS_STATUS_2, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[type[i]]; + } + aux_info_out->aux_info_num = aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[type[i]], info->cqe_aux_info[type[i]]); +} + +static void dump_cqe_client_loc_access_err_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + enum udma_cqe_aux_info_type type[] = { + RXDMA_WR_PAYL_AXI_ERR, + RXDMA_HEAD_SPLIT_ERR_FLAG0, + RXDMA_HEAD_SPLIT_ERR_FLAG1, + RXDMA_HEAD_SPLIT_ERR_FLAG2, + RXDMA_HEAD_SPLIT_ERR_FLAG3, + TP_RCP_INNER_ALM_FOR_CQE, + TWP_AE_DFX_FOR_CQE, + PA_OUT_PKT_ERR_CNT, + TP_DAM_AXI_ALARM, + TP_DAM_VFT_BT_ALARM, + TP_EUM_AXI_ALARM, + TP_EUM_VFT_BT_ALARM, + TP_TPMM_AXI_ALARM, + TP_TPMM_VFT_BT_ALARM, + TP_TPGCM_AXI_ALARM, + TP_TPGCM_VFT_BT_ALARM, + DEVICE_RAS_STATUS_2, + TWP_ALM, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[type[i]]; + } + aux_info_out->aux_info_num = aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[type[i]], info->cqe_aux_info[type[i]]); +} + +static void dump_cqe_client_rem_resp_len_err_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + enum udma_cqe_aux_info_type type[] = { + TP_RWP_INNER_ALM_FOR_CQE, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[type[i]]; + } + aux_info_out->aux_info_num = aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[type[i]], info->cqe_aux_info[type[i]]); +} + +static void 
+dump_cqe_client_rem_access_abort_err_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + enum udma_cqe_aux_info_type type[] = { + RXDMA_WR_PAYL_AXI_ERR, + RXDMA_HEAD_SPLIT_ERR_FLAG0, + RXDMA_HEAD_SPLIT_ERR_FLAG1, + RXDMA_HEAD_SPLIT_ERR_FLAG2, + RXDMA_HEAD_SPLIT_ERR_FLAG3, + TP_RCP_INNER_ALM_FOR_CQE, + TP_RRP_ERR_FLG_0_FOR_CQE, + TPP2TQEM_WR_CNT, + TWP_DFX21 + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[type[i]]; + } + aux_info_out->aux_info_num = aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[type[i]], info->cqe_aux_info[type[i]]); +} + +static void dump_cqe_client_ack_timeout_err_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + enum udma_cqe_aux_info_type type[] = { + LQC_TA_TIMEOUT_TAACK_CNT, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[type[i]]; + } + aux_info_out->aux_info_num = aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[type[i]], info->cqe_aux_info[type[i]]); +} + +static void +dump_cqe_client_rnr_retry_cnt_exc_err_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + enum udma_cqe_aux_info_type type[] = { + LQC_TA_RNR_TANACK_CNT, + FVT, + RQMT0, + RQMT1, + RQMT2, + RQMT3, + RQMT4, + RQMT5, + RQMT6, + RQMT7, + RQMT8, + RQMT9, + RQMT10, + RQMT11, + RQMT12, + RQMT13, + RQMT14, + RQMT15, + PROC_ERROR_ALM, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[type[i]]; + } + aux_info_out->aux_info_num = aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[type[i]], info->cqe_aux_info[type[i]]); +} + +static void dump_cqe_server_loc_access_err_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + enum udma_cqe_aux_info_type type[] = { + TP_RWP_INNER_ALM_FOR_CQE, + RXDMA_WR_PAYL_AXI_ERR, + RXDMA_HEAD_SPLIT_ERR_FLAG0, + RXDMA_HEAD_SPLIT_ERR_FLAG1, + RXDMA_HEAD_SPLIT_ERR_FLAG2, + RXDMA_HEAD_SPLIT_ERR_FLAG3, + TP_RCP_INNER_ALM_FOR_CQE, + TP_RRP_ERR_FLG_0_FOR_CQE, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[type[i]]; + } + aux_info_out->aux_info_num = 
aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[type[i]], info->cqe_aux_info[type[i]]); +} + +static void dump_cqe_server_loc_len_err_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + enum udma_cqe_aux_info_type type[] = { + TP_RWP_INNER_ALM_FOR_CQE, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[type[i]]; + } + aux_info_out->aux_info_num = aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[type[i]], info->cqe_aux_info[type[i]]); +} + +static void dump_cqe_all_aux_info(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) +{ + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= MAX_CQE_AUX_INFO_TYPE_NUM) { + for (i = 0; i < MAX_CQE_AUX_INFO_TYPE_NUM; i++) { + aux_info_out->aux_info_type[i] = i; + aux_info_out->aux_info_value[i] = info->cqe_aux_info[i]; + } + aux_info_out->aux_info_num = MAX_CQE_AUX_INFO_TYPE_NUM; + } + + for (i = 0; i < MAX_CQE_AUX_INFO_TYPE_NUM; i++) + dev_info(dev->dev, "%s\t0x%08x\n", + udma_cqe_aux_info_type_str[i], info->cqe_aux_info[i]); +} + +static void (*udma_cqe_aux_info_dump[14][2])(struct udma_dev *dev, + struct udma_cqe_aux_info_out *aux_info_out, + struct udma_cmd_query_cqe_aux_info *info) = { + {NULL, NULL}, + {dump_cqe_all_aux_info, dump_cqe_all_aux_info}, + {dump_cqe_server_loc_len_err_aux_info, + dump_cqe_client_loc_len_err_aux_info}, + {NULL, NULL}, + {dump_cqe_server_loc_access_err_aux_info, + dump_cqe_client_loc_access_err_aux_info}, + {dump_cqe_all_aux_info, + dump_cqe_client_rem_resp_len_err_aux_info}, + {dump_cqe_all_aux_info, dump_cqe_all_aux_info}, + {NULL, NULL}, + {dump_cqe_all_aux_info, + dump_cqe_client_rem_access_abort_err_aux_info}, + {dump_cqe_all_aux_info, + dump_cqe_client_ack_timeout_err_aux_info}, + {dump_cqe_all_aux_info, + dump_cqe_client_rnr_retry_cnt_exc_err_aux_info}, + {dump_cqe_all_aux_info, dump_cqe_all_aux_info}, + {NULL, NULL}, + {dump_cqe_all_aux_info, dump_cqe_all_aux_info}, +}; + +static void dump_fill_aux_info(struct udma_dev *dev, struct udma_ae_aux_info_out *aux_info_out, + struct udma_cmd_query_ae_aux_info *info, + enum udma_ae_aux_info_type *type, uint32_t aux_info_num) +{ + int i; + + if (aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL && + aux_info_out->aux_info_num >= aux_info_num) { + for (i = 0; i < aux_info_num; i++) { + aux_info_out->aux_info_type[i] = type[i]; + aux_info_out->aux_info_value[i] = info->ae_aux_info[type[i]]; + } + aux_info_out->aux_info_num = aux_info_num; + } + + for (i = 0; i < aux_info_num; i++) + dev_info(dev->dev, "%s\t0x%08x\n", udma_ae_aux_info_type_str[type[i]], + info->ae_aux_info[type[i]]); +} + +static void dump_ae_tp_flush_done_aux_info(struct udma_dev *dev, + struct udma_ae_aux_info_out *aux_info_out, + struct udma_cmd_query_ae_aux_info *info) +{ + enum udma_ae_aux_info_type type[] = { + TP_RRP_FLUSH_TIMER_PKT_CNT, + TPP_DFX5, + }; + + uint32_t aux_info_num = ARRAY_SIZE(type); + + dump_fill_aux_info(dev, aux_info_out, info, 
type, aux_info_num); +} + +static void dump_ae_tp_err_aux_info(struct udma_dev *dev, + struct udma_ae_aux_info_out *aux_info_out, + struct udma_cmd_query_ae_aux_info *info) +{ + enum udma_ae_aux_info_type type[] = { + TWP_AE_DFX_FOR_AE, + TP_RRP_ERR_FLG_0_FOR_AE, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + + dump_fill_aux_info(dev, aux_info_out, info, type, aux_info_num); +} + +static void dump_ae_jetty_err_aux_info(struct udma_dev *dev, + struct udma_ae_aux_info_out *aux_info_out, + struct udma_cmd_query_ae_aux_info *info) +{ + enum udma_ae_aux_info_type type[] = { + TP_RRP_ERR_FLG_0_FOR_AE, + TP_RRP_ERR_FLG_1, + TP_RWP_INNER_ALM_FOR_AE, + TP_RCP_INNER_ALM_FOR_AE, + LQC_TA_TQEP_WQE_ERR, + LQC_TA_CQM_CQE_INNER_ALARM, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + + dump_fill_aux_info(dev, aux_info_out, info, type, aux_info_num); +} + +static void dump_ae_jfc_err_aux_info(struct udma_dev *dev, + struct udma_ae_aux_info_out *aux_info_out, + struct udma_cmd_query_ae_aux_info *info) +{ + enum udma_ae_aux_info_type type[] = { + LQC_TA_CQM_CQE_INNER_ALARM, + }; + uint32_t aux_info_num = ARRAY_SIZE(type); + + dump_fill_aux_info(dev, aux_info_out, info, type, aux_info_num); +} + +static void dump_ae_aux_info(struct udma_dev *dev, + struct udma_ae_aux_info_out *aux_info_out, + struct udma_cmd_query_ae_aux_info *info) +{ + switch (info->event_type) { + case UBASE_EVENT_TYPE_TP_FLUSH_DONE: + dump_ae_tp_flush_done_aux_info(dev, aux_info_out, info); + break; + case UBASE_EVENT_TYPE_TP_LEVEL_ERROR: + dump_ae_tp_err_aux_info(dev, aux_info_out, info); + break; + case UBASE_EVENT_TYPE_JETTY_LEVEL_ERROR: + if (info->sub_type == UBASE_SUBEVENT_TYPE_JFS_CHECK_ERROR) + dump_ae_jetty_err_aux_info(dev, aux_info_out, info); + else + dump_ae_jfc_err_aux_info(dev, aux_info_out, info); + break; + default: + break; + } +} + +static int send_cmd_query_cqe_aux_info(struct udma_dev *udma_dev, + struct udma_cmd_query_cqe_aux_info *info) +{ + struct ubase_cmd_buf cmd_in, cmd_out; + int ret; + + udma_fill_buf(&cmd_in, UDMA_CMD_GET_CQE_AUX_INFO, true, + sizeof(struct udma_cmd_query_cqe_aux_info), info); + udma_fill_buf(&cmd_out, UDMA_CMD_GET_CQE_AUX_INFO, true, + sizeof(struct udma_cmd_query_cqe_aux_info), info); + + ret = ubase_cmd_send_inout(udma_dev->comdev.adev, &cmd_in, &cmd_out); + if (ret) + dev_err(udma_dev->dev, + "failed to query cqe aux info, ret = %d.\n", ret); + + return ret; +} + +static void free_kernel_cqe_aux_info(struct udma_cqe_aux_info_out *user_aux_info_out, + struct udma_cqe_aux_info_out *aux_info_out) +{ + if (!user_aux_info_out->aux_info_type) + return; + + kfree(aux_info_out->aux_info_type); + aux_info_out->aux_info_type = NULL; + + kfree(aux_info_out->aux_info_value); + aux_info_out->aux_info_value = NULL; +} + +static int copy_out_cqe_data_from_user(struct udma_dev *udma_dev, + struct ubcore_user_ctl_out *out, + struct udma_cqe_aux_info_out *aux_info_out, + struct ubcore_ucontext *uctx, + struct udma_cqe_aux_info_out *user_aux_info_out) +{ + if (out->addr != 0 && out->len == sizeof(struct udma_cqe_aux_info_out)) { + memcpy(aux_info_out, (void *)(uintptr_t)out->addr, + sizeof(struct udma_cqe_aux_info_out)); + if (uctx && aux_info_out->aux_info_num > 0 && + aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL) { + if (aux_info_out->aux_info_num > MAX_CQE_AUX_INFO_TYPE_NUM) { + dev_err(udma_dev->dev, + "invalid cqe aux info num %u.\n", + aux_info_out->aux_info_num); + return -EINVAL; + } + + user_aux_info_out->aux_info_type = aux_info_out->aux_info_type; + 
user_aux_info_out->aux_info_value = aux_info_out->aux_info_value; + aux_info_out->aux_info_type = + kcalloc(aux_info_out->aux_info_num, + sizeof(enum udma_cqe_aux_info_type), GFP_KERNEL); + if (!aux_info_out->aux_info_type) + return -ENOMEM; + + aux_info_out->aux_info_value = + kcalloc(aux_info_out->aux_info_num, + sizeof(uint32_t), GFP_KERNEL); + if (!aux_info_out->aux_info_value) { + kfree(aux_info_out->aux_info_type); + return -ENOMEM; + } + } + } + + return 0; +} + +static int copy_out_cqe_data_to_user(struct udma_dev *udma_dev, + struct ubcore_user_ctl_out *out, + struct udma_cqe_aux_info_out *aux_info_out, + struct ubcore_ucontext *uctx, + struct udma_cqe_aux_info_out *user_aux_info_out) +{ + unsigned long byte; + + if (out->addr != 0 && out->len == sizeof(struct udma_cqe_aux_info_out)) { + if (uctx && aux_info_out->aux_info_num > 0 && + aux_info_out->aux_info_type != NULL && + aux_info_out->aux_info_value != NULL) { + byte = copy_to_user((void __user *)user_aux_info_out->aux_info_type, + (void *)aux_info_out->aux_info_type, + aux_info_out->aux_info_num * + sizeof(enum udma_cqe_aux_info_type)); + if (byte) { + dev_err(udma_dev->dev, + "copy resp to aux info type failed, byte = %lu.\n", byte); + return -EFAULT; + } + + byte = copy_to_user((void __user *)user_aux_info_out->aux_info_value, + (void *)aux_info_out->aux_info_value, + aux_info_out->aux_info_num * + sizeof(uint32_t)); + if (byte) { + dev_err(udma_dev->dev, + "copy resp to aux info value failed, byte = %lu.\n", byte); + return -EFAULT; + } + + kfree(aux_info_out->aux_info_type); + kfree(aux_info_out->aux_info_value); + aux_info_out->aux_info_type = user_aux_info_out->aux_info_type; + aux_info_out->aux_info_value = user_aux_info_out->aux_info_value; + } + memcpy((void *)(uintptr_t)out->addr, aux_info_out, + sizeof(struct udma_cqe_aux_info_out)); + } + + return 0; +} + +int udma_query_cqe_aux_info(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_cqe_aux_info_out user_aux_info_out = {}; + struct udma_cqe_aux_info_out aux_info_out = {}; + struct udma_cmd_query_cqe_aux_info info = {}; + struct udma_cqe_info_in cqe_info_in = {}; + struct udma_dev *udev = to_udma_dev(dev); + int ret; + + if (udma_check_base_param(in->addr, in->len, sizeof(struct udma_cqe_info_in))) { + dev_err(udev->dev, "parameter invalid in query cqe aux info, in_len = %u.\n", + in->len); + return -EINVAL; + } + memcpy(&cqe_info_in, (void *)(uintptr_t)in->addr, + sizeof(struct udma_cqe_info_in)); + + info.status = cqe_info_in.status; + info.is_client = !(cqe_info_in.s_r & 1); + if (cqe_info_in.status >= ARRAY_SIZE(udma_cqe_aux_info_dump) || + udma_cqe_aux_info_dump[info.status][info.is_client] == NULL) { + dev_err(udev->dev, "status %u is invalid or does not need to be queried.\n", + cqe_info_in.status); + return -EINVAL; + } + + ret = copy_out_cqe_data_from_user(udev, out, &aux_info_out, uctx, &user_aux_info_out); + if (ret) { + dev_err(udev->dev, + "copy out data from user failed, ret = %d.\n", ret); + return ret; + } + + ret = send_cmd_query_cqe_aux_info(udev, &info); + if (ret) { + dev_err(udev->dev, + "send cmd query aux info failed, ret = %d.\n", + ret); + free_kernel_cqe_aux_info(&user_aux_info_out, &aux_info_out); + return ret; + } + + udma_cqe_aux_info_dump[info.status][info.is_client](udev, &aux_info_out, &info); + + ret = copy_out_cqe_data_to_user(udev, out, &aux_info_out, uctx, &user_aux_info_out); + if (ret) { + dev_err(udev->dev, + "copy out data to user 
failed, ret = %d.\n", ret); + free_kernel_cqe_aux_info(&user_aux_info_out, &aux_info_out); + } + + return ret; +} + static int to_hw_ae_event_type(struct udma_dev *udma_dev, uint32_t event_type, struct udma_cmd_query_ae_aux_info *info) { @@ -199,6 +1296,8 @@ int udma_query_ae_aux_info(struct ubcore_device *dev, struct ubcore_ucontext *uc return ret; } + dump_ae_aux_info(udma_dev, &aux_info_out, &info); + ret = copy_out_ae_data_to_user(udma_dev, out, &aux_info_out, uctx, &user_aux_info_out); if (ret) { dev_err(udma_dev->dev, @@ -208,3 +1307,143 @@ int udma_query_ae_aux_info(struct ubcore_device *dev, struct ubcore_ucontext *uc return ret; } + +static udma_user_ctl_ops g_udma_user_ctl_k_ops[] = { + [UDMA_USER_CTL_CREATE_JFS_EX] = udma_create_jfs_ops_ex, + [UDMA_USER_CTL_DELETE_JFS_EX] = udma_delete_jfs_ops_ex, + [UDMA_USER_CTL_CREATE_JFC_EX] = udma_create_jfc_ops_ex, + [UDMA_USER_CTL_DELETE_JFC_EX] = udma_delete_jfc_ops_ex, + [UDMA_USER_CTL_SET_CQE_ADDR] = udma_set_cqe_ex, + [UDMA_USER_CTL_QUERY_UE_INFO] = udma_query_ue_info_ex, + [UDMA_USER_CTL_GET_DEV_RES_RATIO] = udma_get_dev_resource_ratio, + [UDMA_USER_CTL_NPU_REGISTER_INFO_CB] = udma_register_npu_cb, + [UDMA_USER_CTL_NPU_UNREGISTER_INFO_CB] = udma_unregister_npu_cb, + [UDMA_USER_CTL_QUERY_TP_SPORT] = udma_ctrlq_query_tp_sport, + [UDMA_USER_CTL_QUERY_CQE_AUX_INFO] = udma_query_cqe_aux_info, + [UDMA_USER_CTL_QUERY_AE_AUX_INFO] = udma_query_ae_aux_info, + [UDMA_USER_CTL_QUERY_UBMEM_INFO] = udma_ctrlq_query_ubmem_info, + [UDMA_USER_CTL_QUERY_PAIR_DEVNUM] = udma_query_pair_dev_count, +}; + +static udma_user_ctl_ops g_udma_user_ctl_u_ops[] = { + [UDMA_USER_CTL_CREATE_JFS_EX] = NULL, + [UDMA_USER_CTL_DELETE_JFS_EX] = NULL, + [UDMA_USER_CTL_CREATE_JFC_EX] = NULL, + [UDMA_USER_CTL_DELETE_JFC_EX] = NULL, + [UDMA_USER_CTL_SET_CQE_ADDR] = NULL, + [UDMA_USER_CTL_QUERY_UE_INFO] = NULL, + [UDMA_USER_CTL_GET_DEV_RES_RATIO] = NULL, + [UDMA_USER_CTL_NPU_REGISTER_INFO_CB] = NULL, + [UDMA_USER_CTL_NPU_UNREGISTER_INFO_CB] = NULL, + [UDMA_USER_CTL_QUERY_TP_SPORT] = udma_ctrlq_query_tp_sport, + [UDMA_USER_CTL_QUERY_CQE_AUX_INFO] = udma_query_cqe_aux_info, + [UDMA_USER_CTL_QUERY_AE_AUX_INFO] = udma_query_ae_aux_info, + [UDMA_USER_CTL_QUERY_UBMEM_INFO] = NULL, + [UDMA_USER_CTL_QUERY_PAIR_DEVNUM] = NULL, +}; + +static int udma_user_data(struct ubcore_device *dev, + struct ubcore_user_ctl *k_user_ctl) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct ubcore_user_ctl_out out = {}; + struct ubcore_user_ctl_in in = {}; + unsigned long byte; + int ret; + + if (k_user_ctl->in.len >= UDMA_HW_PAGE_SIZE || k_user_ctl->out.len >= UDMA_HW_PAGE_SIZE) { + dev_err(udev->dev, "The len exceeds the maximum value in user ctrl.\n"); + return -EINVAL; + } + + in.opcode = k_user_ctl->in.opcode; + if (!g_udma_user_ctl_u_ops[in.opcode]) { + dev_err(udev->dev, "invalid user opcode: 0x%x.\n", in.opcode); + return -EINVAL; + } + + if (k_user_ctl->in.len) { + in.addr = (uint64_t)kzalloc(k_user_ctl->in.len, GFP_KERNEL); + if (!in.addr) + return -ENOMEM; + + in.len = k_user_ctl->in.len; + byte = copy_from_user((void *)(uintptr_t)in.addr, + (void __user *)(uintptr_t)k_user_ctl->in.addr, + k_user_ctl->in.len); + if (byte) { + dev_err(udev->dev, + "failed to copy user data in user ctrl, byte = %lu.\n", byte); + kfree((void *)in.addr); + return -EFAULT; + } + } + + if (k_user_ctl->out.len) { + out.addr = (uint64_t)kzalloc(k_user_ctl->out.len, GFP_KERNEL); + if (!out.addr) { + kfree((void *)in.addr); + + return -ENOMEM; + } + out.len = k_user_ctl->out.len; + + if (k_user_ctl->out.addr) { 
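+			/*
+			 * Seed the kernel copy of the out buffer from user space:
+			 * some user_ctl ops (e.g. the CQE/AE aux info queries) read
+			 * caller-supplied fields such as aux_info_num and the
+			 * aux_info_type/aux_info_value pointers from the out buffer
+			 * before filling in results.
+			 */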
+ byte = copy_from_user((void *)(uintptr_t)out.addr, + (void __user *)(uintptr_t)k_user_ctl->out.addr, + k_user_ctl->out.len); + if (byte) { + dev_err(udev->dev, + "failed to copy user data out user ctrl, byte = %lu.\n", + byte); + kfree((void *)out.addr); + kfree((void *)in.addr); + + return -EFAULT; + } + } + } + + ret = g_udma_user_ctl_u_ops[in.opcode](dev, k_user_ctl->uctx, &in, &out); + kfree((void *)in.addr); + + if (out.addr) { + byte = copy_to_user((void __user *)(uintptr_t)k_user_ctl->out.addr, + (void *)(uintptr_t)out.addr, min(out.len, k_user_ctl->out.len)); + if (byte) { + dev_err(udev->dev, + "copy resp to user failed in user ctrl, byte = %lu.\n", byte); + ret = -EFAULT; + } + + kfree((void *)out.addr); + } + + return ret; +} + +int udma_user_ctl(struct ubcore_device *dev, struct ubcore_user_ctl *k_user_ctl) +{ + struct udma_dev *udev; + + if (dev == NULL || k_user_ctl == NULL) + return -EINVAL; + + udev = to_udma_dev(dev); + + if (k_user_ctl->in.opcode >= UDMA_USER_CTL_MAX) { + dev_err(udev->dev, "invalid opcode: 0x%x.\n", k_user_ctl->in.opcode); + return -EINVAL; + } + + if (k_user_ctl->uctx) + return udma_user_data(dev, k_user_ctl); + + if (!g_udma_user_ctl_k_ops[k_user_ctl->in.opcode]) { + dev_err(udev->dev, "invalid user opcode: 0x%x.\n", k_user_ctl->in.opcode); + return -EINVAL; + } + + return g_udma_user_ctl_k_ops[k_user_ctl->in.opcode](dev, k_user_ctl->uctx, &k_user_ctl->in, + &k_user_ctl->out); +} diff --git a/drivers/ub/urma/hw/udma/udma_ctrlq_tp.c b/drivers/ub/urma/hw/udma/udma_ctrlq_tp.c index af1732e1629b573390162154ce04a753edaf8c5f..966dc7a41d94797c3b47d2f1a12f60d18a3b810b 100644 --- a/drivers/ub/urma/hw/udma/udma_ctrlq_tp.c +++ b/drivers/ub/urma/hw/udma/udma_ctrlq_tp.c @@ -115,6 +115,610 @@ int udma_ctrlq_tp_flush_done(struct udma_dev *udev, uint32_t tpn) return ret; } +int udma_get_dev_resource_ratio(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev_resource_ratio dev_res = {}; + struct udma_dev_pair_info dev_res_out = {}; + struct udma_dev *udev = to_udma_dev(dev); + struct ubase_ctrlq_msg ctrlq_msg = {}; + int ret = 0; + + if (udma_check_base_param(in->addr, in->len, sizeof(dev_res.index))) { + dev_err(udev->dev, "parameter invalid in get dev res, len = %u.\n", in->len); + return -EINVAL; + } + + if (out->addr == 0 || out->len != sizeof(dev_res_out)) { + dev_err(udev->dev, "get dev resource ratio, addr is NULL:%d, len:%u.\n", + out->addr == 0, out->len); + return -EINVAL; + } + + memcpy(&dev_res.index, (void *)(uintptr_t)in->addr, sizeof(dev_res.index)); + + ret = ubase_get_bus_eid(udev->comdev.adev, &dev_res.eid); + if (ret) { + dev_err(udev->dev, "get dev bus eid failed, ret is %d.\n", ret); + return ret; + } + + ctrlq_msg.service_type = UBASE_CTRLQ_SER_TYPE_DEV_REGISTER; + ctrlq_msg.service_ver = UBASE_CTRLQ_SER_VER_01; + ctrlq_msg.need_resp = 1; + ctrlq_msg.in_size = sizeof(dev_res); + ctrlq_msg.in = (void *)&dev_res; + ctrlq_msg.out_size = sizeof(dev_res_out); + ctrlq_msg.out = &dev_res_out; + ctrlq_msg.opcode = UDMA_CTRLQ_GET_DEV_RESOURCE_RATIO; + + ret = ubase_ctrlq_send_msg(udev->comdev.adev, &ctrlq_msg); + if (ret) { + dev_err(udev->dev, "get dev res send ctrlq msg failed, ret is %d.\n", ret); + return ret; + } + memcpy((void *)(uintptr_t)out->addr, &dev_res_out, sizeof(dev_res_out)); + + return ret; +} + +static int udma_dev_res_ratio_ctrlq_handler(struct auxiliary_device *adev, + uint8_t service_ver, void *data, + uint16_t len, uint16_t seq) +{ + struct 
udma_dev *udev = (struct udma_dev *)get_udma_dev(adev); + struct udma_ctrlq_event_nb *udma_cb; + int ret; + + mutex_lock(&udev->npu_nb_mutex); + udma_cb = xa_load(&udev->npu_nb_table, UDMA_CTRLQ_NOTIFY_DEV_RESOURCE_RATIO); + if (!udma_cb) { + dev_err(udev->dev, "failed to query npu info cb while xa_load.\n"); + mutex_unlock(&udev->npu_nb_mutex); + return -EINVAL; + } + + ret = udma_cb->crq_handler(&udev->ub_dev, data, len); + if (ret) + dev_err(udev->dev, "npu crq handler failed, ret = %d.\n", ret); + mutex_unlock(&udev->npu_nb_mutex); + + return ret; +} + +int udma_register_npu_cb(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct ubase_ctrlq_event_nb ubase_cb = {}; + struct udma_dev *udev = to_udma_dev(dev); + struct udma_ctrlq_event_nb *udma_cb; + int ret; + + if (udma_check_base_param(in->addr, in->len, sizeof(udma_cb->crq_handler))) { + dev_err(udev->dev, "parameter invalid in register npu cb, len = %u.\n", in->len); + return -EINVAL; + } + + udma_cb = kzalloc(sizeof(*udma_cb), GFP_KERNEL); + if (!udma_cb) + return -ENOMEM; + + udma_cb->opcode = UDMA_CTRLQ_NOTIFY_DEV_RESOURCE_RATIO; + udma_cb->crq_handler = (void *)(uintptr_t)in->addr; + + mutex_lock(&udev->npu_nb_mutex); + if (xa_load(&udev->npu_nb_table, UDMA_CTRLQ_NOTIFY_DEV_RESOURCE_RATIO)) { + dev_err(udev->dev, "query npu info callback exist.\n"); + ret = -EINVAL; + goto err_release_udma_cb; + } + ret = xa_err(__xa_store(&udev->npu_nb_table, udma_cb->opcode, udma_cb, GFP_KERNEL)); + if (ret) { + dev_err(udev->dev, + "save crq nb entry failed, opcode is %u, ret is %d.\n", + udma_cb->opcode, ret); + goto err_release_udma_cb; + } + + ubase_cb.service_type = UBASE_CTRLQ_SER_TYPE_DEV_REGISTER; + ubase_cb.opcode = UDMA_CTRLQ_NOTIFY_DEV_RESOURCE_RATIO; + ubase_cb.back = udev->comdev.adev; + ubase_cb.crq_handler = udma_dev_res_ratio_ctrlq_handler; + ret = ubase_ctrlq_register_crq_event(udev->comdev.adev, &ubase_cb); + if (ret) { + __xa_erase(&udev->npu_nb_table, UDMA_CTRLQ_NOTIFY_DEV_RESOURCE_RATIO); + dev_err(udev->dev, "ubase register npu crq event failed, ret is %d.\n", ret); + goto err_release_udma_cb; + } + mutex_unlock(&udev->npu_nb_mutex); + + return 0; + +err_release_udma_cb: + mutex_unlock(&udev->npu_nb_mutex); + kfree(udma_cb); + return ret; +} + +int udma_unregister_npu_cb(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct udma_ctrlq_event_nb *nb; + + ubase_ctrlq_unregister_crq_event(udev->comdev.adev, + UBASE_CTRLQ_SER_TYPE_DEV_REGISTER, + UDMA_CTRLQ_NOTIFY_DEV_RESOURCE_RATIO); + + mutex_lock(&udev->npu_nb_mutex); + nb = xa_load(&udev->npu_nb_table, UDMA_CTRLQ_NOTIFY_DEV_RESOURCE_RATIO); + if (!nb) { + dev_warn(udev->dev, "query npu info cb not exist.\n"); + goto err_find_npu_nb; + } + + __xa_erase(&udev->npu_nb_table, UDMA_CTRLQ_NOTIFY_DEV_RESOURCE_RATIO); + kfree(nb); + nb = NULL; + +err_find_npu_nb: + mutex_unlock(&udev->npu_nb_mutex); + return 0; +} + +static int udma_ctrlq_get_trans_type(struct udma_dev *dev, + enum ubcore_transport_mode trans_mode, + enum udma_ctrlq_trans_type *tp_type) +{ +#define UDMA_TRANS_MODE_NUM 5 + +struct udma_ctrlq_trans_map { + bool is_valid; + enum udma_ctrlq_trans_type tp_type; +}; + static struct udma_ctrlq_trans_map ctrlq_trans_map[UDMA_TRANS_MODE_NUM] = { + {false, UDMA_CTRLQ_TRANS_TYPE_MAX}, + {true, UDMA_CTRLQ_TRANS_TYPE_TP_RM}, + {true, UDMA_CTRLQ_TRANS_TYPE_TP_RC}, + {false, 
UDMA_CTRLQ_TRANS_TYPE_MAX},
+		{true, UDMA_CTRLQ_TRANS_TYPE_TP_UM},
+	};
+	uint8_t transport_mode = (uint8_t)trans_mode;
+
+	if ((transport_mode < UDMA_TRANS_MODE_NUM) &&
+	    ctrlq_trans_map[transport_mode].is_valid) {
+		*tp_type = ctrlq_trans_map[transport_mode].tp_type;
+		return 0;
+	}
+
+	dev_err(dev->dev, "the trans_mode %u is not supported.\n", trans_mode);
+
+	return -EINVAL;
+}
+
+static int udma_send_req_to_mue(struct udma_dev *dev, union ubcore_tp_handle *tp_handle)
+{
+	uint32_t data_len = (uint32_t)sizeof(struct udma_ue_tp_info);
+	struct udma_ue_tp_info *data;
+	struct ubcore_req *req_msg;
+	int ret;
+
+	req_msg = kzalloc(sizeof(*req_msg) + data_len, GFP_KERNEL);
+	if (!req_msg)
+		return -ENOMEM;
+
+	data = (struct udma_ue_tp_info *)req_msg->data;
+	data->start_tpn = tp_handle->bs.tpn_start;
+	data->tp_cnt = tp_handle->bs.tp_cnt;
+	req_msg->len = data_len;
+	ret = send_req_to_mue(dev, req_msg, UDMA_CMD_NOTIFY_MUE_SAVE_TP);
+	if (ret)
+		dev_err(dev->dev, "failed to notify mue to save tp, ret %d.\n", ret);
+
+	kfree(req_msg);
+
+	return ret;
+}
+
+static int udma_ctrlq_store_one_tpid(struct udma_dev *udev, struct xarray *ctrlq_tpid_table,
+				     struct udma_ctrlq_tpid *tpid)
+{
+	struct udma_ctrlq_tpid *tpid_entity;
+	int ret;
+
+	if (debug_switch)
+		dev_info(udev->dev, "udma ctrlq store one tpid start, tpid = %u.\n", tpid->tpid);
+
+	if (xa_load(ctrlq_tpid_table, tpid->tpid)) {
+		dev_warn(udev->dev,
+			 "the tpid already exists in ctrlq tpid table, tpid = %u.\n",
+			 tpid->tpid);
+		return 0;
+	}
+
+	tpid_entity = kzalloc(sizeof(*tpid_entity), GFP_KERNEL);
+	if (!tpid_entity)
+		return -ENOMEM;
+
+	memcpy(tpid_entity, tpid, sizeof(*tpid));
+
+	ret = xa_err(xa_store(ctrlq_tpid_table, tpid->tpid, tpid_entity, GFP_KERNEL));
+	if (ret) {
+		dev_err(udev->dev,
+			"store tpid entity failed, ret = %d, tpid = %u.\n",
+			ret, tpid->tpid);
+		kfree(tpid_entity);
+	}
+
+	return ret;
+}
+
+static void udma_ctrlq_erase_one_tpid(struct xarray *ctrlq_tpid_table,
+				      uint32_t tpid)
+{
+	struct udma_ctrlq_tpid *tpid_entity;
+
+	xa_lock(ctrlq_tpid_table);
+	tpid_entity = xa_load(ctrlq_tpid_table, tpid);
+	if (!tpid_entity) {
+		xa_unlock(ctrlq_tpid_table);
+		return;
+	}
+	__xa_erase(ctrlq_tpid_table, tpid);
+	kfree(tpid_entity);
+	xa_unlock(ctrlq_tpid_table);
+}
+
+static int udma_ctrlq_get_tpid_list(struct udma_dev *udev,
+				    struct udma_ctrlq_get_tp_list_req_data *tp_cfg_req,
+				    struct ubcore_get_tp_cfg *tpid_cfg,
+				    struct udma_ctrlq_tpid_list_rsp *tpid_list_resp)
+{
+	enum udma_ctrlq_trans_type trans_type;
+	struct ubase_ctrlq_msg msg = {};
+	int ret;
+
+	if (!tpid_cfg->flag.bs.ctp) {
+		if (udma_ctrlq_get_trans_type(udev, tpid_cfg->trans_mode, &trans_type) != 0) {
+			dev_err(udev->dev, "udma get ctrlq trans_type failed, trans_mode = %d.\n",
+				tpid_cfg->trans_mode);
+			return -EINVAL;
+		}
+
+		tp_cfg_req->trans_type = (uint32_t)trans_type;
+	} else {
+		tp_cfg_req->trans_type = UDMA_CTRLQ_TRANS_TYPE_CTP;
+	}
+
+	udma_swap_endian(tpid_cfg->local_eid.raw, tp_cfg_req->seid,
+			 UDMA_EID_SIZE);
+	udma_swap_endian(tpid_cfg->peer_eid.raw, tp_cfg_req->deid,
+			 UDMA_EID_SIZE);
+
+	udma_ctrlq_set_tp_msg(&msg, (void *)tp_cfg_req, sizeof(*tp_cfg_req),
+			      (void *)tpid_list_resp, sizeof(*tpid_list_resp));
+	msg.opcode = UDMA_CMD_CTRLQ_GET_TP_LIST;
+
+	ret = ubase_ctrlq_send_msg(udev->comdev.adev, &msg);
+	if (ret)
+		dev_err(udev->dev, "ctrlq send msg failed, ret = %d.\n", ret);
+
+	return ret;
+}
+
+static int udma_ctrlq_store_tpid_list(struct udma_dev *udev,
+				      struct xarray *ctrlq_tpid_table,
+				      struct udma_ctrlq_tpid_list_rsp *tpid_list_resp)
+{
+	
int ret; + int i; + + if (debug_switch) + dev_info(udev->dev, "udma ctrlq store tpid list tp_list_cnt = %u.\n", + tpid_list_resp->tp_list_cnt); + + for (i = 0; i < (int)tpid_list_resp->tp_list_cnt; i++) { + ret = udma_ctrlq_store_one_tpid(udev, ctrlq_tpid_table, + &tpid_list_resp->tpid_list[i]); + if (ret) + goto err_store_one_tpid; + } + + return 0; + +err_store_one_tpid: + for (i--; i >= 0; i--) + udma_ctrlq_erase_one_tpid(ctrlq_tpid_table, tpid_list_resp->tpid_list[i].tpid); + + return ret; +} + +int udma_get_tp_list(struct ubcore_device *dev, struct ubcore_get_tp_cfg *tpid_cfg, + uint32_t *tp_cnt, struct ubcore_tp_info *tp_list, + struct ubcore_udata *udata) +{ + struct udma_ctrlq_get_tp_list_req_data tp_cfg_req = {}; + struct udma_ctrlq_tpid_list_rsp tpid_list_resp = {}; + struct udma_dev *udev = to_udma_dev(dev); + int ret; + int i; + + if (!udata) + tp_cfg_req.flag = UDMA_DEFAULT_PID; + else + tp_cfg_req.flag = (uint32_t)current->tgid & UDMA_PID_MASK; + + ret = udma_ctrlq_get_tpid_list(udev, &tp_cfg_req, tpid_cfg, &tpid_list_resp); + if (ret) { + dev_err(udev->dev, "udma ctrlq get tpid list failed, ret = %d.\n", ret); + return ret; + } + + if (tpid_list_resp.tp_list_cnt == 0 || tpid_list_resp.tp_list_cnt > *tp_cnt) { + dev_err(udev->dev, + "check tp list count failed, count = %u.\n", + tpid_list_resp.tp_list_cnt); + return -EINVAL; + } + + for (i = 0; i < tpid_list_resp.tp_list_cnt; i++) { + tp_list[i].tp_handle.bs.tpid = tpid_list_resp.tpid_list[i].tpid; + tp_list[i].tp_handle.bs.tpn_start = tpid_list_resp.tpid_list[i].tpn_start; + tp_list[i].tp_handle.bs.tp_cnt = + tpid_list_resp.tpid_list[i].tpn_cnt & UDMA_TPN_CNT_MASK; + } + *tp_cnt = tpid_list_resp.tp_list_cnt; + + ret = udma_ctrlq_store_tpid_list(udev, &udev->ctrlq_tpid_table, &tpid_list_resp); + if (ret) + dev_err(udev->dev, "udma ctrlq store list failed, ret = %d.\n", ret); + + return ret; +} + +void udma_ctrlq_destroy_tpid_list(struct udma_dev *dev, struct xarray *ctrlq_tpid_table, + bool is_need_flush) +{ + struct udma_ctrlq_tpid *tpid_entity = NULL; + unsigned long tpid = 0; + + xa_lock(ctrlq_tpid_table); + if (!xa_empty(ctrlq_tpid_table)) { + xa_for_each(ctrlq_tpid_table, tpid, tpid_entity) { + __xa_erase(ctrlq_tpid_table, tpid); + kfree(tpid_entity); + } + } + xa_unlock(ctrlq_tpid_table); + xa_destroy(ctrlq_tpid_table); +} + +static int udma_k_ctrlq_create_active_tp_msg(struct udma_dev *udev, + struct ubcore_active_tp_cfg *active_cfg, + uint32_t *tp_id) +{ + struct udma_ctrlq_active_tp_resp_data active_tp_resp = {}; + struct udma_ctrlq_active_tp_req_data active_tp_req = {}; + struct ubase_ctrlq_msg msg = {}; + int ret; + + active_tp_req.local_tp_id = active_cfg->tp_handle.bs.tpid; + active_tp_req.local_tpn_cnt = active_cfg->tp_handle.bs.tp_cnt; + active_tp_req.local_tpn_start = active_cfg->tp_handle.bs.tpn_start; + active_tp_req.local_psn = active_cfg->tp_attr.tx_psn; + + active_tp_req.remote_tp_id = active_cfg->peer_tp_handle.bs.tpid; + active_tp_req.remote_tpn_cnt = active_cfg->peer_tp_handle.bs.tp_cnt; + active_tp_req.remote_tpn_start = active_cfg->peer_tp_handle.bs.tpn_start; + active_tp_req.remote_psn = active_cfg->tp_attr.rx_psn; + + if (debug_switch) + udma_dfx_ctx_print(udev, "udma create active tp msg info", + active_tp_req.local_tp_id, + sizeof(struct udma_ctrlq_active_tp_req_data) / sizeof(uint32_t), + (uint32_t *)&active_tp_req); + + msg.opcode = UDMA_CMD_CTRLQ_ACTIVE_TP; + udma_ctrlq_set_tp_msg(&msg, (void *)&active_tp_req, sizeof(active_tp_req), + (void *)&active_tp_resp, sizeof(active_tp_resp)); + + ret = 
ubase_ctrlq_send_msg(udev->comdev.adev, &msg); + if (ret) + dev_err(udev->dev, "udma active tp send failed, ret = %d.\n", ret); + + *tp_id = active_tp_resp.local_tp_id; + + return ret; +} + +int udma_ctrlq_set_active_tp_ex(struct udma_dev *dev, + struct ubcore_active_tp_cfg *active_cfg) +{ + uint32_t tp_id = active_cfg->tp_handle.bs.tpid; + int ret; + + ret = udma_k_ctrlq_create_active_tp_msg(dev, active_cfg, &tp_id); + if (ret) + return ret; + + active_cfg->tp_handle.bs.tpid = tp_id; + + if (dev->is_ue) + (void)udma_send_req_to_mue(dev, &(active_cfg->tp_handle)); + + return 0; +} + +static int udma_k_ctrlq_deactive_tp(struct udma_dev *udev, union ubcore_tp_handle tp_handle, + struct ubcore_udata *udata) +{ +#define UDMA_RSP_TP_MUL 2 + uint32_t tp_id = tp_handle.bs.tpid & UDMA_TPHANDLE_TPID_SHIFT; + struct udma_ctrlq_deactive_tp_req_data deactive_tp_req = {}; + uint32_t tp_num = tp_handle.bs.tp_cnt; + struct ubase_ctrlq_msg msg = {}; + int ret; + + if (tp_num) { + ret = udma_close_ue_rx(udev, true, false, false, tp_num * UDMA_RSP_TP_MUL); + if (ret) { + dev_err(udev->dev, "close ue rx failed in deactivate tp.\n"); + return ret; + } + } + + msg.opcode = UDMA_CMD_CTRLQ_DEACTIVE_TP; + deactive_tp_req.tp_id = tp_id; + deactive_tp_req.tpn_cnt = tp_handle.bs.tp_cnt; + deactive_tp_req.start_tpn = tp_handle.bs.tpn_start; + if (!udata) + deactive_tp_req.pid_flag = UDMA_DEFAULT_PID; + else + deactive_tp_req.pid_flag = (uint32_t)current->tgid & UDMA_PID_MASK; + + udma_ctrlq_set_tp_msg(&msg, (void *)&deactive_tp_req, sizeof(deactive_tp_req), NULL, 0); + + ret = ubase_ctrlq_send_msg(udev->comdev.adev, &msg); + if (ret != -EAGAIN && ret) { + dev_err(udev->dev, "deactivate tp send msg failed, tp_id = %u, ret = %d.\n", + tp_id, ret); + if (tp_num) + udma_open_ue_rx(udev, true, false, false, tp_num * UDMA_RSP_TP_MUL); + return ret; + } + + udma_ctrlq_erase_one_tpid(&udev->ctrlq_tpid_table, tp_id); + + return (ret == -EAGAIN) ? 
0 : ret; +} + +int udma_ctrlq_query_ubmem_info(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ +#define UDMA_CTRLQ_SER_TYPE_UBMEM 0x5 + struct udma_ctrlq_ubmem_out_query ubmem_info_out = {}; + struct udma_dev *udev = to_udma_dev(dev); + struct ubase_ctrlq_msg ctrlq_msg = {}; + uint32_t input_buf = 0; + int ret; + + if (out->addr == 0 || out->len != sizeof(struct udma_ctrlq_ubmem_out_query)) { + dev_err(udev->dev, "query ubmem info failed, addr is NULL:%d, len:%u.\n", + out->addr == 0, out->len); + return -EINVAL; + } + + ctrlq_msg.service_type = UDMA_CTRLQ_SER_TYPE_UBMEM; + ctrlq_msg.service_ver = UBASE_CTRLQ_SER_VER_01; + ctrlq_msg.need_resp = 1; + ctrlq_msg.in_size = sizeof(input_buf); + ctrlq_msg.in = (void *)&input_buf; + ctrlq_msg.out_size = sizeof(ubmem_info_out); + ctrlq_msg.out = &ubmem_info_out; + ctrlq_msg.opcode = UDMA_CTRLQ_QUERY_UBMEM_INFO; + + ret = ubase_ctrlq_send_msg(udev->comdev.adev, &ctrlq_msg); + if (ret) { + dev_err(udev->dev, "get dev res send ctrlq msg failed, ret is %d.\n", ret); + return ret; + } + + memcpy((void *)(uintptr_t)out->addr, &ubmem_info_out, sizeof(ubmem_info_out)); + + return ret; +} + +int udma_set_tp_attr(struct ubcore_device *dev, const uint64_t tp_handle, + const uint8_t tp_attr_cnt, const uint32_t tp_attr_bitmap, + const struct ubcore_tp_attr_value *tp_attr, struct ubcore_udata *udata) +{ + struct udma_ctrlq_set_tp_attr_req tp_attr_req = {}; + struct udma_dev *udev = to_udma_dev(dev); + union ubcore_tp_handle tp_handle_val; + struct ubase_ctrlq_msg msg = {}; + int ret; + + tp_handle_val.value = tp_handle; + tp_attr_req.tpid = tp_handle_val.bs.tpid; + tp_attr_req.tpn_cnt = tp_handle_val.bs.tp_cnt; + tp_attr_req.tpn_start = tp_handle_val.bs.tpn_start; + tp_attr_req.tp_attr_cnt = tp_attr_cnt; + tp_attr_req.tp_attr.tp_attr_bitmap = tp_attr_bitmap; + memcpy(&tp_attr_req.tp_attr.tp_attr_value, (void *)tp_attr, sizeof(*tp_attr)); + + udma_ctrlq_set_tp_msg(&msg, &tp_attr_req, sizeof(tp_attr_req), NULL, 0); + msg.opcode = UDMA_CMD_CTRLQ_SET_TP_ATTR; + + ret = ubase_ctrlq_send_msg(udev->comdev.adev, &msg); + if (ret) + dev_err(udev->dev, "set tp attr failed, tpid = %u, ret = %d.\n", + tp_attr_req.tpid, ret); + + return ret; +} + +int udma_get_tp_attr(struct ubcore_device *dev, const uint64_t tp_handle, + uint8_t *tp_attr_cnt, uint32_t *tp_attr_bitmap, + struct ubcore_tp_attr_value *tp_attr, struct ubcore_udata *udata) +{ + struct udma_ctrlq_get_tp_attr_resp tp_attr_resp = {}; + struct udma_ctrlq_get_tp_attr_req tp_attr_req = {}; + struct udma_dev *udev = to_udma_dev(dev); + union ubcore_tp_handle tp_handle_val; + struct ubase_ctrlq_msg msg = {}; + int ret; + + tp_handle_val.value = tp_handle; + tp_attr_req.tpid.tpid = tp_handle_val.bs.tpid; + tp_attr_req.tpid.tpn_cnt = tp_handle_val.bs.tp_cnt; + tp_attr_req.tpid.tpn_start = tp_handle_val.bs.tpn_start; + udma_ctrlq_set_tp_msg(&msg, &tp_attr_req, sizeof(tp_attr_req), &tp_attr_resp, + sizeof(tp_attr_resp)); + msg.opcode = UDMA_CMD_CTRLQ_GET_TP_ATTR; + + ret = ubase_ctrlq_send_msg(udev->comdev.adev, &msg); + if (ret) { + dev_err(udev->dev, "get tp attr failed, tpid = %u, ret = %d.\n", + tp_attr_req.tpid.tpid, ret); + return ret; + } + + *tp_attr_cnt = tp_attr_resp.tp_attr_cnt; + *tp_attr_bitmap = tp_attr_resp.tp_attr.tp_attr_bitmap; + memcpy((void *)tp_attr, &tp_attr_resp.tp_attr.tp_attr_value, + sizeof(tp_attr_resp.tp_attr.tp_attr_value)); + + return 0; +} + +int send_req_to_mue(struct udma_dev *udma_dev, struct ubcore_req *req, 
uint16_t opcode) +{ + struct udma_req_msg *req_msg; + struct ubase_cmd_buf in; + uint32_t msg_len; + int ret; + + msg_len = sizeof(*req_msg) + req->len; + req_msg = kzalloc(msg_len, GFP_KERNEL); + if (!req_msg) + return -ENOMEM; + + req_msg->resp_code = opcode; + + (void)memcpy(&req_msg->req, req, sizeof(*req)); + (void)memcpy(req_msg->req.data, req->data, req->len); + udma_fill_buf(&in, UBASE_OPC_UE_TO_MUE, false, msg_len, req_msg); + + ret = ubase_cmd_send_in(udma_dev->comdev.adev, &in); + if (ret) + dev_err(udma_dev->dev, + "send req msg cmd failed, ret is %d.\n", ret); + + kfree(req_msg); + + return ret; +} + int send_resp_to_ue(struct udma_dev *udma_dev, struct ubcore_resp *req_host, uint8_t dst_ue_idx, uint16_t opcode) { @@ -145,3 +749,71 @@ int send_resp_to_ue(struct udma_dev *udma_dev, struct ubcore_resp *req_host, return ret; } + +int udma_active_tp(struct ubcore_device *dev, struct ubcore_active_tp_cfg *active_cfg) +{ + struct udma_dev *udma_dev = to_udma_dev(dev); + int ret; + + if (debug_switch) + udma_dfx_ctx_print(udma_dev, "udma active tp ex", active_cfg->tp_handle.bs.tpid, + sizeof(struct ubcore_active_tp_cfg) / sizeof(uint32_t), + (uint32_t *)active_cfg); + ret = udma_ctrlq_set_active_tp_ex(udma_dev, active_cfg); + if (ret) + dev_err(udma_dev->dev, "Failed to set active tp msg, ret %d.\n", ret); + + return ret; +} + +int udma_deactive_tp(struct ubcore_device *dev, union ubcore_tp_handle tp_handle, + struct ubcore_udata *udata) +{ + struct udma_dev *udma_dev = to_udma_dev(dev); + + if (debug_switch) + dev_info(udma_dev->dev, "udma deactivate tp ex tp_id = %u\n", tp_handle.bs.tpid); + + return udma_k_ctrlq_deactive_tp(udma_dev, tp_handle, udata); +} + +int udma_query_pair_dev_count(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out) +{ + struct udma_dev *udev = to_udma_dev(dev); + struct ubase_ctrlq_msg ctrlq_msg = {}; + struct ubase_bus_eid eid = {}; + uint32_t pair_device_num = 0; + int ret; + + if (out->addr == 0 || out->len != sizeof(pair_device_num)) { + dev_err(udev->dev, "query pair dev count, addr is NULL:%d, len:%u.\n", + out->addr == 0, out->len); + return -EINVAL; + } + + ret = ubase_get_bus_eid(udev->comdev.adev, &eid); + if (ret) { + dev_err(udev->dev, "get dev bus eid failed, ret is %d.\n", ret); + return ret; + } + + ctrlq_msg.service_type = UBASE_CTRLQ_SER_TYPE_DEV_REGISTER; + ctrlq_msg.service_ver = UBASE_CTRLQ_SER_VER_01; + ctrlq_msg.need_resp = 1; + ctrlq_msg.in_size = sizeof(eid); + ctrlq_msg.in = (void *)&eid; + ctrlq_msg.out_size = sizeof(pair_device_num); + ctrlq_msg.out = &pair_device_num; + ctrlq_msg.opcode = UDMA_CTRLQ_GET_DEV_RESOURCE_COUNT; + + ret = ubase_ctrlq_send_msg(udev->comdev.adev, &ctrlq_msg); + if (ret) { + dev_err(udev->dev, "get dev res send ctrlq msg failed, ret is %d.\n", ret); + return ret; + } + + memcpy((void *)(uintptr_t)out->addr, &pair_device_num, sizeof(pair_device_num)); + + return ret; +} diff --git a/drivers/ub/urma/hw/udma/udma_ctrlq_tp.h b/drivers/ub/urma/hw/udma/udma_ctrlq_tp.h index 6672f8ea01ecad681ae337708bf1fe637718c371..bfa3ed44c381ba24473946e6bf87d7e73a9e3a1c 100644 --- a/drivers/ub/urma/hw/udma/udma_ctrlq_tp.h +++ b/drivers/ub/urma/hw/udma/udma_ctrlq_tp.h @@ -8,7 +8,14 @@ #define UDMA_EID_SIZE 16 #define UDMA_CNA_SIZE 16 +#define UDMA_PID_MASK 24 +#define UDMA_DEFAULT_PID 1 #define UDMA_UE_NUM 64 +#define UDMA_MAX_UE_IDX 256 +#define UDMA_MAX_TPID_NUM 5 + +#define UDMA_CTRLQ_UBMEM_INFO_NUM (96) +#define UDMA_TPN_CNT_MASK 0x1F enum 
udma_ctrlq_cmd_code_type { UDMA_CMD_CTRLQ_REMOVE_SINGLE_TP = 0x13, @@ -22,6 +29,10 @@ enum udma_ctrlq_cmd_code_type { UDMA_CMD_CTRLQ_MAX }; +enum udma_ctrlq_ubmem_opcode { + UDMA_CTRLQ_QUERY_UBMEM_INFO = 0x1, +}; + enum udma_ctrlq_trans_type { UDMA_CTRLQ_TRANS_TYPE_TP_RM = 0, UDMA_CTRLQ_TRANS_TYPE_CTP, @@ -36,6 +47,48 @@ enum udma_ctrlq_tpid_status { UDMA_CTRLQ_TPID_IDLE, }; +struct udma_ctrlq_tpid { + uint32_t tpid : 24; + uint32_t tpn_cnt : 8; + uint32_t tpn_start : 24; + uint32_t rsv : 8; +}; + +struct udma_ctrlq_tpid_list_rsp { + uint32_t tp_list_cnt : 16; + uint32_t rsv : 16; + struct udma_ctrlq_tpid tpid_list[UDMA_MAX_TPID_NUM]; +}; + +struct udma_ctrlq_active_tp_req_data { + uint32_t local_tp_id : 24; + uint32_t local_tpn_cnt : 8; + uint32_t local_tpn_start : 24; + uint32_t rsv : 8; + uint32_t remote_tp_id : 24; + uint32_t remote_tpn_cnt : 8; + uint32_t remote_tpn_start : 24; + uint32_t rsv1 : 8; + uint32_t local_psn; + uint32_t remote_psn; +}; + +struct udma_ctrlq_active_tp_resp_data { + uint32_t local_tp_id : 24; + uint32_t local_tpn_cnt : 8; + uint32_t local_tpn_start : 24; + uint32_t rsv : 8; +}; + +struct udma_ctrlq_deactive_tp_req_data { + uint32_t tp_id : 24; + uint32_t tpn_cnt : 8; + uint32_t start_tpn : 24; + uint32_t rsv : 8; + uint32_t pid_flag : 24; + uint32_t rsv1 : 8; +}; + struct udma_ctrlq_tp_flush_done_req_data { uint32_t tpn : 24; uint32_t rsv : 8; @@ -79,6 +132,14 @@ struct udma_ctrlq_check_tp_active_rsp_info { struct udma_ctrlq_check_tp_active_rsp_data data[]; }; +struct udma_ctrlq_get_tp_list_req_data { + uint8_t seid[UDMA_EID_SIZE]; + uint8_t deid[UDMA_EID_SIZE]; + uint32_t trans_type : 4; + uint32_t rsv : 4; + uint32_t flag : 24; +}; + enum udma_cmd_ue_opcode { UDMA_CMD_UBCORE_COMMAND = 0x1, UDMA_CMD_NOTIFY_MUE_SAVE_TP = 0x2, @@ -95,13 +156,73 @@ struct udma_ue_idx_table { uint8_t ue_idx[UDMA_UE_NUM]; }; -struct udma_notify_flush_done { - uint32_t tpn; +struct udma_ctrlq_ubmem_out_query { + uint32_t data[UDMA_CTRLQ_UBMEM_INFO_NUM]; +}; + +struct udma_ctrlq_tp_attr { + uint32_t tp_attr_bitmap; + struct ubcore_tp_attr_value tp_attr_value; +}; + +struct udma_ctrlq_get_tp_attr_req { + struct udma_ctrlq_tpid tpid; }; +struct udma_ctrlq_set_tp_attr_req { + uint32_t tpid : 24; + uint32_t tpn_cnt : 8; + uint32_t tpn_start : 24; + uint32_t tp_attr_cnt : 8; + struct udma_ctrlq_tp_attr tp_attr; +}; + +struct udma_ctrlq_get_tp_attr_resp { + uint32_t tpid : 24; + uint32_t tp_attr_cnt : 8; + struct udma_ctrlq_tp_attr tp_attr; +}; + +struct udma_dev_resource_ratio { + struct ubase_bus_eid eid; + uint32_t index; +}; + +int udma_query_pair_dev_count(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out); + +int udma_get_dev_resource_ratio(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out); + +int udma_register_npu_cb(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out); + +int udma_unregister_npu_cb(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out); int udma_ctrlq_tp_flush_done(struct udma_dev *udev, uint32_t tpn); int udma_ctrlq_remove_single_tp(struct udma_dev *udev, uint32_t tpn, int status); +int udma_get_tp_list(struct ubcore_device *dev, struct ubcore_get_tp_cfg *tpid_cfg, + uint32_t *tp_cnt, struct ubcore_tp_info *tp_list, + struct ubcore_udata *udata); + +void 
udma_ctrlq_destroy_tpid_list(struct udma_dev *dev, struct xarray *ctrlq_tpid_table, + bool is_need_flush); +int udma_ctrlq_set_active_tp_ex(struct udma_dev *dev, + struct ubcore_active_tp_cfg *active_cfg); +int udma_ctrlq_query_ubmem_info(struct ubcore_device *dev, struct ubcore_ucontext *uctx, + struct ubcore_user_ctl_in *in, struct ubcore_user_ctl_out *out); + +int udma_set_tp_attr(struct ubcore_device *dev, const uint64_t tp_handle, + const uint8_t tp_attr_cnt, const uint32_t tp_attr_bitmap, + const struct ubcore_tp_attr_value *tp_attr, struct ubcore_udata *udata); +int udma_get_tp_attr(struct ubcore_device *dev, const uint64_t tp_handle, + uint8_t *tp_attr_cnt, uint32_t *tp_attr_bitmap, + struct ubcore_tp_attr_value *tp_attr, struct ubcore_udata *udata); int send_resp_to_ue(struct udma_dev *udma_dev, struct ubcore_resp *req_host, uint8_t dst_ue_idx, uint16_t opcode); +int send_req_to_mue(struct udma_dev *udma_dev, struct ubcore_req *req, uint16_t opcode); +int udma_active_tp(struct ubcore_device *dev, struct ubcore_active_tp_cfg *active_cfg); +int udma_deactive_tp(struct ubcore_device *dev, union ubcore_tp_handle tp_handle, + struct ubcore_udata *udata); #endif /* __UDMA_CTRLQ_TP_H__ */ diff --git a/drivers/ub/urma/hw/udma/udma_ctx.c b/drivers/ub/urma/hw/udma/udma_ctx.c index 985abb19929a1b54e7c3bfd915278c8d4c74b2b8..5f60fca10d862d6c150372faf37b472e7020cdaf 100644 --- a/drivers/ub/urma/hw/udma/udma_ctx.c +++ b/drivers/ub/urma/hw/udma/udma_ctx.c @@ -31,6 +31,7 @@ static int udma_init_ctx_resp(struct udma_dev *dev, struct ubcore_udrv_priv *udr resp.die_id = dev->die_id; resp.dump_aux_info = dump_aux_info; resp.jfr_sge = dev->caps.jfr_sge; + resp.hugepage_enable = ubase_adev_prealloc_supported(dev->comdev.adev); byte = copy_to_user((void *)(uintptr_t)udrv_data->out_addr, &resp, (uint32_t)sizeof(resp)); @@ -70,6 +71,8 @@ struct ubcore_ucontext *udma_alloc_ucontext(struct ubcore_device *ub_dev, ctx->dev = dev; INIT_LIST_HEAD(&ctx->pgdir_list); mutex_init(&ctx->pgdir_mutex); + INIT_LIST_HEAD(&ctx->hugepage_list); + mutex_init(&ctx->hugepage_lock); ret = udma_init_ctx_resp(dev, udrv_data); if (ret) { @@ -91,8 +94,11 @@ struct ubcore_ucontext *udma_alloc_ucontext(struct ubcore_device *ub_dev, int udma_free_ucontext(struct ubcore_ucontext *ucontext) { struct udma_dev *udma_dev = to_udma_dev(ucontext->ub_dev); + struct udma_hugepage_priv *priv; + struct vm_area_struct *vma; struct udma_context *ctx; int ret; + int i; ctx = to_udma_context(ucontext); @@ -103,20 +109,109 @@ int udma_free_ucontext(struct ubcore_ucontext *ucontext) mutex_destroy(&ctx->pgdir_mutex); ummu_sva_unbind_device(ctx->sva); + mutex_lock(&ctx->hugepage_lock); + list_for_each_entry(priv, &ctx->hugepage_list, list) { + if (current->mm) { + mmap_write_lock(current->mm); + vma = find_vma(current->mm, (unsigned long)priv->va_base); + if (vma != NULL && vma->vm_start <= (unsigned long)priv->va_base && + vma->vm_end >= (unsigned long)(priv->va_base + priv->va_len)) + zap_vma_ptes(vma, (unsigned long)priv->va_base, priv->va_len); + mmap_write_unlock(current->mm); + } + + dev_info(udma_dev->dev, "unmap_hugepage, 2m_page_num=%u.\n", priv->page_num); + for (i = 0; i < priv->page_num; i++) + __free_pages(priv->pages[i], get_order(UDMA_HUGEPAGE_SIZE)); + kfree(priv->pages); + kfree(priv); + } + mutex_unlock(&ctx->hugepage_lock); + mutex_destroy(&ctx->hugepage_lock); + kfree(ctx); return 0; } -int udma_mmap(struct ubcore_ucontext *uctx, struct vm_area_struct *vma) +static int udma_mmap_jetty_dsqe(struct udma_dev *dev, struct 
ubcore_ucontext *uctx, + struct vm_area_struct *vma) { -#define JFC_DB_UNMAP_BOUND 1 - struct udma_dev *udma_dev = to_udma_dev(uctx->ub_dev); struct ubcore_ucontext *jetty_uctx; struct udma_jetty_queue *sq; - resource_size_t db_addr; uint64_t address; uint64_t j_id; + + j_id = get_mmap_idx(vma); + + xa_lock(&dev->jetty_table.xa); + sq = xa_load(&dev->jetty_table.xa, j_id); + if (!sq) { + dev_err(dev->dev, + "mmap failed, j_id: %llu not exist\n", j_id); + xa_unlock(&dev->jetty_table.xa); + return -EINVAL; + } + + if (sq->is_jetty) + jetty_uctx = to_udma_jetty_from_queue(sq)->ubcore_jetty.uctx; + else + jetty_uctx = to_udma_jfs_from_queue(sq)->ubcore_jfs.uctx; + + if (jetty_uctx != uctx) { + dev_err(dev->dev, + "mmap failed, j_id: %llu, uctx invalid\n", j_id); + xa_unlock(&dev->jetty_table.xa); + return -EINVAL; + } + xa_unlock(&dev->jetty_table.xa); + + address = (uint64_t)dev->db_base + JETTY_DSQE_OFFSET + j_id * UDMA_HW_PAGE_SIZE; + + if (io_remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static int udma_mmap_hugepage(struct udma_dev *dev, struct ubcore_ucontext *uctx, + struct vm_area_struct *vma) +{ + uint32_t max_map_size = dev->caps.cqe_size * dev->caps.jfc.depth; + uint32_t map_size = vma->vm_end - vma->vm_start; + + if (!IS_ALIGNED(map_size, UDMA_HUGEPAGE_SIZE)) { + dev_err(dev->dev, "mmap size is not 2m alignment.\n"); + return -EINVAL; + } + + if (map_size == 0) { + dev_err(dev->dev, "mmap size is zero.\n"); + return -EINVAL; + } + + if (map_size > max_map_size) { + dev_err(dev->dev, "mmap size(%u) is greater than the max_size.\n", + map_size); + return -EINVAL; + } + + vm_flags_set(vma, VM_IO | VM_LOCKED | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY); + vma->vm_page_prot = __pgprot(((~PTE_ATTRINDX_MASK) & vma->vm_page_prot.pgprot) | + PTE_ATTRINDX(MT_NORMAL)); + if (udma_alloc_u_hugepage(to_udma_context(uctx), vma)) { + dev_err(dev->dev, "failed to alloc hugepage.\n"); + return -ENOMEM; + } + + return 0; +} + +int udma_mmap(struct ubcore_ucontext *uctx, struct vm_area_struct *vma) +{ +#define JFC_DB_UNMAP_BOUND 1 + struct udma_dev *udma_dev = to_udma_dev(uctx->ub_dev); uint32_t cmd; if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) { @@ -125,7 +220,6 @@ int udma_mmap(struct ubcore_ucontext *uctx, struct vm_area_struct *vma) return -EINVAL; } - db_addr = udma_dev->db_base; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); cmd = get_mmap_cmd(vma); @@ -133,46 +227,170 @@ int udma_mmap(struct ubcore_ucontext *uctx, struct vm_area_struct *vma) case UDMA_MMAP_JFC_PAGE: if (io_remap_pfn_range(vma, vma->vm_start, jfc_arm_mode > JFC_DB_UNMAP_BOUND ? 
- (uint64_t)db_addr >> PAGE_SHIFT : + (uint64_t)udma_dev->db_base >> PAGE_SHIFT : page_to_pfn(udma_dev->db_page), PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; break; case UDMA_MMAP_JETTY_DSQE: - j_id = get_mmap_idx(vma); - xa_lock(&udma_dev->jetty_table.xa); - sq = xa_load(&udma_dev->jetty_table.xa, j_id); - if (!sq) { - dev_err(udma_dev->dev, - "mmap failed, j_id: %llu not exist\n", j_id); - xa_unlock(&udma_dev->jetty_table.xa); - return -EINVAL; + return udma_mmap_jetty_dsqe(udma_dev, uctx, vma); + case UDMA_MMAP_HUGEPAGE: + return udma_mmap_hugepage(udma_dev, uctx, vma); + default: + dev_err(udma_dev->dev, + "mmap failed, cmd(%u) not support\n", cmd); + return -EINVAL; + } + + return 0; +} + +int udma_alloc_u_hugepage(struct udma_context *ctx, struct vm_area_struct *vma) +{ + uint32_t page_num = (vma->vm_end - vma->vm_start) >> UDMA_HUGEPAGE_SHIFT; + struct udma_hugepage_priv *priv; + int ret = -ENOMEM; + int i; + + mutex_lock(&ctx->dev->hugepage_lock); + if (page_num > ctx->dev->total_hugepage_num) { + dev_err(ctx->dev->dev, "insufficient resources for mmap.\n"); + mutex_unlock(&ctx->dev->hugepage_lock); + return -EINVAL; + } + ctx->dev->total_hugepage_num -= page_num; + mutex_unlock(&ctx->dev->hugepage_lock); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + goto err_alloc_priv; + + priv->page_num = page_num; + priv->pages = kcalloc(priv->page_num, sizeof(*priv->pages), GFP_KERNEL); + if (!priv->pages) + goto err_alloc_arr; + + for (i = 0; i < priv->page_num; i++) { + priv->pages[i] = alloc_pages(GFP_KERNEL | __GFP_ZERO, + get_order(UDMA_HUGEPAGE_SIZE)); + if (!priv->pages[i]) { + dev_err(ctx->dev->dev, "failed to alloc 2M pages.\n"); + goto err_alloc_pages; + } + ret = remap_pfn_range(vma, vma->vm_start + i * UDMA_HUGEPAGE_SIZE, + page_to_pfn(priv->pages[i]), UDMA_HUGEPAGE_SIZE, + vma->vm_page_prot); + if (ret) { + dev_err(ctx->dev->dev, "failed to remap_pfn_range, ret=%d.\n", ret); + goto err_remap_pfn_range; } + } + + priv->va_base = (void *)vma->vm_start; + priv->va_len = priv->page_num << UDMA_HUGEPAGE_SHIFT; + priv->left_va_len = priv->va_len; + refcount_set(&priv->refcnt, 1); + + mutex_lock(&ctx->hugepage_lock); + list_add(&priv->list, &ctx->hugepage_list); + mutex_unlock(&ctx->hugepage_lock); - if (sq->is_jetty) - jetty_uctx = to_udma_jetty_from_queue(sq)->ubcore_jetty.uctx; + if (dfx_switch) + dev_info_ratelimited(ctx->dev->dev, "map_hugepage, 2m_page_num=%u.\n", + priv->page_num); + return 0; + +err_remap_pfn_range: +err_alloc_pages: + for (i = 0; i < priv->page_num; i++) { + if (priv->pages[i]) + __free_pages(priv->pages[i], get_order(UDMA_HUGEPAGE_SIZE)); else - jetty_uctx = to_udma_jfs_from_queue(sq)->ubcore_jfs.uctx; + break; + } + kfree(priv->pages); +err_alloc_arr: + kfree(priv); +err_alloc_priv: + mutex_lock(&ctx->dev->hugepage_lock); + ctx->dev->total_hugepage_num += page_num; + mutex_unlock(&ctx->dev->hugepage_lock); - if (jetty_uctx != uctx) { - dev_err(udma_dev->dev, - "mmap failed, j_id: %llu, uctx invalid\n", j_id); - xa_unlock(&udma_dev->jetty_table.xa); - return -EINVAL; - } - xa_unlock(&udma_dev->jetty_table.xa); + return ret; +} - address = (uint64_t)db_addr + JETTY_DSQE_OFFSET + j_id * UDMA_HW_PAGE_SIZE; +static struct udma_hugepage_priv *udma_list_find_before(struct udma_context *ctx, void *va) +{ + struct udma_hugepage_priv *priv; - if (io_remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) - return -EAGAIN; - break; - default: - dev_err(udma_dev->dev, - "mmap failed, cmd(%u) not support\n", cmd); - 
return -EINVAL; + list_for_each_entry(priv, &ctx->hugepage_list, list) { + if (va >= priv->va_base && va < priv->va_base + priv->va_len) + return priv; } - return 0; + return NULL; +} + +int udma_occupy_u_hugepage(struct udma_context *ctx, void *va) +{ + struct udma_hugepage_priv *priv; + + mutex_lock(&ctx->hugepage_lock); + priv = udma_list_find_before(ctx, va); + if (priv) { + if (dfx_switch) + dev_info_ratelimited(ctx->dev->dev, "occupy_hugepage.\n"); + refcount_inc(&priv->refcnt); + } + mutex_unlock(&ctx->hugepage_lock); + + return priv ? 0 : -EFAULT; +} + +void udma_return_u_hugepage(struct udma_context *ctx, void *va) +{ + struct udma_hugepage_priv *priv; + struct vm_area_struct *vma; + uint32_t i; + + mutex_lock(&ctx->hugepage_lock); + priv = udma_list_find_before(ctx, va); + if (!priv) { + mutex_unlock(&ctx->hugepage_lock); + dev_warn(ctx->dev->dev, "va is invalid addr.\n"); + return; + } + + if (dfx_switch) + dev_info_ratelimited(ctx->dev->dev, "return_hugepage.\n"); + refcount_dec(&priv->refcnt); + if (!refcount_dec_if_one(&priv->refcnt)) { + mutex_unlock(&ctx->hugepage_lock); + return; + } + + list_del(&priv->list); + mutex_unlock(&ctx->hugepage_lock); + + if (current->mm) { + mmap_write_lock(current->mm); + vma = find_vma(current->mm, (unsigned long)priv->va_base); + if (vma != NULL && vma->vm_start <= (unsigned long)priv->va_base && + vma->vm_end >= (unsigned long)(priv->va_base + priv->va_len)) + zap_vma_ptes(vma, (unsigned long)priv->va_base, priv->va_len); + mmap_write_unlock(current->mm); + } else { + dev_warn(ctx->dev->dev, "current mm released.\n"); + } + + if (dfx_switch) + dev_info_ratelimited(ctx->dev->dev, "unmap_hugepage, 2m_page_num=%u.\n", + priv->page_num); + mutex_lock(&ctx->dev->hugepage_lock); + for (i = 0; i < priv->page_num; i++) + __free_pages(priv->pages[i], get_order(UDMA_HUGEPAGE_SIZE)); + ctx->dev->total_hugepage_num += priv->page_num; + mutex_unlock(&ctx->dev->hugepage_lock); + kfree(priv->pages); + kfree(priv); } diff --git a/drivers/ub/urma/hw/udma/udma_ctx.h b/drivers/ub/urma/hw/udma/udma_ctx.h index a93aab94c1e9e34b593b48a1204dd98cb3078d17..2521d2de310850b7bbd5b2ce26e3f7cbe5787488 100644 --- a/drivers/ub/urma/hw/udma/udma_ctx.h +++ b/drivers/ub/urma/hw/udma/udma_ctx.h @@ -16,6 +16,8 @@ struct udma_context { struct mutex pgdir_mutex; struct iommu_sva *sva; uint32_t tid; + struct mutex hugepage_lock; + struct list_head hugepage_list; }; static inline struct udma_context *to_udma_context(struct ubcore_ucontext *uctx) @@ -39,4 +41,8 @@ struct ubcore_ucontext *udma_alloc_ucontext(struct ubcore_device *ub_dev, int udma_free_ucontext(struct ubcore_ucontext *ucontext); int udma_mmap(struct ubcore_ucontext *uctx, struct vm_area_struct *vma); +int udma_alloc_u_hugepage(struct udma_context *ctx, struct vm_area_struct *vma); +int udma_occupy_u_hugepage(struct udma_context *ctx, void *va); +void udma_return_u_hugepage(struct udma_context *ctx, void *va); + #endif /* __UDMA_CTX_H__ */ diff --git a/drivers/ub/urma/hw/udma/udma_db.c b/drivers/ub/urma/hw/udma/udma_db.c index ea7b5d98ee6bcd05d519787bb25166afd5a6a9bc..c66d6b23b2e8a2aa41f805637cbb67381efc930d 100644 --- a/drivers/ub/urma/hw/udma/udma_db.c +++ b/drivers/ub/urma/hw/udma/udma_db.c @@ -115,7 +115,7 @@ static struct udma_k_sw_db_page *udma_alloc_db_page(struct udma_dev *dev, bitmap_fill(page->bitmap, page->num_db); - ret = udma_k_alloc_buf(dev, PAGE_SIZE, &page->db_buf); + ret = udma_alloc_normal_buf(dev, PAGE_SIZE, &page->db_buf); if (ret) { dev_err(dev->dev, "Failed alloc db page buf, ret is %d.\n", 
ret); goto err_kva; @@ -165,7 +165,7 @@ void udma_free_sw_db(struct udma_dev *dev, struct udma_sw_db *db) set_bit(db->index, db->kpage->bitmap); if (bitmap_full(db->kpage->bitmap, db->kpage->num_db)) { - udma_k_free_buf(dev, PAGE_SIZE, &db->kpage->db_buf); + udma_free_normal_buf(dev, PAGE_SIZE, &db->kpage->db_buf); bitmap_free(db->kpage->bitmap); list_del(&db->kpage->list); kfree(db->kpage); diff --git a/drivers/ub/urma/hw/udma/udma_def.h b/drivers/ub/urma/hw/udma/udma_def.h index ca107e34a37c53f954cd13eb0cb4c5c557b40dee..0681f6dd950d7664452eb089aa27218e6d3afe2b 100644 --- a/drivers/ub/urma/hw/udma/udma_def.h +++ b/drivers/ub/urma/hw/udma/udma_def.h @@ -63,6 +63,8 @@ struct udma_caps { uint16_t rc_queue_num; uint16_t rc_queue_depth; uint8_t rc_entry_size; + uint64_t rc_dma_len; + dma_addr_t rc_dma_addr; uint8_t ack_queue_num; uint8_t port_num; uint8_t cqe_size; @@ -109,6 +111,24 @@ struct udma_sw_db_page { refcount_t refcount; }; +struct udma_hugepage_priv { + struct list_head list; + struct page **pages; + uint32_t page_num; + struct ubcore_umem *umem; + void *va_base; + uint32_t va_len; + uint32_t left_va_offset; + uint32_t left_va_len; + refcount_t refcnt; +}; + +struct udma_hugepage { + void *va_start; + uint32_t va_len; + struct udma_hugepage_priv *priv; +}; + struct udma_buf { dma_addr_t addr; union { @@ -123,6 +143,8 @@ struct udma_buf { uint32_t cnt_per_page_shift; struct xarray id_table_xa; struct mutex id_table_mutex; + bool is_hugepage; + struct udma_hugepage *hugepage; }; struct udma_k_sw_db_page { diff --git a/drivers/ub/urma/hw/udma/udma_dev.h b/drivers/ub/urma/hw/udma/udma_dev.h index d9b10ab28028eb1e7e0ad3115259d16fa5745cc4..1f76ccb84c30b26840e6c7ca9b8fba7513b6267a 100644 --- a/drivers/ub/urma/hw/udma/udma_dev.h +++ b/drivers/ub/urma/hw/udma/udma_dev.h @@ -23,6 +23,8 @@ extern bool dump_aux_info; #define UDMA_CTX_NUM 2 +#define UDMA_BITS_PER_INT 32 + #define MAX_JETTY_IN_JETTY_GRP 32 #define UDMA_USER_DATA_H_OFFSET 32U @@ -33,6 +35,8 @@ extern bool dump_aux_info; #define UDMA_HW_PAGE_SHIFT 12 #define UDMA_HW_PAGE_SIZE (1 << UDMA_HW_PAGE_SHIFT) +#define UDMA_HUGEPAGE_SHIFT 21 +#define UDMA_HUGEPAGE_SIZE (1 << UDMA_HUGEPAGE_SHIFT) #define UDMA_DEV_UE_NUM 47 @@ -113,6 +117,7 @@ struct udma_dev { struct xarray crq_nb_table; struct xarray npu_nb_table; struct mutex npu_nb_mutex; + struct xarray ctrlq_tpid_table; struct xarray tpn_ue_idx_table; struct ubase_event_nb *ae_event_addr[UBASE_EVENT_TYPE_MAX]; resource_size_t db_base; @@ -144,6 +149,9 @@ struct udma_dev { u8 udma_sl[UDMA_MAX_SL_NUM]; int disable_ue_rx_count; struct mutex disable_ue_rx_mutex; + struct mutex hugepage_lock; + struct list_head hugepage_list; + uint32_t total_hugepage_num; }; #define UDMA_ERR_MSG_LEN 128 diff --git a/drivers/ub/urma/hw/udma/udma_jetty.c b/drivers/ub/urma/hw/udma/udma_jetty.c index 914ef33b81d987630d768b0c35054773d07af8a2..c3f3f9a90fb3680114d3b975e54c9b0dc5d6ce1a 100644 --- a/drivers/ub/urma/hw/udma/udma_jetty.c +++ b/drivers/ub/urma/hw/udma/udma_jetty.c @@ -66,6 +66,7 @@ static int udma_get_user_jetty_cmd(struct udma_dev *dev, struct udma_jetty *jett } uctx = to_udma_context(udata->uctx); + jetty->sq.udma_ctx = uctx; jetty->sq.tid = uctx->tid; jetty->jetty_addr = ucmd->jetty_addr; jetty->pi_type = ucmd->pi_type; @@ -172,6 +173,112 @@ static void udma_init_jettyc(struct udma_dev *dev, struct ubcore_jetty_cfg *cfg, ctx->next_rcv_ssn = ctx->next_send_ssn; } +static int update_jetty_grp_ctx_valid(struct udma_dev *udma_dev, + struct udma_jetty_grp *jetty_grp) +{ + struct udma_jetty_grp_ctx 
ctx[UDMA_CTX_NUM];
+	struct ubase_mbx_attr mbox_attr = {};
+	int ret;
+
+	ctx[0].valid = jetty_grp->valid;
+	/* jetty number indicates the location of the jetty with the largest ID. */
+	ctx[0].jetty_number = fls(jetty_grp->valid) - 1;
+	memset(ctx + 1, 0xff, sizeof(ctx[1]));
+	ctx[1].valid = 0;
+	ctx[1].jetty_number = 0;
+
+	mbox_attr.tag = jetty_grp->jetty_grp_id;
+	mbox_attr.op = UDMA_CMD_MODIFY_JETTY_GROUP_CONTEXT;
+	ret = post_mailbox_update_ctx(udma_dev, ctx, sizeof(ctx), &mbox_attr);
+	if (ret)
+		dev_err(udma_dev->dev,
+			"post mailbox update jetty grp ctx failed, ret = %d.\n",
+			ret);
+
+	return ret;
+}
+
+static uint32_t udma_get_jetty_grp_jetty_id(uint32_t *valid, uint32_t *next)
+{
+	uint32_t bit_idx;
+
+	bit_idx = find_next_zero_bit((unsigned long *)valid, UDMA_BITS_PER_INT, *next);
+	if (bit_idx >= UDMA_BITS_PER_INT)
+		bit_idx = find_next_zero_bit((unsigned long *)valid, UDMA_BITS_PER_INT, 0);
+
+	*next = (*next + 1) >= UDMA_BITS_PER_INT ? 0 : *next + 1;
+
+	return bit_idx;
+}
+
+static int add_jetty_to_grp(struct udma_dev *udma_dev, struct ubcore_jetty_group *jetty_grp,
+			    struct udma_jetty_queue *sq, uint32_t cfg_id)
+{
+	struct udma_jetty_grp *udma_jetty_grp = to_udma_jetty_grp(jetty_grp);
+	uint32_t bit_idx = cfg_id - udma_jetty_grp->start_jetty_id;
+	int ret = 0;
+
+	mutex_lock(&udma_jetty_grp->valid_lock);
+
+	if (cfg_id == 0)
+		bit_idx = udma_get_jetty_grp_jetty_id(&udma_jetty_grp->valid,
+						      &udma_jetty_grp->next_jetty_id);
+
+	if (bit_idx >= UDMA_BITS_PER_INT || (udma_jetty_grp->valid & BIT(bit_idx))) {
+		dev_err(udma_dev->dev,
+			"jg(%u.%u) valid %u is full or user id(%u) error.\n",
+			udma_jetty_grp->jetty_grp_id, udma_jetty_grp->start_jetty_id,
+			udma_jetty_grp->valid, cfg_id);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	udma_jetty_grp->valid |= BIT(bit_idx);
+	sq->id = udma_jetty_grp->start_jetty_id + bit_idx;
+	sq->jetty_grp = udma_jetty_grp;
+
+	ret = update_jetty_grp_ctx_valid(udma_dev, udma_jetty_grp);
+	if (ret) {
+		dev_err(udma_dev->dev,
+			"update jetty grp ctx valid failed, jetty_grp id is %u.\n",
+			udma_jetty_grp->jetty_grp_id);
+
+		udma_jetty_grp->valid &= ~BIT(bit_idx);
+	}
+out:
+	mutex_unlock(&udma_jetty_grp->valid_lock);
+
+	return ret;
+}
+
+static void remove_jetty_from_grp(struct udma_dev *udma_dev,
+				  struct udma_jetty *jetty)
+{
+	struct udma_jetty_grp *jetty_grp = jetty->sq.jetty_grp;
+	uint32_t bit_idx;
+	int ret;
+
+	bit_idx = jetty->sq.id - jetty_grp->start_jetty_id;
+	if (bit_idx >= UDMA_BITS_PER_INT) {
+		dev_err(udma_dev->dev,
+			"jetty_id(%u) is not in jetty grp, start_jetty_id(%u).\n",
+			jetty->sq.id, jetty_grp->start_jetty_id);
+		return;
+	}
+
+	mutex_lock(&jetty_grp->valid_lock);
+	jetty_grp->valid &= ~BIT(bit_idx);
+	jetty->sq.jetty_grp = NULL;
+
+	ret = update_jetty_grp_ctx_valid(udma_dev, jetty_grp);
+	if (ret)
+		dev_err(udma_dev->dev,
+			"update jetty grp ctx valid failed, jetty_grp id is %u.\n",
+			jetty_grp->jetty_grp_id);
+
+	mutex_unlock(&jetty_grp->valid_lock);
+}
+
 static int udma_specify_rsvd_jetty_id(struct udma_dev *udma_dev, uint32_t cfg_id) { struct udma_ida *ida_table = &udma_dev->rsvd_jetty_ida_table; @@ -391,6 +498,13 @@ int alloc_jetty_id(struct udma_dev *udma_dev, struct udma_jetty_queue *sq, return ret; sq->id = cfg_id;
+	} else if (jetty_grp) {
+		ret = add_jetty_to_grp(udma_dev, jetty_grp, sq, cfg_id);
+		if (ret) {
+			dev_err(udma_dev->dev,
+				"add jetty to grp failed, ret = %d.\n", ret);
+			return ret;
+		}
 	} else { ret = udma_alloc_jetty_id_own(udma_dev, &sq->id, sq->jetty_type); } @@ -403,6 +517,8 @@ static void free_jetty_id(struct
udma_dev *udma_dev, { if (udma_jetty->sq.id < udma_dev->caps.jetty.start_idx) udma_id_free(&udma_dev->rsvd_jetty_ida_table, udma_jetty->sq.id); + else if (is_grp) + remove_jetty_from_grp(udma_dev, udma_jetty); else udma_adv_id_free(&udma_dev->jetty_table.bitmap_table, udma_jetty->sq.id, false); @@ -653,6 +769,19 @@ static int udma_query_jetty_ctx(struct udma_dev *dev, return 0; } +void udma_clean_cqe_for_jetty(struct udma_dev *dev, struct udma_jetty_queue *sq, + struct ubcore_jfc *send_jfc, + struct ubcore_jfc *recv_jfc) +{ + if (sq->buf.kva) { + if (send_jfc) + udma_clean_jfc(send_jfc, sq->id, dev); + + if (recv_jfc && recv_jfc != send_jfc) + udma_clean_jfc(recv_jfc, sq->id, dev); + } +} + static bool udma_wait_timeout(uint32_t *sum_times, uint32_t times, uint32_t ta_timeout) { uint32_t wait_time; @@ -791,6 +920,9 @@ static void udma_free_jetty(struct ubcore_jetty *jetty) struct udma_dev *udma_dev = to_udma_dev(jetty->ub_dev); struct udma_jetty *udma_jetty = to_udma_jetty(jetty); + udma_clean_cqe_for_jetty(udma_dev, &udma_jetty->sq, jetty->jetty_cfg.send_jfc, + jetty->jetty_cfg.recv_jfc); + if (dfx_switch) udma_dfx_delete_id(udma_dev, &udma_dev->dfx_info->jetty, udma_jetty->sq.id); @@ -832,6 +964,347 @@ int udma_destroy_jetty(struct ubcore_jetty *jetty) return 0; } +static int udma_batch_jetty_get_ack(struct udma_dev *dev, + struct udma_jetty_queue **sq_list, + uint32_t jetty_cnt, bool *jetty_flag, + int *bad_jetty_index) +{ + struct udma_jetty_ctx ctx = {}; + struct udma_jetty_queue *sq; + uint16_t rcv_send_diff = 0; + uint32_t i; + int ret; + + for (i = 0; i < jetty_cnt; i++) { + sq = sq_list[i]; + if (sq->state != UBCORE_JETTY_STATE_READY && + sq->state != UBCORE_JETTY_STATE_SUSPENDED) + continue; + + if (jetty_flag[i]) + continue; + + ret = udma_query_jetty_ctx(dev, &ctx, sq->id); + if (ret) { + dev_err(dev->dev, + "query jetty ctx failed, id = %u, ret = %d.\n", + sq->id, ret); + *bad_jetty_index = 0; + return ret; + } + + rcv_send_diff = ctx.next_rcv_ssn - ctx.next_send_ssn; + if (ctx.PI == ctx.CI && rcv_send_diff < UDMA_RCV_SEND_MAX_DIFF && + ctx.state == JETTY_READY) { + jetty_flag[i] = true; + continue; + } + + if (rcv_send_diff < UDMA_RCV_SEND_MAX_DIFF && + ctx.state == JETTY_ERROR) { + jetty_flag[i] = true; + continue; + } + + *bad_jetty_index = 0; + break; + } + + return (i == jetty_cnt) ? 
0 : -EAGAIN; +} + +static uint32_t get_max_jetty_ta_timeout(struct udma_jetty_queue **sq_list, + uint32_t jetty_cnt) +{ + uint32_t max_timeout = 0; + uint32_t i; + + for (i = 0; i < jetty_cnt; i++) { + if (sq_list[i]->ta_timeout > max_timeout) + max_timeout = sq_list[i]->ta_timeout; + } + + return max_timeout; +} + +static bool udma_batch_query_jetty_fd(struct udma_dev *dev, + struct udma_jetty_queue **sq_list, + uint32_t jetty_cnt, int *bad_jetty_index) +{ + uint32_t ta_timeout = get_max_jetty_ta_timeout(sq_list, jetty_cnt); + struct udma_jetty_ctx ctx = {}; + struct udma_jetty_queue *sq; + uint16_t rcv_send_diff = 0; + uint32_t sum_times = 0; + uint32_t flush_cnt = 0; + bool all_query_done; + uint32_t times = 0; + bool *jetty_flag; + uint32_t i; + + jetty_flag = kcalloc(jetty_cnt, sizeof(bool), GFP_KERNEL); + if (!jetty_flag) { + *bad_jetty_index = 0; + return false; + } + + while (true) { + for (i = 0; i < jetty_cnt; i++) { + if (jetty_flag[i]) + continue; + + sq = sq_list[i]; + if (udma_query_jetty_ctx(dev, &ctx, sq->id)) { + kfree(jetty_flag); + *bad_jetty_index = 0; + return false; + } + + if (!ctx.flush_cqe_done) + continue; + + flush_cnt++; + jetty_flag[i] = true; + } + + if (flush_cnt == jetty_cnt) { + kfree(jetty_flag); + return true; + } + + if (udma_wait_timeout(&sum_times, times, ta_timeout)) + break; + + times++; + } + + all_query_done = true; + + for (i = 0; i < jetty_cnt; i++) { + if (jetty_flag[i]) + continue; + + sq = sq_list[i]; + if (udma_query_jetty_ctx(dev, &ctx, sq->id)) { + kfree(jetty_flag); + *bad_jetty_index = 0; + return false; + } + + rcv_send_diff = ctx.next_rcv_ssn - ctx.next_send_ssn; + if (ctx.flush_cqe_done || (ctx.flush_ssn_vld && + rcv_send_diff < UDMA_RCV_SEND_MAX_DIFF)) + continue; + + *bad_jetty_index = 0; + all_query_done = false; + udma_dfx_ctx_print(dev, "Flush Failed Jetty", sq->id, + sizeof(ctx) / sizeof(uint32_t), (uint32_t *)&ctx); + break; + } + + kfree(jetty_flag); + + return all_query_done; +} + +static int batch_modify_jetty_to_error(struct udma_dev *dev, + struct udma_jetty_queue **sq_list, + uint32_t jetty_cnt, int *bad_jetty_index) +{ + struct udma_jetty_queue *sq; + uint32_t i; + int ret; + + for (i = 0; i < jetty_cnt; i++) { + sq = sq_list[i]; + if (sq->state == UBCORE_JETTY_STATE_ERROR || + sq->state == UBCORE_JETTY_STATE_RESET) + continue; + + ret = udma_set_jetty_state(dev, sq->id, JETTY_ERROR); + if (ret) { + dev_err(dev->dev, "modify jetty to error failed, id: %u.\n", + sq->id); + *bad_jetty_index = 0; + return ret; + } + + sq->state = UBCORE_JETTY_STATE_ERROR; + } + + return 0; +} + +static int udma_batch_modify_jetty_precondition(struct udma_dev *dev, + struct udma_jetty_queue **sq_list, + uint32_t jetty_cnt, int *bad_jetty_index) +{ + uint32_t ta_timeout = get_max_jetty_ta_timeout(sq_list, jetty_cnt); + uint32_t sum_times = 0; + uint32_t times = 0; + bool *jetty_flag; + int ret; + + jetty_flag = kcalloc(jetty_cnt, sizeof(bool), GFP_KERNEL); + if (!jetty_flag) { + *bad_jetty_index = 0; + return -ENOMEM; + } + + while (true) { + ret = udma_batch_jetty_get_ack(dev, sq_list, jetty_cnt, + jetty_flag, bad_jetty_index); + if (ret != -EAGAIN) { + kfree(jetty_flag); + return ret; + } + + if (udma_wait_timeout(&sum_times, times, ta_timeout)) { + dev_warn(dev->dev, + "timeout after %u ms, not all jetty get ack.\n", + sum_times); + break; + } + times++; + } + + kfree(jetty_flag); + + return 0; +} + +static bool udma_batch_destroy_jetty_precondition(struct udma_dev *dev, + struct udma_jetty_queue **sq_list, + uint32_t jetty_cnt, int 
*bad_jetty_index)
+{
+	if (!(dev->caps.feature & UDMA_CAP_FEATURE_UE_RX_CLOSE) &&
+	    udma_batch_modify_jetty_precondition(dev, sq_list, jetty_cnt, bad_jetty_index))
+		return false;
+
+	if (batch_modify_jetty_to_error(dev, sq_list, jetty_cnt, bad_jetty_index)) {
+		dev_err(dev->dev, "batch modify jetty to error failed.\n");
+		return false;
+	}
+
+	if (!udma_batch_query_jetty_fd(dev, sq_list, jetty_cnt, bad_jetty_index))
+		return false;
+
+	udelay(UDMA_DESTROY_JETTY_DELAY_TIME);
+
+	return true;
+}
+
+int udma_batch_modify_and_destroy_jetty(struct udma_dev *dev,
+					struct udma_jetty_queue **sq_list,
+					uint32_t jetty_cnt, int *bad_jetty_index)
+{
+	uint32_t i;
+	int ret;
+
+	if (!udma_batch_destroy_jetty_precondition(dev, sq_list, jetty_cnt, bad_jetty_index))
+		return -EFAULT;
+
+	for (i = 0; i < jetty_cnt; i++) {
+		if (sq_list[i]->state != UBCORE_JETTY_STATE_RESET) {
+			ret = udma_destroy_hw_jetty_ctx(dev, sq_list[i]->id);
+			if (ret) {
+				dev_err(dev->dev,
+					"destroy jetty failed, id: %u.\n",
+					sq_list[i]->id);
+				*bad_jetty_index = 0;
+				return ret;
+			}
+
+			sq_list[i]->state = UBCORE_JETTY_STATE_RESET;
+		}
+	}
+
+	return 0;
+}
+
+int udma_destroy_jetty_batch(struct ubcore_jetty **jetty, int jetty_cnt, int *bad_jetty_index)
+{
+	struct udma_jetty_queue **sq_list;
+	struct udma_dev *udma_dev;
+	uint32_t i;
+	int ret;
+
+	if (!jetty) {
+		pr_err("jetty array is null.\n");
+		return -EINVAL;
+	}
+
+	if (!jetty_cnt) {
+		pr_err("jetty cnt is 0.\n");
+		return -EINVAL;
+	}
+
+	udma_dev = to_udma_dev(jetty[0]->ub_dev);
+
+	sq_list = kcalloc(jetty_cnt, sizeof(*sq_list), GFP_KERNEL);
+	if (!sq_list) {
+		*bad_jetty_index = 0;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < jetty_cnt; i++)
+		sq_list[i] = &(to_udma_jetty(jetty[i])->sq);
+
+	ret = udma_batch_modify_and_destroy_jetty(udma_dev, sq_list, jetty_cnt, bad_jetty_index);
+
+	kfree(sq_list);
+
+	if (ret) {
+		dev_err(udma_dev->dev,
+			"udma batch modify to error and destroy jetty failed.\n");
+		return ret;
+	}
+
+	for (i = 0; i < jetty_cnt; i++)
+		udma_free_jetty(jetty[i]);
+
+	return 0;
+}
+
+static int udma_check_jetty_grp_info(struct ubcore_tjetty_cfg *cfg, struct udma_dev *dev)
+{
+	if (cfg->type == UBCORE_JETTY_GROUP) {
+		if (cfg->trans_mode != UBCORE_TP_RM) {
+			dev_err(dev->dev, "import jg only supports RM, trans_mode is %u.\n",
+				cfg->trans_mode);
+			return -EINVAL;
+		}
+
+		if (cfg->policy != UBCORE_JETTY_GRP_POLICY_HASH_HINT) {
+			dev_err(dev->dev, "import jg only supports hash hint, policy is %u.\n",
+				cfg->policy);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+int udma_unimport_jetty(struct ubcore_tjetty *tjetty)
+{
+	struct udma_target_jetty *udma_tjetty = to_udma_tjetty(tjetty);
+	struct udma_dev *udma_dev = to_udma_dev(tjetty->ub_dev);
+
+	if (!IS_ERR_OR_NULL(tjetty->vtpn)) {
+		dev_err(udma_dev->dev,
+			"the target jetty is still being used, id = %u.\n",
+			tjetty->cfg.id.id);
+		return -EINVAL;
+	}
+
+	udma_tjetty->token_value = 0;
+	tjetty->cfg.token_value.token = 0;
+	kfree(udma_tjetty);
+
+	return 0;
+}
+
 bool verify_modify_jetty(enum ubcore_jetty_state jetty_state, enum ubcore_jetty_state attr_state) { @@ -1095,3 +1568,121 @@ int udma_delete_jetty_grp(struct ubcore_jetty_group *jetty_grp) return ret; }
+
+int udma_flush_jetty(struct ubcore_jetty *jetty, int cr_cnt, struct ubcore_cr *cr)
+{
+	struct udma_dev *udma_dev = to_udma_dev(jetty->ub_dev);
+	struct udma_jetty *udma_jetty = to_udma_jetty(jetty);
+	struct udma_jetty_queue *sq = &udma_jetty->sq;
+	int n_flushed;
+
+	if (!sq->flush_flag)
+		return 0;
+
+	if (!sq->lock_free)
+		spin_lock(&sq->lock);
+
+	for 
(n_flushed = 0; n_flushed < cr_cnt; n_flushed++) { + if (sq->ci == sq->pi) + break; + udma_flush_sq(udma_dev, sq, cr + n_flushed); + } + + if (!sq->lock_free) + spin_unlock(&sq->lock); + + return n_flushed; +} + +int udma_post_jetty_send_wr(struct ubcore_jetty *jetty, struct ubcore_jfs_wr *wr, + struct ubcore_jfs_wr **bad_wr) +{ + struct udma_dev *udma_dev = to_udma_dev(jetty->ub_dev); + struct udma_jetty *udma_jetty = to_udma_jetty(jetty); + int ret; + + ret = udma_post_sq_wr(udma_dev, &udma_jetty->sq, wr, bad_wr); + if (ret) + dev_err(udma_dev->dev, + "jetty post sq wr failed, ret = %d, jetty id = %u.\n", + ret, udma_jetty->sq.id); + + return ret; +} + +int udma_post_jetty_recv_wr(struct ubcore_jetty *jetty, struct ubcore_jfr_wr *wr, + struct ubcore_jfr_wr **bad_wr) +{ + struct udma_dev *udma_dev = to_udma_dev(jetty->ub_dev); + struct udma_jetty *udma_jetty = to_udma_jetty(jetty); + struct ubcore_jfr *jfr; + int ret; + + jfr = &udma_jetty->jfr->ubcore_jfr; + ret = udma_post_jfr_wr(jfr, wr, bad_wr); + if (ret) + dev_err(udma_dev->dev, + "jetty post jfr wr failed, ret = %d, jetty id = %u.\n", + ret, udma_jetty->sq.id); + + return ret; +} + +int udma_unbind_jetty(struct ubcore_jetty *jetty) +{ + struct udma_jetty *udma_jetty = to_udma_jetty(jetty); + + udma_jetty->sq.rc_tjetty = NULL; + + return 0; +} + +struct ubcore_tjetty *udma_import_jetty_ex(struct ubcore_device *ub_dev, + struct ubcore_tjetty_cfg *cfg, + struct ubcore_active_tp_cfg *active_tp_cfg, + struct ubcore_udata *udata) +{ + struct udma_dev *udma_dev = to_udma_dev(ub_dev); + struct udma_target_jetty *tjetty; + int ret = 0; + + if (cfg->type != UBCORE_JETTY_GROUP && cfg->type != UBCORE_JETTY) { + dev_err(udma_dev->dev, + "the jetty of the type %u cannot be imported in exp.\n", + cfg->type); + return NULL; + } + + ret = udma_check_jetty_grp_info(cfg, udma_dev); + if (ret) + return NULL; + + tjetty = kzalloc(sizeof(*tjetty), GFP_KERNEL); + if (!tjetty) + return NULL; + + if (cfg->flag.bs.token_policy != UBCORE_TOKEN_NONE) { + tjetty->token_value = cfg->token_value.token; + tjetty->token_value_valid = true; + } + + udma_swap_endian(cfg->id.eid.raw, tjetty->le_eid.raw, UBCORE_EID_SIZE); + + return &tjetty->ubcore_tjetty; +} + +int udma_bind_jetty_ex(struct ubcore_jetty *jetty, + struct ubcore_tjetty *tjetty, + struct ubcore_active_tp_cfg *active_tp_cfg, + struct ubcore_udata *udata) +{ + struct udma_jetty *udma_jetty = to_udma_jetty(jetty); + + udma_jetty->sq.rc_tjetty = tjetty; + + return 0; +} + +module_param(well_known_jetty_pgsz_check, bool, 0444); +MODULE_PARM_DESC(well_known_jetty_pgsz_check, + "Whether check the system page size. 
default: true(true:check; false: not check)"); diff --git a/drivers/ub/urma/hw/udma/udma_jetty.h b/drivers/ub/urma/hw/udma/udma_jetty.h index 5b428e999ff195c503a203a0ce2f34c8146c3bf7..011711dc19263b0cff290251eaaf536663b8d6c8 100644 --- a/drivers/ub/urma/hw/udma/udma_jetty.h +++ b/drivers/ub/urma/hw/udma/udma_jetty.h @@ -45,6 +45,13 @@ struct udma_jetty { bool ue_rx_closed; }; +struct udma_target_jetty { + struct ubcore_tjetty ubcore_tjetty; + union ubcore_eid le_eid; + uint32_t token_value; + bool token_value_valid; +}; + enum jfsc_mode { JFS, JETTY, @@ -214,6 +221,11 @@ static inline struct udma_jetty_grp *to_udma_jetty_grp(struct ubcore_jetty_group return container_of(jetty_grp, struct udma_jetty_grp, ubcore_jetty_grp); } +static inline struct udma_target_jetty *to_udma_tjetty(struct ubcore_tjetty *tjetty) +{ + return container_of(tjetty, struct udma_target_jetty, ubcore_tjetty); +} + static inline struct udma_jetty *to_udma_jetty_from_queue(struct udma_jetty_queue *queue) { return container_of(queue, struct udma_jetty, sq); @@ -229,20 +241,44 @@ struct ubcore_jetty *udma_create_jetty(struct ubcore_device *ub_dev, struct ubcore_jetty_cfg *cfg, struct ubcore_udata *udata); int udma_destroy_jetty(struct ubcore_jetty *jetty); +int udma_destroy_jetty_batch(struct ubcore_jetty **jetty_arr, int jetty_num, int *bad_jetty_index); +int udma_unimport_jetty(struct ubcore_tjetty *tjetty); int udma_modify_jetty(struct ubcore_jetty *jetty, struct ubcore_jetty_attr *attr, struct ubcore_udata *udata); struct ubcore_jetty_group *udma_create_jetty_grp(struct ubcore_device *dev, struct ubcore_jetty_grp_cfg *cfg, struct ubcore_udata *udata); int udma_delete_jetty_grp(struct ubcore_jetty_group *jetty_grp); +int udma_flush_jetty(struct ubcore_jetty *jetty, int cr_cnt, struct ubcore_cr *cr); int udma_set_jetty_state(struct udma_dev *dev, uint32_t jetty_id, enum jetty_state state); - +int udma_post_jetty_send_wr(struct ubcore_jetty *jetty, struct ubcore_jfs_wr *wr, + struct ubcore_jfs_wr **bad_wr); +int udma_post_jetty_recv_wr(struct ubcore_jetty *jetty, struct ubcore_jfr_wr *wr, + struct ubcore_jfr_wr **bad_wr); +int udma_unbind_jetty(struct ubcore_jetty *jetty); void udma_reset_sw_k_jetty_queue(struct udma_jetty_queue *sq); int udma_destroy_hw_jetty_ctx(struct udma_dev *dev, uint32_t jetty_id); void udma_set_query_flush_time(struct udma_jetty_queue *sq, uint8_t err_timeout); int udma_modify_and_destroy_jetty(struct udma_dev *dev, struct udma_jetty_queue *sq); +int udma_alloc_jetty_id(struct udma_dev *udma_dev, uint32_t *idx, + struct udma_res *jetty_res); int udma_modify_jetty_precondition(struct udma_dev *dev, struct udma_jetty_queue *sq); +struct ubcore_tjetty *udma_import_jetty_ex(struct ubcore_device *ub_dev, + struct ubcore_tjetty_cfg *cfg, + struct ubcore_active_tp_cfg *active_tp_cfg, + struct ubcore_udata *udata); +int udma_bind_jetty_ex(struct ubcore_jetty *jetty, + struct ubcore_tjetty *tjetty, + struct ubcore_active_tp_cfg *active_tp_cfg, + struct ubcore_udata *udata); +void udma_clean_cqe_for_jetty(struct udma_dev *dev, struct udma_jetty_queue *sq, + struct ubcore_jfc *send_jfc, + struct ubcore_jfc *recv_jfc); +int udma_batch_modify_and_destroy_jetty(struct udma_dev *dev, + struct udma_jetty_queue **sq_list, + uint32_t jetty_cnt, int *bad_jetty_index); + #endif /* __UDMA_JETTY_H__ */ diff --git a/drivers/ub/urma/hw/udma/udma_jfc.c b/drivers/ub/urma/hw/udma/udma_jfc.c index 5067b3c52104879354ce69686ed0c228e41a3e8c..92c9fcbaae9f8f6aba64322db041285ba3848daf 100644 --- 
a/drivers/ub/urma/hw/udma/udma_jfc.c +++ b/drivers/ub/urma/hw/udma/udma_jfc.c @@ -120,70 +120,83 @@ static int udma_get_cmd_from_user(struct udma_create_jfc_ucmd *ucmd, return 0; } -static int udma_get_jfc_buf(struct udma_dev *dev, struct udma_create_jfc_ucmd *ucmd, - struct ubcore_udata *udata, struct udma_jfc *jfc) +static int udma_alloc_u_cq(struct udma_dev *dev, struct udma_create_jfc_ucmd *ucmd, + struct udma_jfc *jfc) { - struct udma_context *uctx; - uint32_t size; - int ret = 0; + int ret; - if (udata) { + if (ucmd->is_hugepage) { + jfc->buf.addr = ucmd->buf_addr; + if (udma_occupy_u_hugepage(jfc->ctx, (void *)jfc->buf.addr)) { + dev_err(dev->dev, "failed to create cq, va not map.\n"); + return -EINVAL; + } + jfc->buf.is_hugepage = true; + } else { ret = pin_queue_addr(dev, ucmd->buf_addr, ucmd->buf_len, &jfc->buf); if (ret) { dev_err(dev->dev, "failed to pin queue for jfc, ret = %d.\n", ret); return ret; } - uctx = to_udma_context(udata->uctx); - jfc->tid = uctx->tid; - ret = udma_pin_sw_db(uctx, &jfc->db); - if (ret) { - dev_err(dev->dev, "failed to pin sw db for jfc, ret = %d.\n", ret); - unpin_queue_addr(jfc->buf.umem); - } + } + jfc->tid = jfc->ctx->tid; - return ret; + ret = udma_pin_sw_db(jfc->ctx, &jfc->db); + if (ret) { + dev_err(dev->dev, "failed to pin sw db for jfc, ret = %d.\n", ret); + goto err_pin_db; } + return 0; +err_pin_db: + if (ucmd->is_hugepage) + udma_return_u_hugepage(jfc->ctx, (void *)jfc->buf.addr); + else + unpin_queue_addr(jfc->buf.umem); + + return ret; +} + +static int udma_alloc_k_cq(struct udma_dev *dev, struct udma_jfc *jfc) +{ + int ret; + if (!jfc->lock_free) spin_lock_init(&jfc->lock); + jfc->buf.entry_size = dev->caps.cqe_size; jfc->tid = dev->tid; - size = jfc->buf.entry_size * jfc->buf.entry_cnt; - - ret = udma_k_alloc_buf(dev, size, &jfc->buf); + ret = udma_k_alloc_buf(dev, &jfc->buf); if (ret) { - dev_err(dev->dev, "failed to alloc buffer for jfc.\n"); + dev_err(dev->dev, "failed to alloc cq buffer, id=%u.\n", jfc->jfcn); return ret; } ret = udma_alloc_sw_db(dev, &jfc->db, UDMA_JFC_TYPE_DB); if (ret) { dev_err(dev->dev, "failed to alloc sw db for jfc(%u).\n", jfc->jfcn); - udma_k_free_buf(dev, size, &jfc->buf); - return -ENOMEM; + udma_k_free_buf(dev, &jfc->buf); } return ret; } -static void udma_free_jfc_buf(struct udma_dev *dev, struct udma_jfc *jfc) +static void udma_free_cq(struct udma_dev *dev, struct udma_jfc *jfc) { - struct udma_context *uctx; - uint32_t size; - - if (jfc->buf.kva) { - size = jfc->buf.entry_size * jfc->buf.entry_cnt; - udma_k_free_buf(dev, size, &jfc->buf); - } else if (jfc->buf.umem) { - uctx = to_udma_context(jfc->base.uctx); - unpin_queue_addr(jfc->buf.umem); + if (jfc->mode != UDMA_NORMAL_JFC_TYPE) { + udma_free_sw_db(dev, &jfc->db); + return; } - if (jfc->db.page) { - uctx = to_udma_context(jfc->base.uctx); - udma_unpin_sw_db(uctx, &jfc->db); - } else if (jfc->db.kpage) { + if (jfc->buf.kva) { + udma_k_free_buf(dev, &jfc->buf); udma_free_sw_db(dev, &jfc->db); + } else { + if (jfc->buf.is_hugepage) + udma_return_u_hugepage(jfc->ctx, (void *)jfc->buf.addr); + else + unpin_queue_addr(jfc->buf.umem); + udma_unpin_sw_db(jfc->ctx, &jfc->db); } } @@ -369,7 +382,7 @@ struct ubcore_jfc *udma_create_jfc(struct ubcore_device *ubcore_dev, goto err_store_jfcn; } - ret = udma_get_jfc_buf(dev, &ucmd, udata, jfc); + ret = udata ? 
udma_alloc_u_cq(dev, &ucmd, jfc) : udma_alloc_k_cq(dev, jfc); if (ret) goto err_get_jfc_buf; @@ -387,7 +400,7 @@ struct ubcore_jfc *udma_create_jfc(struct ubcore_device *ubcore_dev, err_alloc_cqc: jfc->base.uctx = (udata == NULL ? NULL : udata->uctx); - udma_free_jfc_buf(dev, jfc); + udma_free_cq(dev, jfc); err_get_jfc_buf: xa_lock_irqsave(&dev->jfc_table.xa, flags_erase); __xa_erase(&dev->jfc_table.xa, jfc->jfcn); @@ -497,7 +510,7 @@ int udma_destroy_jfc(struct ubcore_jfc *jfc) if (dfx_switch) udma_dfx_delete_id(dev, &dev->dfx_info->jfc, jfc->id); - udma_free_jfc_buf(dev, ujfc); + udma_free_cq(dev, ujfc); udma_id_free(&dev->jfc_table.ida_table, ujfc->jfcn); kfree(ujfc); @@ -645,3 +658,459 @@ int udma_modify_jfc(struct ubcore_jfc *ubcore_jfc, struct ubcore_jfc_attr *attr, return ret; } + +int udma_rearm_jfc(struct ubcore_jfc *jfc, bool solicited_only) +{ + struct udma_dev *dev = to_udma_dev(jfc->ub_dev); + struct udma_jfc *udma_jfc = to_udma_jfc(jfc); + struct udma_jfc_db db; + + db.ci = udma_jfc->ci & (uint32_t)UDMA_JFC_DB_CI_IDX_M; + db.notify = solicited_only; + db.arm_sn = udma_jfc->arm_sn; + db.type = UDMA_CQ_ARM_DB; + db.jfcn = udma_jfc->jfcn; + + udma_write64(dev, (uint64_t *)&db, (void __iomem *)(dev->k_db_base + + UDMA_JFC_HW_DB_OFFSET)); + + return 0; +} + +static enum jfc_poll_state udma_get_cr_status(struct udma_dev *dev, + uint8_t src_status, + uint8_t substatus, + enum ubcore_cr_status *dst_status) +{ +#define UDMA_SRC_STATUS_NUM 7 +#define UDMA_SUB_STATUS_NUM 5 + +struct udma_cr_status { + bool is_valid; + enum ubcore_cr_status cr_status; +}; + + static struct udma_cr_status map[UDMA_SRC_STATUS_NUM][UDMA_SUB_STATUS_NUM] = { + {{true, UBCORE_CR_SUCCESS}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}}, + {{true, UBCORE_CR_UNSUPPORTED_OPCODE_ERR}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}}, + {{false, UBCORE_CR_SUCCESS}, {true, UBCORE_CR_LOC_LEN_ERR}, + {true, UBCORE_CR_LOC_ACCESS_ERR}, {true, UBCORE_CR_REM_RESP_LEN_ERR}, + {true, UBCORE_CR_LOC_DATA_POISON}}, + {{false, UBCORE_CR_SUCCESS}, {true, UBCORE_CR_REM_UNSUPPORTED_REQ_ERR}, + {true, UBCORE_CR_REM_ACCESS_ABORT_ERR}, {false, UBCORE_CR_SUCCESS}, + {true, UBCORE_CR_REM_DATA_POISON}}, + {{true, UBCORE_CR_RNR_RETRY_CNT_EXC_ERR}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}}, + {{true, UBCORE_CR_ACK_TIMEOUT_ERR}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}}, + {{true, UBCORE_CR_FLUSH_ERR}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}, {false, UBCORE_CR_SUCCESS}, + {false, UBCORE_CR_SUCCESS}} + }; + + if ((src_status < UDMA_SRC_STATUS_NUM) && (substatus < UDMA_SUB_STATUS_NUM) && + map[src_status][substatus].is_valid) { + *dst_status = map[src_status][substatus].cr_status; + return JFC_OK; + } + + dev_err(dev->dev, "cqe status is error, status = %u, substatus = %u.\n", + src_status, substatus); + + return JFC_POLL_ERR; +} + +static void udma_handle_inline_cqe(struct udma_jfc_cqe *cqe, uint8_t opcode, + struct udma_jetty_queue *queue, + struct ubcore_cr *cr) +{ + struct udma_jfr *jfr = to_udma_jfr_from_queue(queue); + uint32_t rqe_idx, data_len, sge_idx, size; + struct udma_wqe_sge *sge_list; + void *cqe_inline_buf; + + rqe_idx = cqe->entry_idx; + sge_list = (struct udma_wqe_sge *)(jfr->rq.buf.kva + + rqe_idx * jfr->rq.buf.entry_size); + 
data_len = cqe->byte_cnt; + cqe_inline_buf = opcode == HW_CQE_OPC_SEND ? + (void *)&cqe->data_l : (void *)&cqe->inline_data; + + for (sge_idx = 0; (sge_idx < jfr->max_sge) && data_len; sge_idx++) { + size = sge_list[sge_idx].length < data_len ? + sge_list[sge_idx].length : data_len; + memcpy((void *)(uintptr_t)sge_list[sge_idx].va, + cqe_inline_buf, size); + data_len -= size; + cqe_inline_buf += size; + } + cr->completion_len = cqe->byte_cnt - data_len; + + if (data_len) { + cqe->status = UDMA_CQE_LOCAL_OP_ERR; + cqe->substatus = UDMA_CQE_LOCAL_LENGTH_ERR; + } +} + +static void udma_parse_opcode_for_res(struct udma_dev *dev, + struct udma_jfc_cqe *cqe, + struct ubcore_cr *cr, + struct list_head *tid_list) +{ + uint8_t opcode = cqe->opcode; + struct udma_inv_tid *inv_tid; + + switch (opcode) { + case HW_CQE_OPC_SEND: + cr->opcode = UBCORE_CR_OPC_SEND; + break; + case HW_CQE_OPC_SEND_WITH_IMM: + cr->imm_data = (uint64_t)cqe->data_h << UDMA_IMM_DATA_SHIFT | + cqe->data_l; + cr->opcode = UBCORE_CR_OPC_SEND_WITH_IMM; + break; + case HW_CQE_OPC_SEND_WITH_INV: + cr->invalid_token.token_id = cqe->data_l & (uint32_t)UDMA_CQE_INV_TOKEN_ID; + cr->invalid_token.token_id <<= UDMA_TID_SHIFT; + cr->invalid_token.token_value.token = cqe->data_h; + cr->opcode = UBCORE_CR_OPC_SEND_WITH_INV; + + inv_tid = kzalloc(sizeof(*inv_tid), GFP_ATOMIC); + if (!inv_tid) + return; + + inv_tid->tid = cr->invalid_token.token_id >> UDMA_TID_SHIFT; + list_add(&inv_tid->list, tid_list); + + break; + case HW_CQE_OPC_WRITE_WITH_IMM: + cr->imm_data = (uint64_t)cqe->data_h << UDMA_IMM_DATA_SHIFT | + cqe->data_l; + cr->opcode = UBCORE_CR_OPC_WRITE_WITH_IMM; + break; + default: + cr->opcode = (enum ubcore_cr_opcode)HW_CQE_OPC_ERR; + dev_err(dev->dev, "receive invalid opcode :%u.\n", opcode); + cr->status = UBCORE_CR_UNSUPPORTED_OPCODE_ERR; + break; + } +} + +static struct udma_jfr *udma_get_jfr(struct udma_dev *udma_dev, + struct udma_jfc_cqe *cqe, + struct ubcore_cr *cr) +{ + struct udma_jetty_queue *udma_sq; + struct udma_jetty *jetty = NULL; + struct udma_jfr *jfr = NULL; + uint32_t local_id; + + local_id = cr->local_id; + if (cqe->is_jetty) { + udma_sq = (struct udma_jetty_queue *)xa_load(&udma_dev->jetty_table.xa, local_id); + if (!udma_sq) { + dev_warn(udma_dev->dev, + "get jetty failed, jetty_id = %u.\n", local_id); + return NULL; + } + jetty = to_udma_jetty_from_queue(udma_sq); + jfr = jetty->jfr; + cr->user_data = (uintptr_t)&jetty->ubcore_jetty; + } else { + jfr = (struct udma_jfr *)xa_load(&udma_dev->jfr_table.xa, local_id); + if (!jfr) { + dev_warn(udma_dev->dev, + "get jfr failed jfr id = %u.\n", local_id); + return NULL; + } + cr->user_data = (uintptr_t)&jfr->ubcore_jfr; + } + + return jfr; +} + +static bool udma_update_jfr_idx(struct udma_dev *dev, + struct udma_jfc_cqe *cqe, + struct ubcore_cr *cr, + bool is_clean) +{ + struct udma_jetty_queue *queue; + uint8_t opcode = cqe->opcode; + struct udma_jfr *jfr; + uint32_t entry_idx; + + jfr = udma_get_jfr(dev, cqe, cr); + if (!jfr) + return true; + + queue = &jfr->rq; + entry_idx = cqe->entry_idx; + cr->user_ctx = queue->wrid[entry_idx & (queue->buf.entry_cnt - (uint32_t)1)]; + + if (!is_clean && cqe->inline_en) + udma_handle_inline_cqe(cqe, opcode, queue, cr); + + if (!jfr->ubcore_jfr.jfr_cfg.flag.bs.lock_free) + spin_lock(&jfr->lock); + + udma_id_free(&jfr->idx_que.jfr_idx_table.ida_table, entry_idx); + queue->ci++; + + if (!jfr->ubcore_jfr.jfr_cfg.flag.bs.lock_free) + spin_unlock(&jfr->lock); + + return false; +} + +static enum jfc_poll_state 
udma_parse_cqe_for_send(struct udma_dev *dev, + struct udma_jfc_cqe *cqe, + struct ubcore_cr *cr) +{ + struct udma_jetty_queue *queue; + struct udma_jetty *jetty; + struct udma_jfs *jfs; + + queue = (struct udma_jetty_queue *)(uintptr_t)( + (uint64_t)cqe->user_data_h << UDMA_ADDR_SHIFT | + cqe->user_data_l); + if (!queue) { + dev_err(dev->dev, "jetty queue addr is null, jetty_id = %u.\n", cr->local_id); + return JFC_POLL_ERR; + } + + if (unlikely(udma_get_cr_status(dev, cqe->status, cqe->substatus, &cr->status))) + return JFC_POLL_ERR; + + if (!!cqe->fd) { + cr->status = UBCORE_CR_WR_FLUSH_ERR_DONE; + queue->flush_flag = true; + } else { + queue->ci += (cqe->entry_idx - queue->ci) & (queue->buf.entry_cnt - 1); + cr->user_ctx = queue->wrid[queue->ci & (queue->buf.entry_cnt - 1)]; + queue->ci++; + } + + if (!!cr->flag.bs.jetty) { + jetty = to_udma_jetty_from_queue(queue); + cr->user_data = (uintptr_t)&jetty->ubcore_jetty; + } else { + jfs = container_of(queue, struct udma_jfs, sq); + cr->user_data = (uintptr_t)&jfs->ubcore_jfs; + } + + return JFC_OK; +} + +static enum jfc_poll_state udma_parse_cqe_for_recv(struct udma_dev *dev, + struct udma_jfc_cqe *cqe, + struct ubcore_cr *cr, + struct list_head *tid_list) +{ + uint8_t substatus; + uint8_t status; + + if (unlikely(udma_update_jfr_idx(dev, cqe, cr, false))) + return JFC_POLL_ERR; + + udma_parse_opcode_for_res(dev, cqe, cr, tid_list); + status = cqe->status; + substatus = cqe->substatus; + if (unlikely(udma_get_cr_status(dev, status, substatus, &cr->status))) + return JFC_POLL_ERR; + + return JFC_OK; +} + +static enum jfc_poll_state parse_cqe_for_jfc(struct udma_dev *dev, + struct udma_jfc_cqe *cqe, + struct ubcore_cr *cr, + struct list_head *tid_list) +{ + enum jfc_poll_state ret; + + cr->flag.bs.s_r = cqe->s_r; + cr->flag.bs.jetty = cqe->is_jetty; + cr->completion_len = cqe->byte_cnt; + cr->tpn = cqe->tpn; + cr->local_id = cqe->local_num_h << UDMA_SRC_IDX_SHIFT | cqe->local_num_l; + cr->remote_id.id = cqe->rmt_idx; + udma_swap_endian((uint8_t *)(cqe->rmt_eid), cr->remote_id.eid.raw, UBCORE_EID_SIZE); + + if (cqe->s_r == CQE_FOR_RECEIVE) + ret = udma_parse_cqe_for_recv(dev, cqe, cr, tid_list); + else + ret = udma_parse_cqe_for_send(dev, cqe, cr); + + return ret; +} + +static struct udma_jfc_cqe *get_next_cqe(struct udma_jfc *jfc, uint32_t n) +{ + struct udma_jfc_cqe *cqe; + uint32_t valid_owner; + + cqe = (struct udma_jfc_cqe *)get_buf_entry(&jfc->buf, n); + + valid_owner = (n >> jfc->cq_shift) & UDMA_JFC_DB_VALID_OWNER_M; + if (!(cqe->owner ^ valid_owner)) + return NULL; + + return cqe; +} + +static void dump_cqe_aux_info(struct udma_dev *dev, struct ubcore_cr *cr) +{ + struct ubcore_user_ctl_out out = {}; + struct ubcore_user_ctl_in in = {}; + struct udma_cqe_info_in info_in; + + info_in.status = cr->status; + info_in.s_r = cr->flag.bs.s_r; + in.addr = (uint64_t)&info_in; + in.len = sizeof(struct udma_cqe_info_in); + in.opcode = UDMA_USER_CTL_QUERY_CQE_AUX_INFO; + + (void)udma_query_cqe_aux_info(&dev->ub_dev, NULL, &in, &out); +} + +static enum jfc_poll_state udma_poll_one(struct udma_dev *dev, + struct udma_jfc *jfc, + struct ubcore_cr *cr, + struct list_head *tid_list) +{ + struct udma_jfc_cqe *cqe; + + cqe = get_next_cqe(jfc, jfc->ci); + if (!cqe) + return JFC_EMPTY; + + ++jfc->ci; + /* Memory barrier */ + rmb(); + + if (parse_cqe_for_jfc(dev, cqe, cr, tid_list)) + return JFC_POLL_ERR; + + if (unlikely(cr->status != UBCORE_CR_SUCCESS) && dump_aux_info) + dump_cqe_aux_info(dev, cr); + + return JFC_OK; +} + +static void 
udma_inv_tid(struct udma_dev *dev, struct list_head *tid_list) +{ + struct udma_inv_tid *tid_node; + struct udma_inv_tid *tmp; + struct iommu_sva *ksva; + uint32_t tid; + + mutex_lock(&dev->ksva_mutex); + list_for_each_entry_safe(tid_node, tmp, tid_list, list) { + tid = tid_node->tid; + ksva = (struct iommu_sva *)xa_load(&dev->ksva_table, tid); + if (!ksva) { + dev_warn(dev->dev, "tid may have been released.\n"); + } else { + ummu_ksva_unbind_device(ksva); + __xa_erase(&dev->ksva_table, tid); + } + + list_del(&tid_node->list); + kfree(tid_node); + } + mutex_unlock(&dev->ksva_mutex); +} + +/* thanks to drivers/infiniband/hw/bnxt_re/ib_verbs.c */ +int udma_poll_jfc(struct ubcore_jfc *jfc, int cr_cnt, struct ubcore_cr *cr) +{ + struct udma_dev *dev = to_udma_dev(jfc->ub_dev); + struct udma_jfc *udma_jfc = to_udma_jfc(jfc); + enum jfc_poll_state err = JFC_OK; + struct list_head tid_list; + uint32_t ci; + int npolled; + + INIT_LIST_HEAD(&tid_list); + + if (!jfc->jfc_cfg.flag.bs.lock_free) + spin_lock(&udma_jfc->lock); + + for (npolled = 0; npolled < cr_cnt; ++npolled) { + err = udma_poll_one(dev, udma_jfc, cr + npolled, &tid_list); + if (err != JFC_OK) + break; + } + + if (npolled) { + ci = udma_jfc->ci; + *udma_jfc->db.db_record = ci & (uint32_t)UDMA_JFC_DB_CI_IDX_M; + } + + if (!jfc->jfc_cfg.flag.bs.lock_free) + spin_unlock(&udma_jfc->lock); + + if (!list_empty(&tid_list)) + udma_inv_tid(dev, &tid_list); + + return err == JFC_POLL_ERR ? -UDMA_INTER_ERR : npolled; +} + +void udma_clean_jfc(struct ubcore_jfc *jfc, uint32_t jetty_id, struct udma_dev *udma_dev) +{ + struct udma_jfc *udma_jfc = to_udma_jfc(jfc); + struct udma_jfc_cqe *dest; + struct udma_jfc_cqe *cqe; + struct ubcore_cr cr; + uint32_t nfreed = 0; + uint32_t local_id; + uint8_t owner_bit; + uint32_t pi; + + if (udma_jfc->mode != (uint32_t)UDMA_NORMAL_JFC_TYPE) + return; + + if (!jfc->jfc_cfg.flag.bs.lock_free) + spin_lock(&udma_jfc->lock); + + for (pi = udma_jfc->ci; get_next_cqe(udma_jfc, pi) != NULL; ++pi) { + if (pi > udma_jfc->ci + udma_jfc->buf.entry_cnt) + break; + } + while ((int) --pi - (int) udma_jfc->ci >= 0) { + cqe = get_buf_entry(&udma_jfc->buf, pi); + /* make sure cqe buffer is valid */ + rmb(); + local_id = (cqe->local_num_h << UDMA_SRC_IDX_SHIFT) | cqe->local_num_l; + if (local_id == jetty_id) { + if (cqe->s_r == CQE_FOR_RECEIVE) { + cr.local_id = local_id; + (void)udma_update_jfr_idx(udma_dev, cqe, &cr, true); + } + + ++nfreed; + } else if (!!nfreed) { + dest = get_buf_entry(&udma_jfc->buf, pi + nfreed); + /* make sure owner bit is valid */ + rmb(); + owner_bit = dest->owner; + (void)memcpy(dest, cqe, udma_dev->caps.cqe_size); + dest->owner = owner_bit; + } + } + + if (!!nfreed) { + udma_jfc->ci += nfreed; + wmb(); /* make sure software gets the cqe data before updating the doorbell */ + *udma_jfc->db.db_record = udma_jfc->ci & (uint32_t)UDMA_JFC_DB_CI_IDX_M; + } + + if (!jfc->jfc_cfg.flag.bs.lock_free) + spin_unlock(&udma_jfc->lock); +} diff --git a/drivers/ub/urma/hw/udma/udma_jfc.h b/drivers/ub/urma/hw/udma/udma_jfc.h index 29db1243623e352559b796754037a9c7883665ac..6f62f33eccdf49f77219b677064a509b8b9cafb3 100644 --- a/drivers/ub/urma/hw/udma/udma_jfc.h +++ b/drivers/ub/urma/hw/udma/udma_jfc.h @@ -18,6 +18,9 @@ #define UDMA_STARS_SWITCH 1 +#define UDMA_JFC_DB_CI_IDX_M GENMASK(21, 0) +#define UDMA_CQE_INV_TOKEN_ID GENMASK(19, 0) + enum udma_jfc_state { UDMA_JFC_STATE_INVALID, UDMA_JFC_STATE_VALID, @@ -131,6 +134,46 @@ struct udma_jfc_ctx { uint32_t rsv11[12]; }; +struct udma_jfc_cqe { + /* DW0 */ + uint32_t s_r : 1; + uint32_t
is_jetty : 1; + uint32_t owner : 1; + uint32_t inline_en : 1; + uint32_t opcode : 3; + uint32_t fd : 1; + uint32_t rsv : 8; + uint32_t substatus : 8; + uint32_t status : 8; + /* DW1 */ + uint32_t entry_idx : 16; + uint32_t local_num_l : 16; + /* DW2 */ + uint32_t local_num_h : 4; + uint32_t rmt_idx : 20; + uint32_t rsv1 : 8; + /* DW3 */ + uint32_t tpn : 24; + uint32_t rsv2 : 8; + /* DW4 */ + uint32_t byte_cnt; + /* DW5 ~ DW6 */ + uint32_t user_data_l; + uint32_t user_data_h; + /* DW7 ~ DW10 */ + uint32_t rmt_eid[4]; + /* DW11 ~ DW12 */ + uint32_t data_l; + uint32_t data_h; + /* DW13 ~ DW15 */ + uint32_t inline_data[3]; +}; + +struct udma_inv_tid { + uint32_t tid; + struct list_head list; +}; + static inline struct udma_jfc *to_udma_jfc(struct ubcore_jfc *jfc) { return container_of(jfc, struct udma_jfc, base); @@ -144,5 +187,12 @@ int udma_jfc_completion(struct notifier_block *nb, unsigned long jfcn, void *data); int udma_modify_jfc(struct ubcore_jfc *ubcore_jfc, struct ubcore_jfc_attr *attr, struct ubcore_udata *udata); +int udma_rearm_jfc(struct ubcore_jfc *jfc, bool solicited_only); +int udma_poll_jfc(struct ubcore_jfc *jfc, int cr_cnt, struct ubcore_cr *cr); +int udma_check_jfc_cfg(struct udma_dev *dev, struct udma_jfc *jfc, + struct ubcore_jfc_cfg *cfg); +void udma_init_jfc_param(struct ubcore_jfc_cfg *cfg, struct udma_jfc *jfc); +int udma_post_create_jfc_mbox(struct udma_dev *dev, struct udma_jfc *jfc); +void udma_clean_jfc(struct ubcore_jfc *jfc, uint32_t jetty_id, struct udma_dev *udma_dev); #endif /* __UDMA_JFC_H__ */ diff --git a/drivers/ub/urma/hw/udma/udma_jfr.c b/drivers/ub/urma/hw/udma/udma_jfr.c index 953fcffc50018daf32fd6ed641e478ac072923a2..6bfc135fa84617f8b8f29aa58d94f89c70b8c530 100644 --- a/drivers/ub/urma/hw/udma/udma_jfr.c +++ b/drivers/ub/urma/hw/udma/udma_jfr.c @@ -48,29 +48,21 @@ static int udma_verify_jfr_param(struct udma_dev *dev, static int udma_get_k_jfr_buf(struct udma_dev *dev, struct udma_jfr *jfr) { - uint32_t rqe_buf_size; uint32_t idx_buf_size; - uint32_t sge_per_wqe; int ret; - sge_per_wqe = min(jfr->max_sge, dev->caps.jfr_sge); - jfr->rq.buf.entry_size = UDMA_SGE_SIZE * sge_per_wqe; + jfr->rq.buf.entry_size = UDMA_SGE_SIZE * min(jfr->max_sge, dev->caps.jfr_sge); jfr->rq.buf.entry_cnt = jfr->wqe_cnt; - rqe_buf_size = jfr->rq.buf.entry_size * jfr->rq.buf.entry_cnt; - - ret = udma_k_alloc_buf(dev, rqe_buf_size, &jfr->rq.buf); + ret = udma_k_alloc_buf(dev, &jfr->rq.buf); if (ret) { - dev_err(dev->dev, - "failed to alloc rq buffer for jfr when buffer size = %u.\n", - rqe_buf_size); + dev_err(dev->dev, "failed to alloc rq buffer, id=%u.\n", jfr->rq.id); return ret; } jfr->idx_que.buf.entry_size = UDMA_IDX_QUE_ENTRY_SZ; jfr->idx_que.buf.entry_cnt = jfr->wqe_cnt; idx_buf_size = jfr->idx_que.buf.entry_size * jfr->idx_que.buf.entry_cnt; - - ret = udma_k_alloc_buf(dev, idx_buf_size, &jfr->idx_que.buf); + ret = udma_alloc_normal_buf(dev, idx_buf_size, &jfr->idx_que.buf); if (ret) { dev_err(dev->dev, "failed to alloc idx que buffer for jfr when buffer size = %u.\n", @@ -98,26 +90,24 @@ static int udma_get_k_jfr_buf(struct udma_dev *dev, struct udma_jfr *jfr) err_alloc_db: kfree(jfr->rq.wrid); err_wrid: - udma_k_free_buf(dev, idx_buf_size, &jfr->idx_que.buf); + udma_free_normal_buf(dev, idx_buf_size, &jfr->idx_que.buf); err_idx_que: - udma_k_free_buf(dev, rqe_buf_size, &jfr->rq.buf); + udma_k_free_buf(dev, &jfr->rq.buf); return -ENOMEM; } -static int udma_get_u_jfr_buf(struct udma_dev *dev, struct udma_jfr *jfr, - struct ubcore_udata *udata, +static int 
udma_jfr_get_u_cmd(struct udma_dev *dev, struct ubcore_udata *udata, struct udma_create_jetty_ucmd *ucmd) { unsigned long byte; - int ret; if (!udata->udrv_data) { dev_err(dev->dev, "jfr udata udrv_data is null.\n"); return -EINVAL; } - if (!udata->udrv_data->in_addr || udata->udrv_data->in_len < sizeof(*ucmd)) { + if (!udata->udrv_data->in_addr || udata->udrv_data->in_len != sizeof(*ucmd)) { dev_err(dev->dev, "jfr in_len %u or addr is invalid.\n", udata->udrv_data->in_len); return -EINVAL; @@ -131,14 +121,41 @@ static int udma_get_u_jfr_buf(struct udma_dev *dev, struct udma_jfr *jfr, return -EFAULT; } - if (!ucmd->non_pin) { + return 0; +} + +static int udma_get_u_jfr_buf(struct udma_dev *dev, struct udma_jfr *jfr, + struct ubcore_udata *udata, + struct udma_create_jetty_ucmd *ucmd) +{ + int ret; + + ret = udma_jfr_get_u_cmd(dev, udata, ucmd); + if (ret) + return ret; + + jfr->udma_ctx = to_udma_context(udata->uctx); + if (ucmd->non_pin) { + jfr->rq.buf.addr = ucmd->buf_addr; + } else if (ucmd->is_hugepage) { + jfr->rq.buf.addr = ucmd->buf_addr; + if (udma_occupy_u_hugepage(jfr->udma_ctx, (void *)jfr->rq.buf.addr)) { + dev_err(dev->dev, "failed to create rq, va not map.\n"); + return -EINVAL; + } + jfr->rq.buf.is_hugepage = true; + } else { ret = pin_queue_addr(dev, ucmd->buf_addr, ucmd->buf_len, &jfr->rq.buf); if (ret) { dev_err(dev->dev, "failed to pin jfr rqe buf addr, ret = %d.\n", ret); return ret; } + } + if (ucmd->non_pin) { + jfr->idx_que.buf.addr = ucmd->idx_addr; + } else { ret = pin_queue_addr(dev, ucmd->idx_addr, ucmd->idx_len, &jfr->idx_que.buf); if (ret) { @@ -146,12 +163,8 @@ static int udma_get_u_jfr_buf(struct udma_dev *dev, struct udma_jfr *jfr, "failed to pin jfr idx que addr, ret = %d.\n", ret); goto err_pin_idx_buf; } - } else { - jfr->rq.buf.addr = ucmd->buf_addr; - jfr->idx_que.buf.addr = ucmd->idx_addr; } - jfr->udma_ctx = to_udma_context(udata->uctx); jfr->sw_db.db_addr = ucmd->db_addr; jfr->jfr_sleep_buf.db_addr = ucmd->jfr_sleep_buf; @@ -181,7 +194,10 @@ static int udma_get_u_jfr_buf(struct udma_dev *dev, struct udma_jfr *jfr, err_pin_sw_db: unpin_queue_addr(jfr->idx_que.buf.umem); err_pin_idx_buf: - unpin_queue_addr(jfr->rq.buf.umem); + if (ucmd->is_hugepage) + udma_return_u_hugepage(jfr->udma_ctx, (void *)jfr->rq.buf.addr); + else + unpin_queue_addr(jfr->rq.buf.umem); return ret; } @@ -205,19 +221,21 @@ static void udma_put_jfr_buf(struct udma_dev *dev, struct udma_jfr *jfr) udma_unpin_sw_db(jfr->udma_ctx, &jfr->jfr_sleep_buf); udma_unpin_sw_db(jfr->udma_ctx, &jfr->sw_db); unpin_queue_addr(jfr->idx_que.buf.umem); - unpin_queue_addr(jfr->rq.buf.umem); + if (jfr->rq.buf.is_hugepage) + udma_return_u_hugepage(jfr->udma_ctx, (void *)jfr->rq.buf.addr); + else + unpin_queue_addr(jfr->rq.buf.umem); return; } if (jfr->rq.buf.kva) { - size = jfr->rq.buf.entry_cnt * jfr->rq.buf.entry_size; - udma_k_free_buf(dev, size, &jfr->rq.buf); + udma_k_free_buf(dev, &jfr->rq.buf); udma_free_sw_db(dev, &jfr->sw_db); } if (jfr->idx_que.buf.kva) { size = jfr->idx_que.buf.entry_cnt * jfr->idx_que.buf.entry_size; - udma_k_free_buf(dev, size, &jfr->idx_que.buf); + udma_free_normal_buf(dev, size, &jfr->idx_que.buf); udma_destroy_udma_table(dev, &jfr->idx_que.jfr_idx_table, "JFR_IDX"); } @@ -569,6 +587,9 @@ static void udma_free_jfr(struct ubcore_jfr *jfr) struct udma_dev *udma_dev = to_udma_dev(jfr->ub_dev); struct udma_jfr *udma_jfr = to_udma_jfr(jfr); + if (udma_jfr->rq.buf.kva && jfr->jfr_cfg.jfc) + udma_clean_jfc(jfr->jfr_cfg.jfc, udma_jfr->rq.id, udma_dev); + if (dfx_switch) 
udma_dfx_delete_id(udma_dev, &udma_dev->dfx_info->jfr, udma_jfr->rq.id); @@ -790,3 +811,138 @@ int udma_modify_jfr(struct ubcore_jfr *jfr, struct ubcore_jfr_attr *attr, return 0; } + +int udma_unimport_jfr(struct ubcore_tjetty *tjfr) +{ + struct udma_target_jetty *udma_tjfr = to_udma_tjetty(tjfr); + + udma_tjfr->token_value = 0; + tjfr->cfg.token_value.token = 0; + + kfree(udma_tjfr); + + return 0; +} + +static void fill_wqe_idx(struct udma_jfr *jfr, uint32_t wqe_idx) +{ + uint32_t *idx_buf; + + idx_buf = (uint32_t *)get_buf_entry(&jfr->idx_que.buf, jfr->rq.pi); + *idx_buf = cpu_to_le32(wqe_idx); + + jfr->rq.pi++; +} + +static void fill_recv_sge_to_wqe(struct ubcore_jfr_wr *wr, void *wqe, + uint32_t max_sge) +{ + struct udma_wqe_sge *sge = (struct udma_wqe_sge *)wqe; + uint32_t i, cnt; + + for (i = 0, cnt = 0; i < wr->src.num_sge; i++) { + if (!wr->src.sge[i].len) + continue; + set_data_of_sge(sge + cnt, wr->src.sge + i); + ++cnt; + } + + if (cnt < max_sge) + memset(sge + cnt, 0, (max_sge - cnt) * UDMA_SGE_SIZE); +} + +static int post_recv_one(struct udma_dev *dev, struct udma_jfr *jfr, + struct ubcore_jfr_wr *wr) +{ + uint32_t wqe_idx; + int ret = 0; + void *wqe; + + if (unlikely(wr->src.num_sge > jfr->max_sge)) { + dev_err(dev->dev, + "failed to check sge, wr_num_sge = %u, max_sge = %u, jfrn = %u.\n", + wr->src.num_sge, jfr->max_sge, jfr->rq.id); + return -EINVAL; + } + + if (udma_jfrwq_overflow(jfr)) { + dev_err(dev->dev, "failed to check jfrwq, jfrwq is full, jfrn = %u.\n", + jfr->rq.id); + return -ENOMEM; + } + + ret = udma_id_alloc(dev, &jfr->idx_que.jfr_idx_table.ida_table, + &wqe_idx); + if (ret) { + dev_err(dev->dev, "failed to get jfr wqe idx.\n"); + return ret; + } + wqe = get_buf_entry(&jfr->rq.buf, wqe_idx); + + fill_recv_sge_to_wqe(wr, wqe, jfr->max_sge); + + fill_wqe_idx(jfr, wqe_idx); + + jfr->rq.wrid[wqe_idx] = wr->user_ctx; + + return ret; +} + +/* thanks to drivers/infiniband/hw/bnxt_re/ib_verbs.c */ +int udma_post_jfr_wr(struct ubcore_jfr *ubcore_jfr, struct ubcore_jfr_wr *wr, + struct ubcore_jfr_wr **bad_wr) +{ + struct udma_dev *dev = to_udma_dev(ubcore_jfr->ub_dev); + struct udma_jfr *jfr = to_udma_jfr(ubcore_jfr); + uint32_t nreq; + int ret = 0; + + if (!ubcore_jfr->jfr_cfg.flag.bs.lock_free) + spin_lock(&jfr->lock); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + ret = post_recv_one(dev, jfr, wr); + if (ret) { + *bad_wr = wr; + break; + } + } + + if (likely(nreq)) { + /* + * Ensure that the pipeline fills all RQEs into the RQ queue, + * then update the PI pointer.
+ */ + wmb(); + *jfr->sw_db.db_record = jfr->rq.pi & + (uint32_t)UDMA_JFR_DB_PI_M; + } + + if (!ubcore_jfr->jfr_cfg.flag.bs.lock_free) + spin_unlock(&jfr->lock); + + return ret; +} + +struct ubcore_tjetty *udma_import_jfr_ex(struct ubcore_device *dev, + struct ubcore_tjetty_cfg *cfg, + struct ubcore_active_tp_cfg *active_tp_cfg, + struct ubcore_udata *udata) +{ + struct udma_target_jetty *udma_tjfr; + + udma_tjfr = kzalloc(sizeof(*udma_tjfr), GFP_KERNEL); + if (!udma_tjfr) + return NULL; + + if (!udata) { + if (cfg->flag.bs.token_policy != UBCORE_TOKEN_NONE) { + udma_tjfr->token_value = cfg->token_value.token; + udma_tjfr->token_value_valid = true; + } + } + + udma_swap_endian(cfg->id.eid.raw, udma_tjfr->le_eid.raw, UBCORE_EID_SIZE); + + return &udma_tjfr->ubcore_tjetty; +} diff --git a/drivers/ub/urma/hw/udma/udma_jfr.h b/drivers/ub/urma/hw/udma/udma_jfr.h index ae6d0d97f4608daa5edba9c33082af9e92bef045..c446eaedee1db5fc95fe89cf3834fc2358942e1a 100644 --- a/drivers/ub/urma/hw/udma/udma_jfr.h +++ b/drivers/ub/urma/hw/udma/udma_jfr.h @@ -95,6 +95,12 @@ struct udma_jfr { struct completion ae_comp; }; +struct udma_wqe_sge { + uint32_t length; + uint32_t token_id; + uint64_t va; +}; + struct udma_jfr_ctx { /* DW0 */ uint32_t state : 2; @@ -150,6 +156,17 @@ static inline struct udma_jfr *to_udma_jfr(struct ubcore_jfr *jfr) return container_of(jfr, struct udma_jfr, ubcore_jfr); } +static inline bool udma_jfrwq_overflow(struct udma_jfr *jfr) +{ + return (jfr->rq.pi - jfr->rq.ci) >= jfr->wqe_cnt; +} + +static inline void set_data_of_sge(struct udma_wqe_sge *sge, struct ubcore_sge *sg) +{ + sge->va = cpu_to_le64(sg->addr); + sge->length = cpu_to_le32(sg->len); +} + static inline struct udma_jfr *to_udma_jfr_from_queue(struct udma_jetty_queue *queue) { return container_of(queue, struct udma_jfr, rq); @@ -161,5 +178,12 @@ struct ubcore_jfr *udma_create_jfr(struct ubcore_device *dev, struct ubcore_jfr_ struct ubcore_udata *udata); int udma_destroy_jfr(struct ubcore_jfr *jfr); int udma_destroy_jfr_batch(struct ubcore_jfr **jfr_arr, int jfr_num, int *bad_jfr_index); +int udma_unimport_jfr(struct ubcore_tjetty *tjfr); +struct ubcore_tjetty *udma_import_jfr_ex(struct ubcore_device *dev, + struct ubcore_tjetty_cfg *cfg, + struct ubcore_active_tp_cfg *active_tp_cfg, + struct ubcore_udata *udata); +int udma_post_jfr_wr(struct ubcore_jfr *ubcore_jfr, struct ubcore_jfr_wr *wr, + struct ubcore_jfr_wr **bad_wr); #endif /* __UDMA_JFR_H__ */ diff --git a/drivers/ub/urma/hw/udma/udma_jfs.c b/drivers/ub/urma/hw/udma/udma_jfs.c index e770bc5f6a2ffb4769d9647fced06de888586c9a..7277db44da128f8fd30500259bdb74f96cc06ed3 100644 --- a/drivers/ub/urma/hw/udma/udma_jfs.c +++ b/drivers/ub/urma/hw/udma/udma_jfs.c @@ -21,7 +21,7 @@ int udma_alloc_u_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq, struct udma_create_jetty_ucmd *ucmd) { - int ret; + int ret = 0; if (ucmd->sqe_bb_cnt == 0 || ucmd->buf_len == 0) { dev_err(dev->dev, "invalid param, sqe_bb_cnt=%u, buf_len=%u.\n", @@ -33,17 +33,22 @@ int udma_alloc_u_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq, sq->buf.entry_cnt = ucmd->buf_len >> WQE_BB_SIZE_SHIFT; if (sq->non_pin) { sq->buf.addr = ucmd->buf_addr; + } else if (ucmd->is_hugepage) { + sq->buf.addr = ucmd->buf_addr; + if (udma_occupy_u_hugepage(sq->udma_ctx, (void *)sq->buf.addr)) { + dev_err(dev->dev, "failed to create sq, va not map.\n"); + return -EINVAL; + } + sq->buf.is_hugepage = true; } else { ret = pin_queue_addr(dev, ucmd->buf_addr, ucmd->buf_len, &sq->buf); if (ret) { - dev_err(dev->dev, - 
"failed to pin jetty/jfs queue addr, ret = %d.\n", - ret); + dev_err(dev->dev, "failed to pin sq, ret = %d.\n", ret); return ret; } } - return 0; + return ret; } int udma_alloc_k_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq, @@ -71,19 +76,18 @@ int udma_alloc_k_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq, size = ALIGN(wqe_bb_depth * sq->buf.entry_size, UDMA_HW_PAGE_SIZE); sq->buf.entry_cnt = size >> WQE_BB_SIZE_SHIFT; - ret = udma_k_alloc_buf(dev, size, &sq->buf); + ret = udma_k_alloc_buf(dev, &sq->buf); if (ret) { - dev_err(dev->dev, - "failed to alloc jetty (%u) sq buf when size = %u.\n", sq->id, size); + dev_err(dev->dev, "failed to alloc sq buffer, id=%u.\n", sq->id); return ret; } sq->wrid = kcalloc(1, sq->buf.entry_cnt * sizeof(uint64_t), GFP_KERNEL); if (!sq->wrid) { - udma_k_free_buf(dev, size, &sq->buf); dev_err(dev->dev, "failed to alloc wrid for jfs id = %u when entry cnt = %u.\n", sq->id, sq->buf.entry_cnt); + udma_k_free_buf(dev, &sq->buf); return -ENOMEM; } @@ -95,18 +99,20 @@ int udma_alloc_k_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq, void udma_free_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq) { - uint32_t size; - if (sq->buf.kva) { - size = sq->buf.entry_cnt * sq->buf.entry_size; - udma_k_free_buf(dev, size, &sq->buf); + udma_k_free_buf(dev, &sq->buf); kfree(sq->wrid); return; } + if (sq->non_pin) return; - unpin_queue_addr(sq->buf.umem); + if (sq->buf.is_hugepage) { + udma_return_u_hugepage(sq->udma_ctx, (void *)sq->buf.addr); + } else { + unpin_queue_addr(sq->buf.umem); + } } void udma_init_jfsc(struct udma_dev *dev, struct ubcore_jfs_cfg *cfg, @@ -155,6 +161,40 @@ void udma_init_jfsc(struct udma_dev *dev, struct ubcore_jfs_cfg *cfg, ctx->next_rcv_ssn = ctx->next_send_ssn; } +int udma_verify_jfs_param(struct udma_dev *dev, struct ubcore_jfs_cfg *cfg, + bool enable_stars) +{ + if (!cfg->depth || cfg->depth > dev->caps.jfs.depth || + cfg->max_sge > dev->caps.jfs_sge || cfg->trans_mode == UBCORE_TP_RC) { + dev_err(dev->dev, + "jfs param is invalid, depth = %u, seg = %u, max_depth = %u, max_jfs_seg = %u, trans_mode = %u.\n", + cfg->depth, cfg->max_sge, dev->caps.jfs.depth, + dev->caps.jfs_sge, cfg->trans_mode); + return -EINVAL; + } + + if (enable_stars && cfg->max_inline_data != 0 && + cfg->max_inline_data > dev->caps.jfs_inline_sz) { + dev_err(dev->dev, "jfs param is invalid, inline_data:%u, max_inline_len:%u.\n", + cfg->max_inline_data, dev->caps.jfs_inline_sz); + return -EINVAL; + } + + if (enable_stars && cfg->max_rsge > dev->caps.jfs_rsge) { + dev_err(dev->dev, "jfs param is invalid, rsge:%u, max_rsge:%u.\n", + cfg->max_rsge, dev->caps.jfs_rsge); + return -EINVAL; + } + + if (cfg->priority >= UDMA_MAX_PRIORITY) { + dev_err(dev->dev, "kernel mode jfs priority is out of range, priority is %u.\n", + cfg->priority); + return -EINVAL; + } + + return 0; +} + void udma_dfx_store_jfs_id(struct udma_dev *udma_dev, struct udma_jfs *udma_jfs) { struct udma_dfx_jfs *jfs; @@ -243,6 +283,7 @@ static int udma_get_user_jfs_cmd(struct udma_dev *dev, struct udma_jfs *jfs, } uctx = to_udma_context(udata->uctx); + jfs->sq.udma_ctx = uctx; jfs->sq.tid = uctx->tid; jfs->jfs_addr = ucmd->jetty_addr; jfs->pi_type = ucmd->pi_type; @@ -258,7 +299,7 @@ static int udma_get_user_jfs_cmd(struct udma_dev *dev, struct udma_jfs *jfs, } static int udma_alloc_jfs_sq(struct udma_dev *dev, struct ubcore_jfs_cfg *cfg, - struct udma_jfs *jfs, struct ubcore_udata *udata) + struct udma_jfs *jfs, struct ubcore_udata *udata) { struct udma_create_jetty_ucmd 
ucmd = {}; int ret; @@ -361,6 +402,8 @@ static void udma_free_jfs(struct ubcore_jfs *jfs) struct udma_dev *dev = to_udma_dev(jfs->ub_dev); struct udma_jfs *ujfs = to_udma_jfs(jfs); + udma_clean_cqe_for_jetty(dev, &ujfs->sq, jfs->jfs_cfg.jfc, NULL); + xa_erase(&dev->jetty_table.xa, ujfs->sq.id); if (refcount_dec_and_test(&ujfs->ae_refcount)) @@ -410,6 +453,48 @@ int udma_destroy_jfs(struct ubcore_jfs *jfs) return 0; } +int udma_destroy_jfs_batch(struct ubcore_jfs **jfs, int jfs_cnt, int *bad_jfs_index) +{ + struct udma_jetty_queue **sq_list; + struct udma_dev *udma_dev; + uint32_t i; + int ret; + + if (!jfs) { + pr_err("jfs array is null.\n"); + return -EINVAL; + } + + if (!jfs_cnt) { + pr_err("jfs cnt is 0.\n"); + return -EINVAL; + } + + udma_dev = to_udma_dev(jfs[0]->ub_dev); + + sq_list = kcalloc(jfs_cnt, sizeof(*sq_list), GFP_KERNEL); + if (!sq_list) + return -ENOMEM; + + for (i = 0; i < jfs_cnt; i++) + sq_list[i] = &(to_udma_jfs(jfs[i])->sq); + + ret = udma_batch_modify_and_destroy_jetty(udma_dev, sq_list, jfs_cnt, bad_jfs_index); + + kfree(sq_list); + + if (ret) { + dev_err(udma_dev->dev, + "udma batch modify error and destroy jfs failed.\n"); + return ret; + } + + for (i = 0; i < jfs_cnt; i++) + udma_free_jfs(jfs[i]); + + return 0; +} + static int udma_modify_jfs_state(struct udma_dev *udma_dev, struct udma_jfs *udma_jfs, struct ubcore_jfs_attr *attr) { @@ -486,3 +571,716 @@ int udma_modify_jfs(struct ubcore_jfs *jfs, struct ubcore_jfs_attr *attr, return 0; } + +static void fill_imm_data_or_token_for_cr(struct udma_dev *udma_dev, + struct udma_sqe_ctl *sqe_ctl, + struct ubcore_cr *cr, + uint32_t opcode) +{ + switch (opcode) { + case UDMA_OPC_SEND: + case UDMA_OPC_WRITE: + case UDMA_OPC_READ: + case UDMA_OPC_CAS: + case UDMA_OPC_FAA: + break; + case UDMA_OPC_SEND_WITH_IMM: + memcpy(&cr->imm_data, (void *)sqe_ctl + SQE_SEND_IMM_FIELD, + sizeof(uint64_t)); + break; + case UDMA_OPC_SEND_WITH_INVALID: + cr->invalid_token.token_id = sqe_ctl->rmt_addr_l_or_token_id; + cr->invalid_token.token_value.token = sqe_ctl->rmt_addr_h_or_token_value; + break; + case UDMA_OPC_WRITE_WITH_IMM: + memcpy(&cr->imm_data, (void *)sqe_ctl + SQE_WRITE_IMM_FIELD, + sizeof(uint64_t)); + break; + default: + dev_err(udma_dev->dev, "Flush invalid opcode :%u.\n", opcode); + break; + } +} + +static void fill_cr_by_sqe_ctl(struct udma_dev *udma_dev, + struct udma_sqe_ctl *sqe_ctl, + struct ubcore_cr *cr) +{ + uint32_t opcode = sqe_ctl->opcode; + struct udma_normal_sge *sge; + uint32_t src_sge_num = 0; + uint64_t total_len = 0; + uint32_t ctrl_len; + uint32_t i; + + fill_imm_data_or_token_for_cr(udma_dev, sqe_ctl, cr, opcode); + + cr->tpn = sqe_ctl->tpn; + cr->remote_id.id = sqe_ctl->rmt_obj_id; + memcpy(cr->remote_id.eid.raw, sqe_ctl->rmt_eid, UBCORE_EID_SIZE); + + if (sqe_ctl->inline_en) { + cr->completion_len = sqe_ctl->inline_msg_len; + return; + } + + src_sge_num = sqe_ctl->sge_num; + ctrl_len = get_ctl_len(opcode); + sge = (struct udma_normal_sge *)((void *)sqe_ctl + ctrl_len); + + for (i = 0; i < src_sge_num; i++) { + total_len += sge->length; + sge++; + } + + if (total_len > UINT32_MAX) { + cr->completion_len = UINT32_MAX; + dev_warn(udma_dev->dev, "total len %llu is overflow.\n", total_len); + } else { + cr->completion_len = total_len; + } +} + +static void udma_copy_from_sq(struct udma_jetty_queue *sq, uint32_t wqebb_cnt, + struct udma_jfs_wqebb *tmp_sq) +{ + uint32_t field_h; + uint32_t field_l; + uint32_t offset; + uint32_t remain; + + remain = sq->buf.entry_cnt - (sq->ci & (sq->buf.entry_cnt - 1)); + 
offset = (sq->ci & (sq->buf.entry_cnt - 1)) * UDMA_JFS_WQEBB_SIZE; + field_h = remain > wqebb_cnt ? wqebb_cnt : remain; + field_l = wqebb_cnt > field_h ? wqebb_cnt - field_h : 0; + + memcpy(tmp_sq, sq->buf.kva + offset, field_h * sizeof(*tmp_sq)); + + if (field_l) + memcpy(tmp_sq + field_h, sq->buf.kva, field_l * sizeof(*tmp_sq)); +} + +static uint32_t get_wqebb_num(struct udma_sqe_ctl *sqe_ctl) +{ + uint32_t opcode = sqe_ctl->opcode; + uint32_t sqe_ctl_len = get_ctl_len(opcode); + + switch (opcode) { + case UDMA_OPC_SEND: + case UDMA_OPC_SEND_WITH_IMM: + case UDMA_OPC_SEND_WITH_INVALID: + case UDMA_OPC_WRITE: + case UDMA_OPC_WRITE_WITH_IMM: + if (sqe_ctl->inline_en) + return (sqe_ctl_len + sqe_ctl->inline_msg_len - 1) / + UDMA_JFS_WQEBB_SIZE + 1; + break; + case UDMA_OPC_CAS: + case UDMA_OPC_FAA: + return ATOMIC_WQEBB_CNT; + case UDMA_OPC_NOP: + return NOP_WQEBB_CNT; + default: + break; + } + + return sq_cal_wqebb_num(sqe_ctl_len, sqe_ctl->sge_num); +} + +void udma_flush_sq(struct udma_dev *udma_dev, + struct udma_jetty_queue *sq, struct ubcore_cr *cr) +{ + struct udma_jfs_wqebb tmp_sq[MAX_WQEBB_NUM] = {}; + + udma_copy_from_sq(sq, MAX_WQEBB_NUM, tmp_sq); + fill_cr_by_sqe_ctl(udma_dev, (struct udma_sqe_ctl *)tmp_sq, cr); + cr->status = UBCORE_CR_WR_UNHANDLED; + cr->user_ctx = sq->wrid[sq->ci & (sq->buf.entry_cnt - 1)]; + /* Fill in UINT8_MAX for send direction */ + cr->opcode = UINT8_MAX; + cr->local_id = sq->id; + + sq->ci += get_wqebb_num((struct udma_sqe_ctl *)tmp_sq); +} + +int udma_flush_jfs(struct ubcore_jfs *jfs, int cr_cnt, struct ubcore_cr *cr) +{ + struct udma_dev *udma_dev = to_udma_dev(jfs->ub_dev); + struct udma_jfs *udma_jfs = to_udma_jfs(jfs); + struct udma_jetty_queue *sq = &udma_jfs->sq; + int n_flushed; + + if (!sq->flush_flag) + return 0; + + if (!jfs->jfs_cfg.flag.bs.lock_free) + spin_lock(&sq->lock); + + for (n_flushed = 0; n_flushed < cr_cnt; n_flushed++) { + if (sq->ci == sq->pi) + break; + udma_flush_sq(udma_dev, sq, cr + n_flushed); + } + + if (!jfs->jfs_cfg.flag.bs.lock_free) + spin_unlock(&sq->lock); + + return n_flushed; +} + +static uint8_t udma_get_jfs_opcode(enum ubcore_opcode opcode) +{ + switch (opcode) { + case UBCORE_OPC_SEND: + return UDMA_OPC_SEND; + case UBCORE_OPC_SEND_IMM: + return UDMA_OPC_SEND_WITH_IMM; + case UBCORE_OPC_SEND_INVALIDATE: + return UDMA_OPC_SEND_WITH_INVALID; + case UBCORE_OPC_WRITE: + return UDMA_OPC_WRITE; + case UBCORE_OPC_WRITE_IMM: + return UDMA_OPC_WRITE_WITH_IMM; + case UBCORE_OPC_READ: + return UDMA_OPC_READ; + case UBCORE_OPC_CAS: + return UDMA_OPC_CAS; + case UBCORE_OPC_FADD: + return UDMA_OPC_FAA; + case UBCORE_OPC_NOP: + return UDMA_OPC_NOP; + default: + return UDMA_OPC_INVALID; + } +} + +static int +udma_fill_sw_sge(struct udma_dev *dev, struct udma_sqe_ctl *sqe_ctl, + struct ubcore_jfs_wr *wr, uint32_t max_inline_size, + struct udma_normal_sge *sge) +{ + struct ubcore_sge *sge_info; + uint32_t total_len = 0; + uint32_t sge_num = 0; + uint32_t num_sge; + uint32_t i; + + switch (wr->opcode) { + case UBCORE_OPC_SEND: + case UBCORE_OPC_SEND_IMM: + case UBCORE_OPC_SEND_INVALIDATE: + sge_info = wr->send.src.sge; + num_sge = wr->send.src.num_sge; + break; + case UBCORE_OPC_WRITE: + case UBCORE_OPC_WRITE_IMM: + sge_info = wr->rw.src.sge; + num_sge = wr->rw.src.num_sge; + break; + default: + return -EINVAL; + } + + if (wr->flag.bs.inline_flag) { + for (i = 0; i < num_sge; i++) { + if (total_len + sge_info[i].len > max_inline_size) { + dev_info(dev->dev, "inline_size %u is over max_size %u.\n", + total_len + sge_info[i].len, 
max_inline_size); + return -EINVAL; + } + + memcpy((void *)(uintptr_t)sge + total_len, + (void *)(uintptr_t)sge_info[i].addr, + sge_info[i].len); + total_len += sge_info[i].len; + } + sqe_ctl->inline_msg_len = total_len; + } else { + for (i = 0; i < num_sge; i++) { + if (sge_info[i].len == 0) + continue; + sge->va = sge_info[i].addr; + sge->length = sge_info[i].len; + sge++; + sge_num++; + } + sqe_ctl->sge_num = sge_num; + } + + return 0; +} + +static int +udma_k_fill_send_sqe(struct udma_dev *dev, struct udma_sqe_ctl *sqe_ctl, + struct ubcore_jfs_wr *wr, struct ubcore_tjetty *tjetty, + uint32_t max_inline_size) +{ + struct udma_target_jetty *udma_tjetty; + struct udma_token_info *token_info; + struct udma_segment *udma_seg; + struct udma_normal_sge *sge; + + sge = (struct udma_normal_sge *)(sqe_ctl + 1); + + if (udma_fill_sw_sge(dev, sqe_ctl, wr, max_inline_size, sge)) + return -EINVAL; + + udma_tjetty = to_udma_tjetty(tjetty); + sqe_ctl->target_hint = wr->send.target_hint; + sqe_ctl->rmt_obj_id = tjetty->cfg.id.id; + sqe_ctl->token_en = udma_tjetty->token_value_valid; + sqe_ctl->rmt_token_value = udma_tjetty->token_value; + + if (wr->opcode == UBCORE_OPC_SEND_IMM) { + memcpy((void *)sqe_ctl + SQE_SEND_IMM_FIELD, &wr->send.imm_data, + sizeof(uint64_t)); + } else if (wr->opcode == UBCORE_OPC_SEND_INVALIDATE) { + udma_seg = to_udma_seg(wr->send.tseg); + token_info = (struct udma_token_info *)&sqe_ctl->rmt_addr_l_or_token_id; + token_info->token_id = udma_seg->tid; + token_info->token_value = udma_seg->token_value; + } + + return 0; +} + +static int +udma_k_fill_write_sqe(struct udma_dev *dev, struct udma_sqe_ctl *sqe_ctl, + struct ubcore_jfs_wr *wr, struct ubcore_tjetty *tjetty, + uint32_t max_inline_size) +{ + struct udma_token_info *token_info; + struct udma_segment *udma_seg; + struct udma_normal_sge *sge; + struct ubcore_sge *sge_info; + uint32_t ctrl_len; + + ctrl_len = get_ctl_len(sqe_ctl->opcode); + sge = (struct udma_normal_sge *)((void *)sqe_ctl + ctrl_len); + + if (udma_fill_sw_sge(dev, sqe_ctl, wr, max_inline_size, sge)) + return -EINVAL; + + sge_info = wr->rw.dst.sge; + udma_seg = to_udma_seg(sge_info[0].tseg); + + sqe_ctl->target_hint = wr->rw.target_hint; + sqe_ctl->rmt_obj_id = udma_seg->tid; + sqe_ctl->token_en = udma_seg->token_value_valid; + sqe_ctl->rmt_token_value = udma_seg->token_value; + sqe_ctl->rmt_addr_l_or_token_id = sge_info[0].addr & (uint32_t)SQE_CTL_RMA_ADDR_BIT; + sqe_ctl->rmt_addr_h_or_token_value = + (sge_info[0].addr >> (uint32_t)SQE_CTL_RMA_ADDR_OFFSET) & + (uint32_t)SQE_CTL_RMA_ADDR_BIT; + + if (sqe_ctl->opcode == UDMA_OPC_WRITE_WITH_IMM) { + memcpy((void *)sqe_ctl + SQE_WRITE_IMM_FIELD, &wr->rw.notify_data, + sizeof(uint64_t)); + token_info = (struct udma_token_info *) + ((void *)sqe_ctl + WRITE_IMM_TOKEN_FIELD); + token_info->token_id = tjetty->cfg.id.id; + token_info->token_value = tjetty->cfg.token_value.token; + } + + return 0; +} + +static int udma_k_fill_read_sqe(struct udma_sqe_ctl *sqe_ctl, struct ubcore_jfs_wr *wr) +{ + struct udma_segment *udma_seg; + struct udma_normal_sge *sge; + struct ubcore_sge *sge_info; + uint32_t sge_num = 0; + uint32_t num; + + sge = (struct udma_normal_sge *)(sqe_ctl + 1); + sge_info = wr->rw.dst.sge; + + for (num = 0; num < wr->rw.dst.num_sge; num++) { + if (sge_info[num].len == 0) + continue; + sge->va = sge_info[num].addr; + sge->length = sge_info[num].len; + sge++; + sge_num++; + } + + sge_info = wr->rw.src.sge; + udma_seg = to_udma_seg(sge_info[0].tseg); + + sqe_ctl->sge_num = sge_num; + sqe_ctl->rmt_obj_id = 
udma_seg->tid; + sqe_ctl->token_en = udma_seg->token_value_valid; + sqe_ctl->rmt_token_value = udma_seg->token_value; + sqe_ctl->rmt_addr_l_or_token_id = sge_info[0].addr & (uint32_t)SQE_CTL_RMA_ADDR_BIT; + sqe_ctl->rmt_addr_h_or_token_value = + (sge_info[0].addr >> (uint32_t)SQE_CTL_RMA_ADDR_OFFSET) & + (uint32_t)SQE_CTL_RMA_ADDR_BIT; + + return 0; +} + +static bool +udma_k_check_atomic_len(struct udma_dev *dev, uint32_t len, uint8_t opcode) +{ + switch (len) { + case UDMA_ATOMIC_LEN_4: + case UDMA_ATOMIC_LEN_8: + return true; + case UDMA_ATOMIC_LEN_16: + if (opcode == UBCORE_OPC_CAS) + return true; + dev_err(dev->dev, "the atomic opcode must be CAS when len is 16.\n"); + return false; + default: + dev_err(dev->dev, "invalid atomic len %u.\n", len); + return false; + } +} + +static int +udma_k_fill_cas_sqe(struct udma_dev *dev, struct udma_sqe_ctl *sqe_ctl, + struct ubcore_jfs_wr *wr) +{ + struct udma_segment *udma_seg; + struct udma_normal_sge *sge; + struct ubcore_sge *sge_info; + + sge_info = wr->cas.src; + if (!udma_k_check_atomic_len(dev, sge_info->len, wr->opcode)) + return -EINVAL; + + sge = (struct udma_normal_sge *)(sqe_ctl + 1); + + sge->va = sge_info->addr; + sge->length = sge_info->len; + + sge_info = wr->cas.dst; + udma_seg = to_udma_seg(sge_info->tseg); + + sqe_ctl->sge_num = UDMA_ATOMIC_SGE_NUM; + sqe_ctl->rmt_obj_id = udma_seg->tid; + sqe_ctl->token_en = udma_seg->token_value_valid; + sqe_ctl->rmt_token_value = udma_seg->token_value; + sqe_ctl->rmt_addr_l_or_token_id = sge_info->addr & (uint32_t)SQE_CTL_RMA_ADDR_BIT; + sqe_ctl->rmt_addr_h_or_token_value = (sge_info->addr >> (uint32_t)SQE_CTL_RMA_ADDR_OFFSET) & + (uint32_t)SQE_CTL_RMA_ADDR_BIT; + + if (sge->length <= UDMA_ATOMIC_LEN_8) { + memcpy((void *)sqe_ctl + SQE_ATOMIC_DATA_FIELD, + &wr->cas.swap_data, sge->length); + memcpy((void *)sqe_ctl + SQE_ATOMIC_DATA_FIELD + sge->length, + &wr->cas.cmp_data, sge->length); + } else { + memcpy((void *)sqe_ctl + SQE_ATOMIC_DATA_FIELD, + (void *)(uintptr_t)wr->cas.swap_addr, sge->length); + memcpy((void *)sqe_ctl + SQE_ATOMIC_DATA_FIELD + sge->length, + (void *)(uintptr_t)wr->cas.cmp_addr, sge->length); + } + + return 0; +} + +static int +udma_k_fill_faa_sqe(struct udma_dev *dev, struct udma_sqe_ctl *sqe_ctl, + struct ubcore_jfs_wr *wr) +{ + struct udma_segment *udma_seg; + struct udma_normal_sge *sge; + struct ubcore_sge *sge_info; + + sge_info = wr->faa.src; + if (!udma_k_check_atomic_len(dev, sge_info->len, wr->opcode)) + return -EINVAL; + + sge = (struct udma_normal_sge *)(sqe_ctl + 1); + + sge->va = sge_info->addr; + sge->length = sge_info->len; + + sge_info = wr->faa.dst; + udma_seg = to_udma_seg(sge_info->tseg); + + sqe_ctl->sge_num = UDMA_ATOMIC_SGE_NUM; + sqe_ctl->rmt_obj_id = udma_seg->tid; + sqe_ctl->token_en = udma_seg->token_value_valid; + sqe_ctl->rmt_token_value = udma_seg->token_value; + sqe_ctl->rmt_addr_l_or_token_id = sge_info->addr & (uint32_t)SQE_CTL_RMA_ADDR_BIT; + sqe_ctl->rmt_addr_h_or_token_value = (sge_info->addr >> (uint32_t)SQE_CTL_RMA_ADDR_OFFSET) & + (uint32_t)SQE_CTL_RMA_ADDR_BIT; + + if (sge->length <= UDMA_ATOMIC_LEN_8) + memcpy((void *)sqe_ctl + SQE_ATOMIC_DATA_FIELD, &wr->faa.operand, + sge->length); + else + memcpy((void *)sqe_ctl + SQE_ATOMIC_DATA_FIELD, + (void *)(uintptr_t)wr->faa.operand_addr, sge->length); + + return 0; +} + +static int udma_fill_normal_sge(struct udma_dev *dev, struct udma_sqe_ctl *sqe_ctl, + uint32_t max_inline_size, struct ubcore_jfs_wr *wr, + struct ubcore_tjetty *tjetty) +{ + switch (sqe_ctl->opcode) { + case 
UDMA_OPC_SEND: + case UDMA_OPC_SEND_WITH_IMM: + case UDMA_OPC_SEND_WITH_INVALID: + return udma_k_fill_send_sqe(dev, sqe_ctl, wr, tjetty, + max_inline_size); + case UDMA_OPC_WRITE: + return udma_k_fill_write_sqe(dev, sqe_ctl, wr, tjetty, max_inline_size); + case UDMA_OPC_WRITE_WITH_IMM: + return udma_k_fill_write_sqe(dev, sqe_ctl, wr, tjetty, + max_inline_size > SQE_WRITE_IMM_INLINE_SIZE ? + SQE_WRITE_IMM_INLINE_SIZE : max_inline_size); + case UDMA_OPC_READ: + return udma_k_fill_read_sqe(sqe_ctl, wr); + case UDMA_OPC_CAS: + return udma_k_fill_cas_sqe(dev, sqe_ctl, wr); + case UDMA_OPC_FAA: + return udma_k_fill_faa_sqe(dev, sqe_ctl, wr); + default: + return -EINVAL; + } +} + +static int udma_k_set_sqe(struct udma_sqe_ctl *sqe_ctl, struct ubcore_jfs_wr *wr, + struct udma_jetty_queue *sq, uint8_t opcode, + struct udma_dev *dev) +{ + struct udma_target_jetty *udma_tjetty; + struct ubcore_tjetty *tjetty; + int ret = 0; + + sqe_ctl->cqe = wr->flag.bs.complete_enable; + sqe_ctl->owner = (sq->pi & sq->buf.entry_cnt) == 0 ? 1 : 0; + sqe_ctl->opcode = opcode; + sqe_ctl->place_odr = wr->flag.bs.place_order; + + if (opcode == UDMA_OPC_NOP) + return 0; + + if (sq->trans_mode == UBCORE_TP_RC) + tjetty = sq->rc_tjetty; + else + tjetty = wr->tjetty; + + udma_tjetty = to_udma_tjetty(tjetty); + + sqe_ctl->tpn = tjetty->vtpn->vtpn; + sqe_ctl->fence = wr->flag.bs.fence; + sqe_ctl->comp_order = wr->flag.bs.comp_order; + sqe_ctl->se = wr->flag.bs.solicited_enable; + sqe_ctl->inline_en = wr->flag.bs.inline_flag; + sqe_ctl->rmt_jetty_type = tjetty->cfg.type; + memcpy(sqe_ctl->rmt_eid, &udma_tjetty->le_eid.raw, sizeof(uint8_t) * + UDMA_SQE_RMT_EID_SIZE); + + ret = udma_fill_normal_sge(dev, sqe_ctl, sq->max_inline_size, wr, tjetty); + if (ret) + dev_err(dev->dev, "Failed to fill normal sge, opcode :%u in wr.\n", + (uint8_t)wr->opcode); + + return ret; +} + +static bool udma_k_check_sge_num(uint8_t opcode, struct udma_jetty_queue *sq, + struct ubcore_jfs_wr *wr) +{ + switch (opcode) { + case UDMA_OPC_CAS: + case UDMA_OPC_FAA: + return sq->max_sge_num == 0; + case UDMA_OPC_READ: + return wr->rw.dst.num_sge > UDMA_JFS_MAX_SGE_READ || + wr->rw.dst.num_sge > sq->max_sge_num; + case UDMA_OPC_WRITE_WITH_IMM: + return wr->rw.src.num_sge > UDMA_JFS_MAX_SGE_WRITE_IMM || + wr->rw.src.num_sge > sq->max_sge_num; + case UDMA_OPC_SEND: + case UDMA_OPC_SEND_WITH_IMM: + case UDMA_OPC_SEND_WITH_INVALID: + return wr->send.src.num_sge > sq->max_sge_num; + default: + return wr->rw.src.num_sge > sq->max_sge_num; + } +} + +static void udma_copy_to_sq(struct udma_jetty_queue *sq, uint32_t wqebb_cnt, + struct udma_jfs_wqebb *tmp_sq) +{ + uint32_t remain = sq->buf.entry_cnt - (sq->pi & (sq->buf.entry_cnt - 1)); + uint32_t field_h; + uint32_t field_l; + + field_h = remain > wqebb_cnt ? wqebb_cnt : remain; + field_l = wqebb_cnt > field_h ? wqebb_cnt - field_h : 0; + + memcpy(sq->kva_curr, tmp_sq, field_h * sizeof(*tmp_sq)); + + if (field_l) + memcpy(sq->buf.kva, tmp_sq + field_h, field_l * sizeof(*tmp_sq)); +} + +static void *udma_k_inc_ptr_wrap(uint32_t sq_buf_size, uint32_t wqebb_size, + uint8_t *sq_base_addr, uint8_t *sq_buf_curr) +{ + uint8_t *sq_buf_end; + + sq_buf_end = (uint8_t *)(sq_buf_size + sq_base_addr); + + sq_buf_curr = ((sq_buf_curr + wqebb_size) < sq_buf_end) ? 
+ (sq_buf_curr + wqebb_size) : sq_base_addr + (sq_buf_curr + + wqebb_size - sq_buf_end); + + return sq_buf_curr; +} + +static int udma_post_one_wr(struct udma_jetty_queue *sq, struct ubcore_jfs_wr *wr, + struct udma_dev *udma_dev, struct udma_sqe_ctl **wqe_addr, + bool *dwqe_enable) +{ + struct udma_jfs_wqebb tmp_sq[MAX_WQEBB_NUM] = {}; + uint32_t wqebb_cnt; + uint8_t opcode; + uint32_t i; + int ret; + + opcode = udma_get_jfs_opcode(wr->opcode); + if (unlikely(opcode == UDMA_OPC_INVALID)) { + dev_err(udma_dev->dev, "Invalid opcode :%u.\n", wr->opcode); + return -EINVAL; + } + + if (unlikely(udma_k_check_sge_num(opcode, sq, wr))) { + dev_err(udma_dev->dev, "WR sge num invalid.\n"); + return -EINVAL; + } + + ret = udma_k_set_sqe((struct udma_sqe_ctl *)(void *)tmp_sq, wr, sq, + opcode, udma_dev); + if (ret) + return ret; + + wqebb_cnt = get_wqebb_num((struct udma_sqe_ctl *)(void *)tmp_sq); + if (wqebb_cnt == 1 && !!(udma_dev->caps.feature & UDMA_CAP_FEATURE_DIRECT_WQE)) + *dwqe_enable = true; + + if (to_check_sq_overflow(sq, wqebb_cnt)) { + dev_err(udma_dev->dev, "JFS overflow, wqebb_cnt:%u.\n", wqebb_cnt); + return -ENOMEM; + } + + udma_copy_to_sq(sq, wqebb_cnt, tmp_sq); + + *wqe_addr = (struct udma_sqe_ctl *)sq->kva_curr; + + sq->kva_curr = udma_k_inc_ptr_wrap(sq->buf.entry_cnt * sq->buf.entry_size, + wqebb_cnt * sq->buf.entry_size, + (uint8_t *)sq->buf.kva, + (uint8_t *)sq->kva_curr); + + for (i = 0; i < wqebb_cnt; i++) + sq->wrid[(sq->pi + i) & (sq->buf.entry_cnt - 1)] = wr->user_ctx; + + sq->pi += wqebb_cnt; + + return 0; +} + +static inline void udma_k_update_sq_db(struct udma_jetty_queue *sq) +{ + uint32_t *db_addr = sq->db_addr; + *db_addr = sq->pi; +} + +#ifdef ST64B +static void st64b(uint64_t *src, uint64_t *dst) +{ + asm volatile ( + "mov x9, %0\n" + "mov x10, %1\n" + "ldr x0, [x9]\n" + "ldr x1, [x9, #8]\n" + "ldr x2, [x9, #16]\n" + "ldr x3, [x9, #24]\n" + "ldr x4, [x9, #32]\n" + "ldr x5, [x9, #40]\n" + "ldr x6, [x9, #48]\n" + "ldr x7, [x9, #56]\n" + ".inst 0xf83f9140\n" + ::"r" (src), "r"(dst):"cc", "memory" + ); +} +#endif + +static void udma_write_dsqe(struct udma_jetty_queue *sq, + struct udma_sqe_ctl *ctrl) +{ +#define DWQE_SIZE 8 + int i; + + ctrl->sqe_bb_idx = sq->pi; + +#ifdef ST64B + st64b(((uint64_t *)ctrl), (uint64_t *)sq->dwqe_addr); +#else + for (i = 0; i < DWQE_SIZE; i++) + writeq_relaxed(*((uint64_t *)ctrl + i), + (uint64_t *)sq->dwqe_addr + i); +#endif +} + +/* thanks to drivers/infiniband/hw/bnxt_re/ib_verbs.c */ +int udma_post_sq_wr(struct udma_dev *udma_dev, struct udma_jetty_queue *sq, + struct ubcore_jfs_wr *wr, struct ubcore_jfs_wr **bad_wr) +{ + struct udma_sqe_ctl *wqe_addr; + bool dwqe_enable = false; + struct ubcore_jfs_wr *it; + int wr_cnt = 0; + int ret = 0; + + if (!sq->lock_free) + spin_lock(&sq->lock); + + for (it = wr; it != NULL; it = (struct ubcore_jfs_wr *)(void *)it->next) { + ret = udma_post_one_wr(sq, it, udma_dev, &wqe_addr, &dwqe_enable); + if (ret) { + *bad_wr = it; + goto err_post_wr; + } + wr_cnt++; + } + +err_post_wr: + if (likely(wr_cnt && udma_dev->status != UDMA_SUSPEND)) { + wmb(); /* set sqe before doorbell */ + if (wr_cnt == 1 && dwqe_enable && (sq->pi - sq->ci == 1)) + udma_write_dsqe(sq, wqe_addr); + else + udma_k_update_sq_db(sq); + } + + if (!sq->lock_free) + spin_unlock(&sq->lock); + + return ret; +} + +int udma_post_jfs_wr(struct ubcore_jfs *jfs, struct ubcore_jfs_wr *wr, + struct ubcore_jfs_wr **bad_wr) +{ + struct udma_dev *udma_dev = to_udma_dev(jfs->ub_dev); + struct udma_jfs *udma_jfs = to_udma_jfs(jfs); + int ret; + + 
ret = udma_post_sq_wr(udma_dev, &udma_jfs->sq, wr, bad_wr); + if (ret) + dev_err(udma_dev->dev, "Failed to post jfs wr, sq_id = %u.\n", + udma_jfs->sq.id); + + return ret; +} diff --git a/drivers/ub/urma/hw/udma/udma_jfs.h b/drivers/ub/urma/hw/udma/udma_jfs.h index 6cdc281e53c3b6c26e33e1ed594f9c47f29a588e..d3a29f2a68a04cfb5cf7c0b9bf272e79345f4847 100644 --- a/drivers/ub/urma/hw/udma/udma_jfs.h +++ b/drivers/ub/urma/hw/udma/udma_jfs.h @@ -7,10 +7,30 @@ #include "udma_common.h" #define MAX_WQEBB_NUM 4 +#define UDMA_SQE_RMT_EID_SIZE 16 +#define SQE_WRITE_IMM_CTL_LEN 64 +#define SQE_NORMAL_CTL_LEN 48 +#define ATOMIC_WQEBB_CNT 2 +#define NOP_WQEBB_CNT 1 #define UDMA_JFS_WQEBB_SIZE 64 #define UDMA_JFS_SGE_SIZE 16 +#define UDMA_JFS_MAX_SGE_READ 6 +#define UDMA_JFS_MAX_SGE_WRITE_IMM 12 +#define UDMA_ATOMIC_SGE_NUM 1 +#define UDMA_ATOMIC_LEN_4 4 +#define UDMA_ATOMIC_LEN_8 8 +#define UDMA_ATOMIC_LEN_16 16 +#define SQE_CTL_RMA_ADDR_OFFSET 32 +#define SQE_CTL_RMA_ADDR_BIT GENMASK(31, 0) +#define SQE_ATOMIC_DATA_FIELD 64 +#define SQE_SEND_IMM_FIELD 40 +#define WRITE_IMM_TOKEN_FIELD 56 +#define SQE_WRITE_IMM_FIELD 48 #define SQE_WRITE_NOTIFY_CTL_LEN 80 +#define SQE_WRITE_IMM_INLINE_SIZE 192 + +#define UINT8_MAX 0xff enum udma_jfs_type { UDMA_NORMAL_JFS_TYPE, @@ -28,6 +48,63 @@ struct udma_jfs { bool ue_rx_closed; }; +/* thanks to include/rdma/ib_verbs.h */ +enum udma_sq_opcode { + UDMA_OPC_SEND, + UDMA_OPC_SEND_WITH_IMM, + UDMA_OPC_SEND_WITH_INVALID, + UDMA_OPC_WRITE, + UDMA_OPC_WRITE_WITH_IMM, + UDMA_OPC_READ = 0x6, + UDMA_OPC_CAS, + UDMA_OPC_FAA = 0xb, + UDMA_OPC_NOP = 0x11, + UDMA_OPC_INVALID = 0x12, +}; + +struct udma_jfs_wqebb { + uint32_t value[16]; +}; + +struct udma_sqe_ctl { + uint32_t sqe_bb_idx : 16; + uint32_t place_odr : 2; + uint32_t comp_order : 1; + uint32_t fence : 1; + uint32_t se : 1; + uint32_t cqe : 1; + uint32_t inline_en : 1; + uint32_t rsv : 5; + uint32_t token_en : 1; + uint32_t rmt_jetty_type : 2; + uint32_t owner : 1; + uint32_t target_hint : 8; + uint32_t opcode : 8; + uint32_t rsv1 : 6; + uint32_t inline_msg_len : 10; + uint32_t tpn : 24; + uint32_t sge_num : 8; + uint32_t rmt_obj_id : 20; + uint32_t rsv2 : 12; + uint8_t rmt_eid[UDMA_SQE_RMT_EID_SIZE]; + uint32_t rmt_token_value; + uint32_t rsv3; + uint32_t rmt_addr_l_or_token_id; + uint32_t rmt_addr_h_or_token_value; +}; + +struct udma_normal_sge { + uint32_t length; + uint32_t token_id; + uint64_t va; +}; + +struct udma_token_info { + uint32_t token_id : 20; + uint32_t rsv : 12; + uint32_t token_value; +}; + static inline struct udma_jfs *to_udma_jfs(struct ubcore_jfs *jfs) { return container_of(jfs, struct udma_jfs, ubcore_jfs); @@ -38,16 +115,28 @@ static inline struct udma_jfs *to_udma_jfs_from_queue(struct udma_jetty_queue *q return container_of(queue, struct udma_jfs, sq); } +static inline bool to_check_sq_overflow(struct udma_jetty_queue *sq, + uint32_t wqebb_cnt) +{ + return sq->pi - sq->ci + wqebb_cnt > sq->buf.entry_cnt; +} + static inline uint32_t sq_cal_wqebb_num(uint32_t sqe_ctl_len, uint32_t sge_num) { return (sqe_ctl_len + (sge_num - 1) * UDMA_JFS_SGE_SIZE) / UDMA_JFS_WQEBB_SIZE + 1; } +static inline uint32_t get_ctl_len(uint8_t opcode) +{ + return opcode == UDMA_OPC_WRITE_WITH_IMM ? 
SQE_WRITE_IMM_CTL_LEN : SQE_NORMAL_CTL_LEN; +} + struct ubcore_jfs *udma_create_jfs(struct ubcore_device *ub_dev, struct ubcore_jfs_cfg *cfg, struct ubcore_udata *udata); int udma_destroy_jfs(struct ubcore_jfs *jfs); +int udma_destroy_jfs_batch(struct ubcore_jfs **jfs_arr, int jfs_num, int *bad_jfs_index); int udma_alloc_u_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq, struct udma_create_jetty_ucmd *ucmd); int udma_alloc_k_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq, @@ -55,5 +144,17 @@ int udma_alloc_k_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq, void udma_free_sq_buf(struct udma_dev *dev, struct udma_jetty_queue *sq); int udma_modify_jfs(struct ubcore_jfs *jfs, struct ubcore_jfs_attr *attr, struct ubcore_udata *udata); +int udma_flush_jfs(struct ubcore_jfs *jfs, int cr_cnt, struct ubcore_cr *cr); +int udma_post_sq_wr(struct udma_dev *udma_dev, struct udma_jetty_queue *sq, + struct ubcore_jfs_wr *wr, struct ubcore_jfs_wr **bad_wr); +int udma_post_jfs_wr(struct ubcore_jfs *jfs, struct ubcore_jfs_wr *wr, + struct ubcore_jfs_wr **bad_wr); +void udma_flush_sq(struct udma_dev *udma_dev, + struct udma_jetty_queue *sq, struct ubcore_cr *cr); +void udma_dfx_store_jfs_id(struct udma_dev *udma_dev, struct udma_jfs *udma_jfs); +void udma_init_jfsc(struct udma_dev *dev, struct ubcore_jfs_cfg *cfg, + struct udma_jfs *jfs, void *mb_buf); +int udma_verify_jfs_param(struct udma_dev *dev, struct ubcore_jfs_cfg *cfg, + bool enable_stars); #endif /* __UDMA_JFS_H__ */ diff --git a/drivers/ub/urma/hw/udma/udma_main.c b/drivers/ub/urma/hw/udma/udma_main.c index b1fad9e31f38c5a9e64b77f5de509b8fa3988923..cbf773d01c48288f7f42b723b99c1164c1178c89 100644 --- a/drivers/ub/urma/hw/udma/udma_main.c +++ b/drivers/ub/urma/hw/udma/udma_main.c @@ -162,6 +162,35 @@ static int udma_query_device_attr(struct ubcore_device *dev, return 0; } +static int udma_query_stats(struct ubcore_device *dev, struct ubcore_stats_key *key, + struct ubcore_stats_val *val) +{ + struct ubcore_stats_com_val *com_val = (struct ubcore_stats_com_val *)val->addr; + struct udma_dev *udma_dev = to_udma_dev(dev); + struct ubase_ub_dl_stats dl_stats = {}; + int ret; + + ret = ubase_get_ub_port_stats(udma_dev->comdev.adev, + udma_dev->port_logic_id, &dl_stats); + if (ret) { + dev_err(udma_dev->dev, "failed to query port stats, ret = %d.\n", ret); + return ret; + } + + com_val->tx_pkt = dl_stats.dl_tx_busi_pkt_num; + com_val->rx_pkt = dl_stats.dl_rx_busi_pkt_num; + com_val->rx_pkt_err = 0; + com_val->tx_pkt_err = 0; + com_val->tx_bytes = 0; + com_val->rx_bytes = 0; + + return ret; +} + +static void udma_disassociate_ucontext(struct ubcore_ucontext *uctx) +{ +} + static struct ubcore_ops g_dev_ops = { .owner = THIS_MODULE, .abi_version = 0, @@ -181,21 +210,46 @@ static struct ubcore_ops g_dev_ops = { .create_jfc = udma_create_jfc, .modify_jfc = udma_modify_jfc, .destroy_jfc = udma_destroy_jfc, + .rearm_jfc = udma_rearm_jfc, .create_jfs = udma_create_jfs, .modify_jfs = udma_modify_jfs, .query_jfs = udma_query_jfs, + .flush_jfs = udma_flush_jfs, .destroy_jfs = udma_destroy_jfs, + .destroy_jfs_batch = udma_destroy_jfs_batch, .create_jfr = udma_create_jfr, .modify_jfr = udma_modify_jfr, .query_jfr = udma_query_jfr, .destroy_jfr = udma_destroy_jfr, .destroy_jfr_batch = udma_destroy_jfr_batch, + .import_jfr_ex = udma_import_jfr_ex, + .unimport_jfr = udma_unimport_jfr, .create_jetty = udma_create_jetty, .modify_jetty = udma_modify_jetty, .query_jetty = udma_query_jetty, + .flush_jetty = udma_flush_jetty, .destroy_jetty = 
diff --git a/drivers/ub/urma/hw/udma/udma_main.c b/drivers/ub/urma/hw/udma/udma_main.c
index b1fad9e31f38c5a9e64b77f5de509b8fa3988923..cbf773d01c48288f7f42b723b99c1164c1178c89 100644
--- a/drivers/ub/urma/hw/udma/udma_main.c
+++ b/drivers/ub/urma/hw/udma/udma_main.c
@@ -162,6 +162,35 @@ static int udma_query_device_attr(struct ubcore_device *dev,
 	return 0;
 }
 
+static int udma_query_stats(struct ubcore_device *dev, struct ubcore_stats_key *key,
+			    struct ubcore_stats_val *val)
+{
+	struct ubcore_stats_com_val *com_val = (struct ubcore_stats_com_val *)val->addr;
+	struct udma_dev *udma_dev = to_udma_dev(dev);
+	struct ubase_ub_dl_stats dl_stats = {};
+	int ret;
+
+	ret = ubase_get_ub_port_stats(udma_dev->comdev.adev,
+				      udma_dev->port_logic_id, &dl_stats);
+	if (ret) {
+		dev_err(udma_dev->dev, "failed to query port stats, ret = %d.\n", ret);
+		return ret;
+	}
+
+	com_val->tx_pkt = dl_stats.dl_tx_busi_pkt_num;
+	com_val->rx_pkt = dl_stats.dl_rx_busi_pkt_num;
+	com_val->rx_pkt_err = 0;
+	com_val->tx_pkt_err = 0;
+	com_val->tx_bytes = 0;
+	com_val->rx_bytes = 0;
+
+	return ret;
+}
+
+static void udma_disassociate_ucontext(struct ubcore_ucontext *uctx)
+{
+}
+
 static struct ubcore_ops g_dev_ops = {
 	.owner = THIS_MODULE,
 	.abi_version = 0,
@@ -181,21 +210,46 @@ static struct ubcore_ops g_dev_ops = {
 	.create_jfc = udma_create_jfc,
 	.modify_jfc = udma_modify_jfc,
 	.destroy_jfc = udma_destroy_jfc,
+	.rearm_jfc = udma_rearm_jfc,
 	.create_jfs = udma_create_jfs,
 	.modify_jfs = udma_modify_jfs,
 	.query_jfs = udma_query_jfs,
+	.flush_jfs = udma_flush_jfs,
 	.destroy_jfs = udma_destroy_jfs,
+	.destroy_jfs_batch = udma_destroy_jfs_batch,
 	.create_jfr = udma_create_jfr,
 	.modify_jfr = udma_modify_jfr,
 	.query_jfr = udma_query_jfr,
 	.destroy_jfr = udma_destroy_jfr,
 	.destroy_jfr_batch = udma_destroy_jfr_batch,
+	.import_jfr_ex = udma_import_jfr_ex,
+	.unimport_jfr = udma_unimport_jfr,
 	.create_jetty = udma_create_jetty,
 	.modify_jetty = udma_modify_jetty,
 	.query_jetty = udma_query_jetty,
+	.flush_jetty = udma_flush_jetty,
 	.destroy_jetty = udma_destroy_jetty,
+	.destroy_jetty_batch = udma_destroy_jetty_batch,
+	.import_jetty_ex = udma_import_jetty_ex,
+	.unimport_jetty = udma_unimport_jetty,
+	.bind_jetty_ex = udma_bind_jetty_ex,
+	.unbind_jetty = udma_unbind_jetty,
 	.create_jetty_grp = udma_create_jetty_grp,
 	.delete_jetty_grp = udma_delete_jetty_grp,
+	.get_tp_list = udma_get_tp_list,
+	.set_tp_attr = udma_set_tp_attr,
+	.get_tp_attr = udma_get_tp_attr,
+	.active_tp = udma_active_tp,
+	.deactive_tp = udma_deactive_tp,
+	.user_ctl = udma_user_ctl,
+	.post_jfs_wr = udma_post_jfs_wr,
+	.post_jfr_wr = udma_post_jfr_wr,
+	.post_jetty_send_wr = udma_post_jetty_send_wr,
+	.post_jetty_recv_wr = udma_post_jetty_recv_wr,
+	.poll_jfc = udma_poll_jfc,
+	.query_stats = udma_query_stats,
+	.query_ue_idx = udma_query_ue_idx,
+	.disassociate_ucontext = udma_disassociate_ucontext,
 };
 
 static void udma_uninit_group_table(struct udma_dev *dev, struct udma_group_table *table)
@@ -228,6 +282,7 @@ static void udma_destroy_tp_ue_idx_table(struct udma_dev *udma_dev)
 
 void udma_destroy_tables(struct udma_dev *udma_dev)
 {
+	udma_ctrlq_destroy_tpid_list(udma_dev, &udma_dev->ctrlq_tpid_table, false);
 	udma_destroy_eid_table(udma_dev);
 	mutex_destroy(&udma_dev->disable_ue_rx_mutex);
 	if (!ida_is_empty(&udma_dev->rsvd_jetty_ida_table.ida))
@@ -240,6 +295,7 @@ void udma_destroy_tables(struct udma_dev *udma_dev)
 	xa_destroy(&udma_dev->crq_nb_table);
 
 	udma_destroy_tp_ue_idx_table(udma_dev);
+	udma_destroy_npu_cb_table(udma_dev);
 
 	if (!xa_empty(&udma_dev->ksva_table))
 		dev_err(udma_dev->dev, "ksva table is not empty.\n");
@@ -289,6 +345,7 @@ static void udma_init_managed_by_ctrl_cpu_table(struct udma_dev *udma_dev)
 {
 	mutex_init(&udma_dev->eid_mutex);
 	xa_init(&udma_dev->eid_table);
+	xa_init(&udma_dev->ctrlq_tpid_table);
 }
 
 int udma_init_tables(struct udma_dev *udma_dev)
@@ -430,7 +487,6 @@ static void udma_get_jetty_id_range(struct udma_dev *udma_dev,
 
 static int query_caps_from_firmware(struct udma_dev *udma_dev)
 {
-#define RC_QUEUE_ENTRY_SIZE 128
 	struct udma_cmd_ue_resource cmd = {};
 	int ret;
 
@@ -457,10 +513,6 @@ static int query_caps_from_firmware(struct udma_dev *udma_dev)
 
 	udma_get_jetty_id_range(udma_dev, &cmd);
 
-	udma_dev->caps.rc_queue_num = cmd.rc_queue_num;
-	udma_dev->caps.rc_queue_depth = cmd.rc_depth;
-	udma_dev->caps.rc_entry_size = RC_QUEUE_ENTRY_SIZE;
-
 	udma_dev->caps.feature = cmd.cap_info;
 	udma_dev->caps.ue_cnt = cmd.ue_cnt >= UDMA_DEV_UE_NUM ?
				UDMA_DEV_UE_NUM - 1 : cmd.ue_cnt;
@@ -524,9 +576,24 @@ static int udma_construct_qos_param(struct udma_dev *dev)
 	return 0;
 }
 
+static void cal_max_2m_num(struct udma_dev *dev)
+{
+	uint32_t jfs_pg = ALIGN(dev->caps.jfs.depth * MAX_WQEBB_IN_SQE *
+				UDMA_JFS_WQEBB_SIZE, UDMA_HUGEPAGE_SIZE) >> UDMA_HUGEPAGE_SHIFT;
+	uint32_t jfr_pg = ALIGN(dev->caps.jfr.depth * dev->caps.jfr_sge *
+				UDMA_SGE_SIZE, UDMA_HUGEPAGE_SIZE) >> UDMA_HUGEPAGE_SHIFT;
+	uint32_t jfc_pg = ALIGN(dev->caps.jfc.depth * dev->caps.cqe_size,
+				UDMA_HUGEPAGE_SIZE) >> UDMA_HUGEPAGE_SHIFT;
+
+	dev->total_hugepage_num =
+		(dev->caps.jetty.start_idx + dev->caps.jetty.max_cnt) * jfs_pg +
+		dev->caps.jfr.max_cnt * jfr_pg + dev->caps.jfc.max_cnt * jfc_pg;
+}
+
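To make the budget above concrete, the sketch below (not driver code) reproduces the cal_max_2m_num() arithmetic with made-up capability values: each queue type's worst-case buffer is rounded up to whole hugepages, then multiplied by the number of queues that could exist. UDMA_HUGEPAGE_SHIFT is taken as 21 (2 MB pages, matching the "2m_page_num" log text), MAX_WQEBB_IN_SQE as 4 (assumed equal to MAX_WQEBB_NUM) and UDMA_SGE_SIZE as 16; those values and the caps numbers are assumptions for illustration only.

/* Illustrative only: the hugepage budget computed by cal_max_2m_num(),
 * with hypothetical capability values. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define UDMA_HUGEPAGE_SHIFT	21			/* assumed: 2 MB hugepages */
#define UDMA_HUGEPAGE_SIZE	(1UL << UDMA_HUGEPAGE_SHIFT)
#define UDMA_JFS_WQEBB_SIZE	64
#define MAX_WQEBB_IN_SQE	4			/* assumed equal to MAX_WQEBB_NUM */
#define UDMA_SGE_SIZE		16			/* assumed receive SGE size */
#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	/* hypothetical caps reported by firmware/ubase */
	uint64_t jfs_depth = 1024, jfr_depth = 1024, jfc_depth = 4096;
	uint64_t jfr_sge = 4, cqe_size = 64;
	uint64_t jetty_start = 8, jetty_max = 256, jfr_max = 256, jfc_max = 256;

	uint64_t jfs_pg = ALIGN_UP(jfs_depth * MAX_WQEBB_IN_SQE * UDMA_JFS_WQEBB_SIZE,
				   UDMA_HUGEPAGE_SIZE) >> UDMA_HUGEPAGE_SHIFT;
	uint64_t jfr_pg = ALIGN_UP(jfr_depth * jfr_sge * UDMA_SGE_SIZE,
				   UDMA_HUGEPAGE_SIZE) >> UDMA_HUGEPAGE_SHIFT;
	uint64_t jfc_pg = ALIGN_UP(jfc_depth * cqe_size,
				   UDMA_HUGEPAGE_SIZE) >> UDMA_HUGEPAGE_SHIFT;

	uint64_t total = (jetty_start + jetty_max) * jfs_pg +
			 jfr_max * jfr_pg + jfc_max * jfc_pg;

	/* 1024 * 4 * 64 = 256 KB -> 1 page; 1024 * 4 * 16 = 64 KB -> 1 page;
	 * 4096 * 64 = 256 KB -> 1 page; total = 264 + 256 + 256 = 776 pages. */
	printf("jfs_pg=%" PRIu64 " jfr_pg=%" PRIu64 " jfc_pg=%" PRIu64 " total=%" PRIu64 "\n",
	       jfs_pg, jfr_pg, jfc_pg, total);
	return 0;
}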
 static int udma_set_hw_caps(struct udma_dev *udma_dev)
 {
 #define MAX_MSG_LEN 0x10000
+#define RC_QUEUE_ENTRY_SIZE 64
 	struct ubase_adev_caps *a_caps;
 	uint32_t jetty_grp_cnt;
 	int ret;
@@ -552,6 +619,14 @@ static int udma_set_hw_caps(struct udma_dev *udma_dev)
 	udma_dev->caps.jetty.start_idx = a_caps->jfs.start_idx;
 	udma_dev->caps.jetty.next_idx = udma_dev->caps.jetty.start_idx;
 	udma_dev->caps.cqe_size = UDMA_CQE_SIZE;
+	udma_dev->caps.rc_queue_num = a_caps->rc_max_cnt;
+	udma_dev->caps.rc_queue_depth = a_caps->rc_que_depth;
+	udma_dev->caps.rc_entry_size = RC_QUEUE_ENTRY_SIZE;
+	udma_dev->caps.rc_dma_len = a_caps->pmem.dma_len;
+	udma_dev->caps.rc_dma_addr = a_caps->pmem.dma_addr;
+
+	cal_max_2m_num(udma_dev);
+
 	ret = udma_construct_qos_param(udma_dev);
 	if (ret)
 		return ret;
@@ -600,11 +675,14 @@ static int udma_init_dev_param(struct udma_dev *udma_dev)
 	for (i = 0; i < UDMA_DB_TYPE_NUM; i++)
 		INIT_LIST_HEAD(&udma_dev->db_list[i]);
 
+	udma_init_hugepage(udma_dev);
+
 	return 0;
 }
 
 static void udma_uninit_dev_param(struct udma_dev *udma_dev)
 {
+	udma_destroy_hugepage(udma_dev);
 	mutex_destroy(&udma_dev->db_mutex);
 	dev_set_drvdata(&udma_dev->comdev.adev->dev, NULL);
 	udma_destroy_tables(udma_dev);
diff --git a/drivers/ub/urma/hw/udma/udma_rct.c b/drivers/ub/urma/hw/udma/udma_rct.c
index 599c80c118fd86fd293315705e34b2cb6944a996..ee11d3ef3ee9b828197cc4413ff7acfc2cb29031 100644
--- a/drivers/ub/urma/hw/udma/udma_rct.c
+++ b/drivers/ub/urma/hw/udma/udma_rct.c
@@ -51,13 +51,50 @@ static int udma_destroy_rc_queue_ctx(struct udma_dev *dev, struct udma_rc_queue
 	return ret;
 }
 
+static int udma_alloc_rct_buffer(struct udma_dev *dev, struct ubcore_device_cfg *cfg,
+				 struct udma_rc_queue *rcq)
+{
+	uint32_t rct_buffer_size = dev->caps.rc_entry_size * cfg->rc_cfg.depth;
+	uint32_t buf_num_per_hugepage;
+
+	rcq->buf.entry_size = dev->caps.rc_entry_size;
+	rcq->buf.entry_cnt = cfg->rc_cfg.depth;
+	if (ubase_adev_prealloc_supported(dev->comdev.adev)) {
+		rct_buffer_size = ALIGN(rct_buffer_size, PAGE_SIZE);
+		if (rct_buffer_size > UDMA_HUGEPAGE_SIZE) {
+			rcq->buf.addr = dev->caps.rc_dma_addr + rcq->id * rct_buffer_size;
+		} else {
+			buf_num_per_hugepage = UDMA_HUGEPAGE_SIZE / rct_buffer_size;
+			rcq->buf.addr = dev->caps.rc_dma_addr +
+					rcq->id / buf_num_per_hugepage * UDMA_HUGEPAGE_SIZE +
+					rcq->id % buf_num_per_hugepage * rct_buffer_size;
+		}
+	} else {
+		rcq->buf.kva_or_slot = udma_alloc_iova(dev, rct_buffer_size, &rcq->buf.addr);
+		if (!rcq->buf.kva_or_slot) {
+			dev_err(dev->dev, "failed to alloc rct buffer.\n");
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void udma_free_rct_buffer(struct udma_dev *dev, struct udma_rc_queue *rcq)
+{
+	uint32_t rct_buffer_size = rcq->buf.entry_size * rcq->buf.entry_cnt;
+
+	if (!ubase_adev_prealloc_supported(dev->comdev.adev)) {
+		udma_free_iova(dev, rct_buffer_size,
+			       rcq->buf.kva_or_slot, rcq->buf.addr);
+		rcq->buf.kva_or_slot = NULL;
+		rcq->buf.addr = 0;
+	}
+}
+
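A standalone sketch (not driver code) of the address carving done by udma_alloc_rct_buffer() in the preallocated-memory case: each RC queue buffer is rounded up to PAGE_SIZE, and as long as it is no larger than one hugepage, several queues are packed per hugepage starting at rc_dma_addr. The base address, entry size and depth below are hypothetical; UDMA_HUGEPAGE_SIZE is assumed to be 2 MB.

/* Illustrative only: where an RC queue buffer lands inside the
 * preallocated DMA region, following udma_alloc_rct_buffer() above. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define UDMA_HUGEPAGE_SIZE	(2UL * 1024 * 1024)	/* assumed 2 MB */
#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

static uint64_t rct_buf_addr(uint64_t rc_dma_addr, uint32_t id,
			     uint32_t entry_size, uint32_t depth)
{
	uint64_t size = ALIGN_UP((uint64_t)entry_size * depth, PAGE_SIZE);
	uint64_t per_hugepage;

	if (size > UDMA_HUGEPAGE_SIZE)
		return rc_dma_addr + (uint64_t)id * size;

	/* pack several queues into one hugepage, as in the else branch above */
	per_hugepage = UDMA_HUGEPAGE_SIZE / size;
	return rc_dma_addr + id / per_hugepage * UDMA_HUGEPAGE_SIZE +
	       id % per_hugepage * size;
}

int main(void)
{
	/* hypothetical values: RC_QUEUE_ENTRY_SIZE = 64, depth = 1024 */
	uint64_t base = 0x100000000ULL;
	uint32_t id;

	/* 64 * 1024 = 64 KB per queue -> 32 queues per 2 MB hugepage */
	for (id = 0; id < 3; id++)
		printf("rcq %u -> 0x%" PRIx64 "\n", id,
		       rct_buf_addr(base, id, 64, 1024));
	return 0;
}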
 static int udma_alloc_rc_queue(struct udma_dev *dev,
			       struct ubcore_device_cfg *cfg, int rc_queue_id)
 {
-	uint32_t rcq_entry_size = dev->caps.rc_entry_size;
-	uint32_t rcq_entry_num = cfg->rc_cfg.depth;
 	struct udma_rc_queue *rcq;
-	uint32_t size;
 	int ret;
 
 	rcq = kzalloc(sizeof(struct udma_rc_queue), GFP_KERNEL);
@@ -65,15 +102,9 @@ static int udma_alloc_rc_queue(struct udma_dev *dev,
 		return -ENOMEM;
 
 	rcq->id = rc_queue_id;
-	size = rcq_entry_size * rcq_entry_num;
-	rcq->buf.kva_or_slot = udma_alloc_iova(dev, size, &rcq->buf.addr);
-	if (!rcq->buf.kva_or_slot) {
-		ret = -ENOMEM;
-		dev_err(dev->dev, "failed to alloc rc queue buffer.\n");
-		goto err_alloc_rcq;
-	}
-	rcq->buf.entry_size = rcq_entry_size;
-	rcq->buf.entry_cnt = rcq_entry_num;
+	ret = udma_alloc_rct_buffer(dev, cfg, rcq);
+	if (ret)
+		goto err_alloc_rct_buffer;
 
 	ret = udma_create_rc_queue_ctx(dev, rcq);
 	if (ret) {
@@ -101,10 +132,8 @@ static int udma_alloc_rc_queue(struct udma_dev *dev,
 		dev_err(dev->dev, "udma destroy rc queue ctx failed when alloc rc queue.\n");
 
 err_create_rcq_ctx:
-	udma_free_iova(dev, size, rcq->buf.kva_or_slot, rcq->buf.addr);
-	rcq->buf.kva_or_slot = NULL;
-	rcq->buf.addr = 0;
-err_alloc_rcq:
+	udma_free_rct_buffer(dev, rcq);
+err_alloc_rct_buffer:
 	kfree(rcq);
 
 	return ret;
@@ -131,10 +160,7 @@ void udma_free_rc_queue(struct udma_dev *dev, int rc_queue_id)
 	if (dfx_switch)
 		udma_dfx_delete_id(dev, &dev->dfx_info->rc, rc_queue_id);
 
-	udma_free_iova(dev, rcq->buf.entry_size * rcq->buf.entry_cnt,
-		       rcq->buf.kva_or_slot, rcq->buf.addr);
-	rcq->buf.kva_or_slot = NULL;
-	rcq->buf.addr = 0;
+	udma_free_rct_buffer(dev, rcq);
 
 	kfree(rcq);
 }
diff --git a/include/uapi/ub/urma/udma/udma_abi.h b/include/uapi/ub/urma/udma/udma_abi.h
index 02440d162c8d7e7396c15086a0915a06f6efac79..5859f5254b5e4543cc34cbe5d313abb8a6bb7601 100644
--- a/include/uapi/ub/urma/udma/udma_abi.h
+++ b/include/uapi/ub/urma/udma/udma_abi.h
@@ -74,7 +74,8 @@ struct udma_create_jetty_ucmd {
 	__aligned_u64 jetty_addr;
 	__u32 pi_type : 1;
 	__u32 non_pin : 1;
-	__u32 rsv : 30;
+	__u32 is_hugepage : 1;
+	__u32 rsv : 29;
 	__u32 jetty_type;
 	__aligned_u64 jfr_sleep_buf;
 	__u32 jfs_id;
@@ -86,6 +87,9 @@ struct udma_create_jfc_ucmd {
 	__u32 buf_len;
 	__u32 mode; /* 0: normal, 1: user stars, 2: kernel stars */
 	__aligned_u64 db_addr;
+	__u32 is_hugepage : 1;
+	__u32 rsv : 31;
+	__u32 rsv1;
 };
 
 struct udma_create_ctx_resp {
@@ -93,7 +97,8 @@
 	__u32 dwqe_enable : 1;
 	__u32 reduce_enable : 1;
 	__u32 dump_aux_info : 1;
-	__u32 rsv : 21;
+	__u32 hugepage_enable : 1;
+	__u32 rsv : 20;
 	__u32 ue_id;
 	__u32 chip_id;
 	__u32 die_id;
@@ -109,6 +114,7 @@ struct udma_create_jfr_resp {
 enum db_mmap_type {
 	UDMA_MMAP_JFC_PAGE,
 	UDMA_MMAP_JETTY_DSQE,
+	UDMA_MMAP_HUGEPAGE,
 };
 
 enum {
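For completeness, a sketch of how a userspace provider might consume the new ABI bits: hugepage_enable in udma_create_ctx_resp advertises the feature, and the provider opts in per queue by setting is_hugepage in the create commands. The structures below are trimmed, renamed copies of the uapi definitions above (only the fields visible in this hunk); how the ucmd/resp blobs actually travel between the provider and the kernel is outside this patch and is not shown.

/* Illustrative only: feature negotiation around the new is_hugepage /
 * hugepage_enable bits; trimmed copies of the uapi structs above. */
#include <linux/types.h>
#include <stdio.h>
#include <string.h>

struct udma_create_ctx_resp_min {	/* hypothetical subset of udma_create_ctx_resp */
	__u32 dwqe_enable : 1;
	__u32 reduce_enable : 1;
	__u32 dump_aux_info : 1;
	__u32 hugepage_enable : 1;
	__u32 rsv : 20;
};

struct udma_create_jfc_ucmd_min {	/* hypothetical subset of udma_create_jfc_ucmd */
	__u32 buf_len;
	__u32 mode;			/* 0: normal, 1: user stars, 2: kernel stars */
	__aligned_u64 db_addr;
	__u32 is_hugepage : 1;
	__u32 rsv : 31;
	__u32 rsv1;
};

int main(void)
{
	struct udma_create_ctx_resp_min resp = { .hugepage_enable = 1 };
	struct udma_create_jfc_ucmd_min cmd;

	memset(&cmd, 0, sizeof(cmd));
	/* only request hugepage-backed queue memory when the kernel
	 * advertised support at context creation time */
	if (resp.hugepage_enable)
		cmd.is_hugepage = 1;

	printf("is_hugepage=%u\n", cmd.is_hugepage);
	return 0;
}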