Thanks, PSB. > -----Original Message----- > From: Shahaf Shuler > Sent: Monday, October 29, 2018 12:03 PM > To: Dekel Peled <dek...@mellanox.com>; Yongseok Koh > <ys...@mellanox.com> > Cc: dev@dpdk.org; Ori Kam <or...@mellanox.com> > Subject: RE: [dpdk-dev] [PATCH v6 2/6] net/mlx5: add VXLAN encap action to > Direct Verbs > > Hi Dekel, > > Thursday, October 25, 2018 11:08 PM, Dekel Peled: > > Subject: [dpdk-dev] [PATCH v6 2/6] net/mlx5: add VXLAN encap action to > > Direct Verbs > > > > This patch implements the VXLAN encap action in DV flow for MLX5 PMD. > > > > Signed-off-by: Dekel Peled <dek...@mellanox.com> > > --- > > drivers/net/mlx5/mlx5_flow.h | 2 + > > drivers/net/mlx5/mlx5_flow_dv.c | 351 > > +++++++++++++++++++++++++++++++++++++++- > > 2 files changed, 348 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/net/mlx5/mlx5_flow.h > > b/drivers/net/mlx5/mlx5_flow.h index 61299d6..6e92afe 100644 > > --- a/drivers/net/mlx5/mlx5_flow.h > > +++ b/drivers/net/mlx5/mlx5_flow.h > > @@ -92,6 +92,7 @@ > > #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19) #define > > MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20) #define > > MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21) > > +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22) > > > > #define MLX5_FLOW_FATE_ACTIONS \ > > (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | > > MLX5_FLOW_ACTION_RSS) @@ -181,6 +182,7 @@ struct mlx5_flow_dv { > #ifdef > > HAVE_IBV_FLOW_DV_SUPPORT > > struct mlx5dv_flow_action_attr > > actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS]; > > /**< Action list. */ > > + struct ibv_flow_action *verbs_action; /**< Verbs encap/decap > > The ibv_flow_action is already part of a union inside > mlx5dv_flow_action_attr, why you need it separately? > I see also in the below code that you copy it from the action list to this > specific field. Can you elaborate why? >
I added it to use when flow rule is removed, for easy access to the action to destroy. I am now changing the code per 17.11 PR 876, adding cache of encap/decap actions, so this member is no longer needed, and will be removed. > > object. > > +*/ > > #endif > > int actions_n; /**< number of actions. */ }; diff --git > > a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c > > index 8f729f4..14110c5 100644 > > --- a/drivers/net/mlx5/mlx5_flow_dv.c > > +++ b/drivers/net/mlx5/mlx5_flow_dv.c > > @@ -34,6 +34,12 @@ > > > > #ifdef HAVE_IBV_FLOW_DV_SUPPORT > > > > +/* > > + * Encap buf length, max: > > + * Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14 > > VLAN is 4B not 8B. which tunnel is for 20B? This is code I reused from 17.11, I will verify it. > > > + */ > > +#define MLX5_ENCAP_MAX_LEN 132 > > + > > /** > > * Validate META item. > > * > > @@ -96,6 +102,300 @@ > > } > > > > /** > > + * Validate the L2 encap action. > > + * Used for VXLAN encap action. > > No need for that. Later on you put more supported protocols. This is a > generic function for L2 encap validation. I will remove it. > > > + * > > + * @param[in] action_flags > > + * Holds the actions detected until now. > > + * @param[in] action > > + * Pointer to the encap action. > > + * @param[in] attr > > + * Pointer to flow attributes > > + * @param[out] error > > + * Pointer to error structure. > > + * > > + * @return > > + * 0 on success, a negative errno value otherwise and rte_errno is set. 
> > + */ > > +static int > > +flow_dv_validate_action_l2_encap(uint64_t action_flags, > > + const struct rte_flow_action *action, > > + const struct rte_flow_attr *attr, > > + struct rte_flow_error *error) > > +{ > > + if (!(action->conf)) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > action, > > + "configuration cannot be null"); > > + if (action_flags & MLX5_FLOW_ACTION_DROP) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > NULL, > > + "can't drop and encap in same > > flow"); > > + if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > NULL, > > + "can only have a single encap" > > + " action in a flow"); > > + if (attr->ingress) > > + return rte_flow_error_set(error, ENOTSUP, > > + > > RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, > > + NULL, > > + "encap action not supported for " > > + "ingress"); > > + return 0; > > +} > > + > > +/** > > + * Get the size of specific rte_flow_item_type > > + * > > + * @param[in] item_type > > + * Tested rte_flow_item_type. > > + * > > + * @return > > + * sizeof struct item_type, 0 if void or irrelevant. > > + */ > > +static size_t > > +flow_dv_get_item_len(const enum rte_flow_item_type item_type) { > > Can we have this function as a macro? > > #define flow_dv_get_item_len(t) (strncmp(t, VOID, 4) ? 0 : sizeof(struct > rte_flow_item_##t)) > > Usage: flow_dv_get_item_len(ETH) > I will change it. 
> > > + size_t retval; > > + > > + switch (item_type) { > > + case RTE_FLOW_ITEM_TYPE_ETH: > > + retval = sizeof(struct rte_flow_item_eth); > > + break; > > + case RTE_FLOW_ITEM_TYPE_VLAN: > > + retval = sizeof(struct rte_flow_item_vlan); > > + break; > > + case RTE_FLOW_ITEM_TYPE_IPV4: > > + retval = sizeof(struct rte_flow_item_ipv4); > > + break; > > + case RTE_FLOW_ITEM_TYPE_IPV6: > > + retval = sizeof(struct rte_flow_item_ipv6); > > + break; > > + case RTE_FLOW_ITEM_TYPE_UDP: > > + retval = sizeof(struct rte_flow_item_udp); > > + break; > > + case RTE_FLOW_ITEM_TYPE_TCP: > > + retval = sizeof(struct rte_flow_item_tcp); > > + break; > > + case RTE_FLOW_ITEM_TYPE_VXLAN: > > + retval = sizeof(struct rte_flow_item_vxlan); > > + break; > > + case RTE_FLOW_ITEM_TYPE_GRE: > > + retval = sizeof(struct rte_flow_item_gre); > > + break; > > + case RTE_FLOW_ITEM_TYPE_NVGRE: > > + retval = sizeof(struct rte_flow_item_nvgre); > > + break; > > + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: > > + retval = sizeof(struct rte_flow_item_vxlan_gpe); > > + break; > > + case RTE_FLOW_ITEM_TYPE_MPLS: > > + retval = sizeof(struct rte_flow_item_mpls); > > + break; > > + case RTE_FLOW_ITEM_TYPE_VOID: /* Fall through. */ > > + default: > > + retval = 0; > > + break; > > + } > > + return retval; > > +}; > > + > > +/** > > + * Convert the encap action data from rte_flow_item to raw buffer > > + * > > + * @param[in] item > > + * Pointer to rte_flow_item object. > > Since it is an item list, "items" is preferable. I will rename it. > > > + * @param[out] buf > > + * Pointer to the output buffer. > > + * @param[out] size > > + * Pointer to the output buffer size. > > + * @param[out] error > > + * Pointer to the error structure. > > + * > > + * @return > > + * 0 on success, a negative errno value otherwise and rte_errno is set. 
> > + */ > > +static int > > +flow_dv_convert_encap_data(const struct rte_flow_item *item, uint8_t > > *buf, > > + size_t *size, struct rte_flow_error *error) { > > + struct ether_hdr *eth = NULL; > > + struct vlan_hdr *vlan = NULL; > > + struct ipv4_hdr *ipv4 = NULL; > > + struct ipv6_hdr *ipv6 = NULL; > > + struct udp_hdr *udp = NULL; > > + struct vxlan_hdr *vxlan = NULL; > > + size_t len; > > + size_t temp_size = 0; > > + > > + if (!item) > > + return rte_flow_error_set(error, EINVAL, > > + RTE_FLOW_ERROR_TYPE_ACTION, > > + NULL, "invalid empty data"); > > + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { > > + len = flow_dv_get_item_len(item->type); > > + if (len + temp_size > MLX5_ENCAP_MAX_LEN) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "invalid item length"); > > It is not invalid item length, it is items total size is too big for encap. I will change it. > > > + rte_memcpy((void *)&buf[temp_size], item->spec, len); > > + switch (item->type) { > > + case RTE_FLOW_ITEM_TYPE_ETH: > > + eth = (struct ether_hdr *)&buf[temp_size]; > > + break; > > + case RTE_FLOW_ITEM_TYPE_VLAN: > > + vlan = (struct vlan_hdr *)&buf[temp_size]; > > + if (!eth) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "eth header not found"); > > + if (!eth->ether_type) > > + eth->ether_type = > > RTE_BE16(ETHER_TYPE_VLAN); > > + break; > > + case RTE_FLOW_ITEM_TYPE_IPV4: > > + ipv4 = (struct ipv4_hdr *)&buf[temp_size]; > > + if (!vlan && !eth) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "neither eth nor vlan" > > + " header found"); > > + if (vlan && !vlan->eth_proto) > > + vlan->eth_proto = > > RTE_BE16(ETHER_TYPE_IPv4); > > + else if (eth && !eth->ether_type) > > + eth->ether_type = > > RTE_BE16(ETHER_TYPE_IPv4); > > + if (!ipv4->version_ihl) > > + ipv4->version_ihl = 
0x45; > > + if (!ipv4->time_to_live) > > + ipv4->time_to_live = 0x40; > > If no existing macro have those two above defined as ones. > There are few more places on this function related to this comment. I will change it. > > > + break; > > + case RTE_FLOW_ITEM_TYPE_IPV6: > > + ipv6 = (struct ipv6_hdr *)&buf[temp_size]; > > + if (!vlan && !eth) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "neither eth nor vlan" > > + " header found"); > > + if (vlan && !vlan->eth_proto) > > + vlan->eth_proto = > > RTE_BE16(ETHER_TYPE_IPv6); > > + else if (eth && !eth->ether_type) > > + eth->ether_type = > > RTE_BE16(ETHER_TYPE_IPv6); > > + if (!ipv6->vtc_flow) > > + ipv6->vtc_flow = RTE_BE32(0x60000000); > > + if (!ipv6->hop_limits) > > + ipv6->hop_limits = 0xff; > > + break; > > + case RTE_FLOW_ITEM_TYPE_UDP: > > + udp = (struct udp_hdr *)&buf[temp_size]; > > + if (!ipv4 && !ipv6) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "ip header not found"); > > + if (ipv4 && !ipv4->next_proto_id) > > + ipv4->next_proto_id = IPPROTO_UDP; > > + else if (ipv6 && !ipv6->proto) > > + ipv6->proto = IPPROTO_UDP; > > + break; > > + case RTE_FLOW_ITEM_TYPE_VXLAN: > > + vxlan = (struct vxlan_hdr *)&buf[temp_size]; > > + if (!udp) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "udp header not found"); > > + if (!udp->dst_port) > > + udp->dst_port = > > RTE_BE16(MLX5_UDP_PORT_VXLAN); > > + if (!vxlan->vx_flags) > > + vxlan->vx_flags = RTE_BE32(0x08000000); > > + break; > > + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: > > + vxlan = (struct vxlan_hdr *)&buf[temp_size]; > > + if (!udp) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "udp header not found"); > > + if (!udp->dst_port) > > + udp->dst_port = > > + > > 
RTE_BE16(MLX5_UDP_PORT_VXLAN_GPE); > > + if (!vxlan->vx_flags) > > + vxlan->vx_flags = RTE_BE32(0x0c000003); > > I would say you cannot set internally the next protocol. Only the user know > what it is. > I think in case VXLAN_GPE is set on the item list, the next_proto field is a > must, otherwise the rule should be rejected. I will verify it. > > > + break; > > + case RTE_FLOW_ITEM_TYPE_GRE: > > + case RTE_FLOW_ITEM_TYPE_NVGRE: > > + if (!ipv4 && !ipv6) > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "ip header not found"); > > + if (ipv4 && !ipv4->next_proto_id) > > + ipv4->next_proto_id = IPPROTO_GRE; > > + else if (ipv6 && !ipv6->proto) > > + ipv6->proto = IPPROTO_GRE; > > This patch is for VXLAN, yet you add a GRE/NVGRE code block. It is better to > add it on the subsequent patches adding this feature. > I will move it to next patch. > Also you need to check if the user put the protocol type on the GRE header. > Same as the VXLAN-GPE case, this is a must. > I will add it. > > + break; > > + case RTE_FLOW_ITEM_TYPE_VOID: > > + break; > > + default: > > + return rte_flow_error_set(error, EINVAL, > > + > > RTE_FLOW_ERROR_TYPE_ACTION, > > + (void *)item->type, > > + "unsupported item type"); > > + break; > > + } > > + temp_size += len; > > + } > > + *size = temp_size; > > + return 0; > > +} > > + > > +/** > > + * Convert L2 encap action to DV specification. > > + * Used for VXLAN encap action. > > Same - no need to specific which exact protocols. I will remove it. > > > + * > > + * @param[in] dev > > + * Pointer to rte_eth_dev structure. > > + * @param[in] action > > + * Pointer to action structure. > > + * @param[out] error > > + * Pointer to the error structure. > > + * > > + * @return > > + * Pointer to action on success, NULL otherwise and rte_errno is set. 
> > + */ > > +static struct ibv_flow_action * > > +flow_dv_create_action_l2_encap(struct rte_eth_dev *dev, > > + const struct rte_flow_action *action, > > + struct rte_flow_error *error) { > > + struct ibv_flow_action *verbs_action = NULL; > > + const struct rte_flow_item *encap_data; > > + struct priv *priv = dev->data->dev_private; > > + uint8_t buf[MLX5_ENCAP_MAX_LEN]; > > + size_t size = 0; > > + int convert_result = 0; > > + > > + encap_data = ((const struct rte_flow_action_vxlan_encap *) > > + action->conf)->definition; > > + convert_result = flow_dv_convert_encap_data(encap_data, buf, > > + &size, error); > > + if (convert_result) > > + return NULL; > > + verbs_action = mlx5_glue- > > >dv_create_flow_action_packet_reformat > > + (priv->ctx, size, (size ? buf : NULL), > > How can size be 0 and yet the encap action is valid? It can't, this is just for safety. I will remove it. > > > + > > MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL, > > + MLX5DV_FLOW_TABLE_TYPE_NIC_TX); > > + if (!verbs_action) > > + rte_flow_error_set(error, EINVAL, > > RTE_FLOW_ERROR_TYPE_ACTION, > > + NULL, "cannot create L2 encap action"); > > + return verbs_action; > > +} > > + > > +/** > > * Verify the @p attributes will be correctly understood by the NIC and > store > > * them in the @p flow if everything is correct. > > * > > @@ -339,6 +639,16 @@ > > action_flags |= MLX5_FLOW_ACTION_COUNT; > > ++actions_n; > > break; > > + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: > > + ret = > > flow_dv_validate_action_l2_encap(action_flags, > > + actions, attr, > > + error); > > + if (ret < 0) > > + return ret; > > + action_flags |= > > MLX5_FLOW_ACTION_VXLAN_ENCAP; > > + ++actions_n; > > + break; > > + > > default: > > return rte_flow_error_set(error, ENOTSUP, > > > > RTE_FLOW_ERROR_TYPE_ACTION, > > @@ -1045,14 +1355,26 @@ > > /** > > * Store the requested actions in an array. > > * > > + * @param[in] dev > > + * Pointer to rte_eth_dev structure. 
> > * @param[in] action > > * Flow action to translate. > > * @param[in, out] dev_flow > > * Pointer to the mlx5_flow. > > + * @param[in] attr > > + * Pointer to the flow attributes. > > + * @param[out] error > > + * Pointer to the error structure. > > + * > > + * @return > > + * 0 on success, a negative errno value otherwise and rte_errno is set. > > */ > > -static void > > -flow_dv_create_action(const struct rte_flow_action *action, > > - struct mlx5_flow *dev_flow) > > +static int > > +flow_dv_create_action(struct rte_eth_dev *dev, > > + const struct rte_flow_action *action, > > + struct mlx5_flow *dev_flow, > > + const struct rte_flow_attr *attr __rte_unused, > > It would be better to add this when you add the RAW encap feature. Now they > are marked as unused, and you didn't change that when you actually started to > use them. > Need to choose one of the paths: > 1. add them on the RAW encap/decap support (better) 2. remove the > rte_unused on the RAW encap/decap support > I will move it to later patch. 
> > > + struct rte_flow_error *error) > > { > > const struct rte_flow_action_queue *queue; > > const struct rte_flow_action_rss *rss; @@ -1100,10 +1422,24 @@ > > /* Added to array only in apply since we need the QP */ > > flow->actions |= MLX5_FLOW_ACTION_RSS; > > break; > > + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: > > + dev_flow->dv.actions[actions_n].type = > > + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION; > > + dev_flow->dv.actions[actions_n].action = > > + flow_dv_create_action_l2_encap(dev, > > action, > > + error); > > + if (!(dev_flow->dv.actions[actions_n].action)) > > + return -rte_errno; > > + dev_flow->dv.verbs_action = > > + dev_flow->dv.actions[actions_n].action; > > + flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP; > > + actions_n++; > > + break; > > default: > > break; > > } > > dev_flow->dv.actions_n = actions_n; > > + return 0; > > } > > > > static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 }; > > @@ -1217,7 +1553,6 @@ > > return 0; > > } > > > > - > > /** > > * Fill the flow with DV spec. > > * > > @@ -1272,7 +1607,8 @@ > > if (flow_dv_matcher_register(dev, &matcher, dev_flow, error)) > > return -rte_errno; > > for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) > > - flow_dv_create_action(actions, dev_flow); > > + if (flow_dv_create_action(dev, actions, dev_flow, attr, > > error)) > > + return -rte_errno; > > return 0; > > } > > > > @@ -1457,6 +1793,11 @@ > > LIST_REMOVE(dev_flow, next); > > if (dev_flow->dv.matcher) > > flow_dv_matcher_release(dev, dev_flow); > > + if (dev_flow->dv.verbs_action) { > > Like I said in the beginning, I don't understand why this field is separate > from > the mlx5dv action list. > > > + claim_zero(mlx5_glue->destroy_flow_action > > + (dev_flow- > > >dv.verbs_action)); > > + dev_flow->dv.verbs_action = NULL; > > + } > > rte_free(dev_flow); > > } > > } > > -- > > 1.8.3.1