On Wed, Apr 14, 2021 at 05:23:50PM -0700, Martin KaFai Lau wrote:
> On Wed, Apr 14, 2021 at 08:26:08PM +0800, Hangbin Liu wrote:
> [ ... ]
> 
> > +static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
> > +                                             u64 flags, u64 flag_mask,
> >                                               void *lookup_elem(struct bpf_map *map, u32 key))
> >  {
> >     struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
> >  
> >     /* Lower bits of the flags are used as return code on lookup failure */
> > -   if (unlikely(flags > XDP_TX))
> > +   if (unlikely(flags & ~(BPF_F_ACTION_MASK | flag_mask)))
> >             return XDP_ABORTED;
> >  
> >     ri->tgt_value = lookup_elem(map, ifindex);
> > -   if (unlikely(!ri->tgt_value)) {
> > +   if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
> >             /* If the lookup fails we want to clear out the state in the
> >              * redirect_info struct completely, so that if an eBPF program
> >              * performs multiple lookups, the last one always takes
> > @@ -1482,13 +1484,21 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
> >              */
> >             ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
> >             ri->map_type = BPF_MAP_TYPE_UNSPEC;
> > -           return flags;
> > +           return flags & BPF_F_ACTION_MASK;
> >     }
> >  
> >     ri->tgt_index = ifindex;
> >     ri->map_id = map->id;
> >     ri->map_type = map->map_type;
> >  
> > +   if (flags & BPF_F_BROADCAST) {
> > +           WRITE_ONCE(ri->map, map);
> Why only WRITE_ONCE on ri->map?  Is it needed?

I think this is to make sure the map pointer is assigned to ri->map safely;
the pattern dates back to commit f6069b9aa993 ("bpf: fix redirect to map
under tail calls").

> 
> > +           ri->flags = flags;
> > +   } else {
> > +           WRITE_ONCE(ri->map, NULL);
> > +           ri->flags = 0;
> > +   }
> > +
> >     return XDP_REDIRECT;
> >  }
> >  
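BTW, for context, the intended usage from the BPF program side is
something like this (a sketch only; the map definition is illustrative,
and BPF_F_EXCLUDE_INGRESS follows the flag naming in the rest of this
series):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 32);
} forward_map SEC(".maps");

SEC("xdp")
int xdp_redirect_multi(struct xdp_md *ctx)
{
	/* With BPF_F_BROADCAST the key (0 here) is ignored and the frame
	 * is cloned to every device in the map; BPF_F_EXCLUDE_INGRESS
	 * additionally skips the receiving interface.
	 */
	return bpf_redirect_map(&forward_map, 0,
				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}

char _license[] SEC("license") = "GPL";
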
> [ ... ]
> 
> > +int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
> > +                     struct bpf_map *map, bool exclude_ingress)
> > +{
> > +   struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
> > +   int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
> > +   struct bpf_dtab_netdev *dst, *last_dst = NULL;
> > +   struct hlist_head *head;
> > +   struct hlist_node *next;
> > +   struct xdp_frame *xdpf;
> > +   unsigned int i;
> > +   int err;
> > +
> > +   xdpf = xdp_convert_buff_to_frame(xdp);
> > +   if (unlikely(!xdpf))
> > +           return -EOVERFLOW;
> > +
> > +   if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
> > +           for (i = 0; i < map->max_entries; i++) {
> > +                   dst = READ_ONCE(dtab->netdev_map[i]);
> > +                   if (!is_valid_dst(dst, xdp, exclude_ifindex))
> > +                           continue;
> > +
> > +                   /* we only need n-1 clones; last_dst enqueued below */
> > +                   if (!last_dst) {
> > +                           last_dst = dst;
> > +                           continue;
> > +                   }
> > +
> > +                   err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
> > +                   if (err)
> > +                           return err;
> > +
> > +                   last_dst = dst;
> > +           }
> > +   } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
> > +           for (i = 0; i < dtab->n_buckets; i++) {
> > +                   head = dev_map_index_hash(dtab, i);
> > +                   hlist_for_each_entry_safe(dst, next, head, index_hlist) {
> hmm.... should it be hlist_for_each_entry_rcu() instead?

Ah, that makes sense to me. I will fix it.
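
I guess the loop would then become something like this (a sketch; the
lockdep annotation is an assumption, following how other devmap hash
walks are written):

	for (i = 0; i < dtab->n_buckets; i++) {
		head = dev_map_index_hash(dtab, i);
		hlist_for_each_entry_rcu(dst, head, index_hlist,
					 lockdep_is_held(&dtab->index_lock)) {
			if (!is_valid_dst(dst, xdp, exclude_ifindex))
				continue;

			/* same n-1 clone logic as the array case */
			if (!last_dst) {
				last_dst = dst;
				continue;
			}

			err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
			if (err)
				return err;

			last_dst = dst;
		}
	}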

Thanks
Hangbin
