On 5 Jun 2023, at 14:41, Simon Horman wrote:

> On Mon, Jun 05, 2023 at 10:59:50AM +0200, Eelco Chaudron wrote:
>> Currently, the per cpu upcall counters are allocated after the vport is
>> created and inserted into the system. This could lead to the datapath
>> accessing the counters before they are allocated resulting in a kernel
>> Oops.
>>
>> Here is an example:
>>
>>   PID: 59693    TASK: ffff0005f4f51500  CPU: 0    COMMAND: "ovs-vswitchd"
>>    #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4
>>    #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc
>>    #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60
>>    #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58
>>    #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388
>>    #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c
>>    #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68
>>    #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch]
>>    ...
>>
>>   PID: 58682    TASK: ffff0005b2f0bf00  CPU: 0    COMMAND: "kworker/0:3"
>>    #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758
>>    #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994
>>    #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8
>>    #3 [ffff80000a5d3120] die at ffffb70f0628234c
>>    #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8
>>    #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4
>>    #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4
>>    #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710
>>    #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74
>>    #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac
>>   #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24
>>   #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc
>>   #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch]
>>   #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc 
>> [openvswitch]
>>   #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch]
>>   #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 
>> [openvswitch]
>>   #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch]
>>   #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at 
>> ffffb70f06079f90
>>
>> We moved the per cpu upcall counter allocation to the existing vport
>> alloc and free functions to solve this.
>>
>> Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure")
>> Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets")
>> Signed-off-by: Eelco Chaudron <[email protected]>
>> ---
>>  net/openvswitch/datapath.c |   19 -------------------
>>  net/openvswitch/vport.c    |    8 ++++++++
>>  2 files changed, 8 insertions(+), 19 deletions(-)
>>
>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>> index fcee6012293b..58f530f60172 100644
>> --- a/net/openvswitch/datapath.c
>> +++ b/net/openvswitch/datapath.c
>> @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p)
>>      /* First drop references to device. */
>>      hlist_del_rcu(&p->dp_hash_node);
>>
>> -    /* Free percpu memory */
>> -    free_percpu(p->upcall_stats);
>> -
>>      /* Then destroy it. */
>>      ovs_vport_del(p);
>>  }
>> @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
>> genl_info *info)
>>              goto err_destroy_portids;
>>      }
>>
>> -    vport->upcall_stats = netdev_alloc_pcpu_stats(struct 
>> vport_upcall_stats_percpu);
>> -    if (!vport->upcall_stats) {
>> -            err = -ENOMEM;
>> -            goto err_destroy_vport;
>> -    }
>> -
>>      err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
>>                                 info->snd_seq, 0, OVS_DP_CMD_NEW);
>>      BUG_ON(err < 0);
>> @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
>> genl_info *info)
>>      ovs_notify(&dp_datapath_genl_family, reply, info);
>>      return 0;
>>
>> -err_destroy_vport:
>> -    ovs_dp_detach_port(vport);
>>  err_destroy_portids:
>>      kfree(rcu_dereference_raw(dp->upcall_portids));
>>  err_unlock_and_destroy_meters:
>> @@ -2322,12 +2311,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, 
>> struct genl_info *info)
>>              goto exit_unlock_free;
>>      }
>>
>> -    vport->upcall_stats = netdev_alloc_pcpu_stats(struct 
>> vport_upcall_stats_percpu);
>> -    if (!vport->upcall_stats) {
>> -            err = -ENOMEM;
>> -            goto exit_unlock_free_vport;
>> -    }
>> -
>>      err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
>>                                    info->snd_portid, info->snd_seq, 0,
>>                                    OVS_VPORT_CMD_NEW, GFP_KERNEL);
>> @@ -2345,8 +2328,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, 
>> struct genl_info *info)
>>      ovs_notify(&dp_vport_genl_family, reply, info);
>>      return 0;
>>
>> -exit_unlock_free_vport:
>> -    ovs_dp_detach_port(vport);
>>  exit_unlock_free:
>>      ovs_unlock();
>>      kfree_skb(reply);
>> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
>> index 7e0f5c45b512..e91ae5dd7d22 100644
>> --- a/net/openvswitch/vport.c
>> +++ b/net/openvswitch/vport.c
>
> Hi Eelco,
>
> could we move to a more idiomatic implementation
> of the error path in ovs_vport_alloc() ?
>
> I know it's not strictly related to this change, but OTOH, it is.

Thanks Simon for the review…

I decided to stick to fixing the issue, not trying to do cleanup stuff while at 
it :) But if there are no further comments by tomorrow, I can send a v2 
including this change.

>
>> @@ -135,12 +135,19 @@ struct vport *ovs_vport_alloc(int priv_size, const 
>> struct vport_ops *ops,
>>      if (!vport)
>>              return ERR_PTR(-ENOMEM);
>>
>> +    vport->upcall_stats = netdev_alloc_pcpu_stats(struct 
>> vport_upcall_stats_percpu);
>> +    if (!vport->upcall_stats) {
>               err = -ENOMEM;
>               goto err_kfree_vport;
>
>> +            kfree(vport);
>> +            return ERR_PTR(-ENOMEM);
>> +    }
>> +
>>      vport->dp = parms->dp;
>>      vport->port_no = parms->port_no;
>>      vport->ops = ops;
>>      INIT_HLIST_NODE(&vport->dp_hash_node);
>>
>>      if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
>               err = -EINVAL;
>               goto err_free_percpu;
>
>> +            free_percpu(vport->upcall_stats);
>>              kfree(vport);
>>              return ERR_PTR(-EINVAL);
>>      }
>
> ...
>       return vport;
>
> err_kfree_vport:
>       kfree(vport);
> err_free_percpu:
>       free_percpu(vport->upcall_stats);
>       return(ERR_PTR(err));
>
>
>> @@ -165,6 +172,7 @@ void ovs_vport_free(struct vport *vport)
>>       * it is safe to use raw dereference.
>>       */
>>      kfree(rcu_dereference_raw(vport->upcall_portids));
>> +    free_percpu(vport->upcall_stats);
>>      kfree(vport);
>>  }
>>  EXPORT_SYMBOL_GPL(ovs_vport_free);
>>
>> _______________________________________________
>> dev mailing list
>> [email protected]
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to