Re: [ovs-dev] [PATCH v8 5/6] dpif-netdev: Time based output batching.

Kevin Traynor Wed, 20 Dec 2017 06:23:34 -0800

On 12/20/2017 02:06 PM, Stokes, Ian wrote:
>>>
>>> Good catch,
>>>
>>> I was going to push this today but I'll hold off until this issue is
>> resolved, I don’t want an existing feature such as the rxq balancing being
>> negatively impacted upon if we can avoid it.
>>>
>>> Ian
>>
>> Ian, in general, I'm good with pushing the series without this patch
>> because simple output batching provides significant performance
>> improvement itself for bonding/multiflow cases. Maybe we can delay only
>> time-based approach until we have solution for rxq-cycles issue.
>>
> 
> I think that’s reasonable. I think with the RXQ balancing there will be a 
> number of situations/corner cases we need to consider and test for so the 
> extra time to give feedback on it would be useful. I'll also hold off on the 
> DPDK docs patch you have for the TX flushing and both can be merged together 
> at a later stage.
> 
> Kevin, would this solution be acceptable to you?
>


Yes, sounds good to me, tx batching is a really nice feature. It's just
the time based flushing that could cause an issue here and we can look
at that separately.

> If so, I think I'm correct in saying it's just this patch that should not be 
> merged, patches 1-4 of the set can still be applied as well as patch 6?
> 
> Ian
> 
>> Best regards, Ilya Maximets.
>>
>>>>
>>>>>          }
>>>>>
>>>>>          if (lc++ > 1024) {
>>>>> @@ -4150,9 +4228,6 @@ reload:
>>>>>              lc = 0;
>>>>>
>>>>>              coverage_try_clear();
>>>>> -            /* It's possible that the time was not updated on current
>>>>> -             * iteration, if there were no received packets. */
>>>>> -            pmd_thread_ctx_time_update(pmd);
>>>>>              dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
>>>>>              if (!ovsrcu_try_quiesce()) {
>>>>>                  emc_cache_slow_sweep(&pmd->flow_cache);
>>>>> @@ -4238,7 +4313,7 @@ dp_netdev_run_meter(struct dp_netdev *dp,
>>>>> struct
>>>> dp_packet_batch *packets_,
>>>>>      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
>>>>>
>>>>>      /* All packets will hit the meter at the same time. */
>>>>> -    long_delta_t = (now - meter->used); /* msec */
>>>>> +    long_delta_t = (now - meter->used) / 1000; /* msec */
>>>>>
>>>>>      /* Make sure delta_t will not be too large, so that bucket will
>> not
>>>>>       * wrap around below. */
>>>>> @@ -4394,7 +4469,7 @@ dpif_netdev_meter_set(struct dpif *dpif,
>>>> ofproto_meter_id *meter_id,
>>>>>          meter->flags = config->flags;
>>>>>          meter->n_bands = config->n_bands;
>>>>>          meter->max_delta_t = 0;
>>>>> -        meter->used = time_msec();
>>>>> +        meter->used = time_usec();
>>>>>
>>>>>          /* set up bands */
>>>>>          for (i = 0; i < config->n_bands; ++i) { @@ -4592,6 +4667,7
>>>>> @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct
>>>> dp_netdev *dp,
>>>>>      pmd->core_id = core_id;
>>>>>      pmd->numa_id = numa_id;
>>>>>      pmd->need_reload = false;
>>>>> +    pmd->n_output_batches = 0;
>>>>>
>>>>>      ovs_refcount_init(&pmd->ref_cnt);
>>>>>      latch_init(&pmd->exit_latch);
>>>>> @@ -4779,6 +4855,7 @@ dp_netdev_add_port_tx_to_pmd(struct
>>>>> dp_netdev_pmd_thread *pmd,
>>>>>
>>>>>      tx->port = port;
>>>>>      tx->qid = -1;
>>>>> +    tx->flush_time = 0LL;
>>>>>      dp_packet_batch_init(&tx->output_pkts);
>>>>>
>>>>>      hmap_insert(&pmd->tx_ports, &tx->node,
>>>>> hash_port_no(tx->port->port_no)); @@ -4942,7 +5019,7 @@
>>>> packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
>>>>>      struct dp_netdev_flow *flow = batch->flow;
>>>>>
>>>>>      dp_netdev_flow_used(flow, batch->array.count, batch->byte_count,
>>>>> -                        batch->tcp_flags, pmd->ctx.now);
>>>>> +                        batch->tcp_flags, pmd->ctx.now / 1000);
>>>>>
>>>>>      actions = dp_netdev_flow_get_actions(flow);
>>>>>
>>>>> @@ -5317,7 +5394,7 @@ dpif_netdev_xps_revalidate_pmd(const struct
>>>> dp_netdev_pmd_thread *pmd,
>>>>>              continue;
>>>>>          }
>>>>>          interval = pmd->ctx.now - tx->last_used;
>>>>> -        if (tx->qid >= 0 && (purge || interval >= XPS_TIMEOUT_MS)) {
>>>>> +        if (tx->qid >= 0 && (purge || interval >= XPS_TIMEOUT)) {
>>>>>              port = tx->port;
>>>>>              ovs_mutex_lock(&port->txq_used_mutex);
>>>>>              port->txq_used[tx->qid]--; @@ -5338,7 +5415,7 @@
>>>>> dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
>>>>>      interval = pmd->ctx.now - tx->last_used;
>>>>>      tx->last_used = pmd->ctx.now;
>>>>>
>>>>> -    if (OVS_LIKELY(tx->qid >= 0 && interval < XPS_TIMEOUT_MS)) {
>>>>> +    if (OVS_LIKELY(tx->qid >= 0 && interval < XPS_TIMEOUT)) {
>>>>>          return tx->qid;
>>>>>      }
>>>>>
>>>>> @@ -5470,12 +5547,16 @@ dp_execute_cb(void *aux_, struct
>>>>> dp_packet_batch
>>>> *packets_,
>>>>>                  dp_netdev_pmd_flush_output_on_port(pmd, p);
>>>>>              }
>>>>>  #endif
>>>>> -            if (OVS_UNLIKELY(dp_packet_batch_size(&p->output_pkts)
>>>>> -                       + dp_packet_batch_size(packets_) >
>>>> NETDEV_MAX_BURST)) {
>>>>> -                /* Some packets was generated while input batch
>>>> processing.
>>>>> -                 * Flush here to avoid overflow. */
>>>>> +            if (dp_packet_batch_size(&p->output_pkts)
>>>>> +                + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST)
>> {
>>>>> +                /* Flush here to avoid overflow. */
>>>>>                  dp_netdev_pmd_flush_output_on_port(pmd, p);
>>>>>              }
>>>>> +
>>>>> +            if (dp_packet_batch_is_empty(&p->output_pkts)) {
>>>>> +                pmd->n_output_batches++;
>>>>> +            }
>>>>> +
>>>>>              DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
>>>>>                  dp_packet_batch_add(&p->output_pkts, packet);
>>>>>              }
>>>>> @@ -5717,7 +5798,7 @@ dp_execute_cb(void *aux_, struct
>>>>> dp_packet_batch
>>>> *packets_,
>>>>>          conntrack_execute(&dp->conntrack, packets_,
>>>>> aux->flow->dl_type,
>>>> force,
>>>>>                            commit, zone, setmark, setlabel,
>>>>> aux->flow- tp_src,
>>>>>                            aux->flow->tp_dst, helper,
>>>> nat_action_info_ref,
>>>>> -                          pmd->ctx.now);
>>>>> +                          pmd->ctx.now / 1000);
>>>>>          break;
>>>>>      }
>>>>>
>>>>> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index
>>>>> 21ffaf5..bc6b1be 100644
>>>>> --- a/vswitchd/vswitch.xml
>>>>> +++ b/vswitchd/vswitch.xml
>>>>> @@ -359,6 +359,22 @@
>>>>>          </p>
>>>>>        </column>
>>>>>
>>>>> +      <column name="other_config" key="tx-flush-interval"
>>>>> +              type='{"type": "integer",
>>>>> +                     "minInteger": 0, "maxInteger": 1000000}'>
>>>>> +        <p>
>>>>> +          Specifies the time in microseconds that a packet can wait
>>>>> + in
>>>> output
>>>>> +          batch for sending i.e. amount of time that packet can
>>>>> + spend
>>>> in an
>>>>> +          intermediate output queue before sending to netdev.
>>>>> +          This option can be used to configure balance between
>>>> throughput
>>>>> +          and latency. Lower values decreases latency while higher
>>>> values
>>>>> +          may be useful to achieve higher performance.
>>>>> +        </p>
>>>>> +        <p>
>>>>> +          Defaults to 0 i.e. instant packet sending (latency
>>>> optimized).
>>>>> +        </p>
>>>>> +      </column>
>>>>> +
>>>>>        <column name="other_config" key="n-handler-threads"
>>>>>                type='{"type": "integer", "minInteger": 1}'>
>>>>>          <p>
>>>>>
>>>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Re: [ovs-dev] [PATCH v8 5/6] dpif-netdev: Time based output batching.

Reply via email to