[snip]
>>>
>>> I don't think it complicates development; instead, I feel the commit
>>> gives a clear indication to the developer that the members are grouped,
>>> aligned and marked with cacheline markers.
>>> This makes the developer extra cautious when adding new members so that
>>> holes can be avoided.
>>
>> Starting the rebase of the output batching patch-set, I figured out that I
>> need to remove 'unsigned long long last_cycles' and add 'struct
>> dp_netdev_pmd_thread_ctx ctx', which is 8 bytes larger. Could you please
>> suggest where I should place that new structure member and what to do
>> with the hole left by 'last_cycles'?
>>
>> This is not a trivial question, because the already poor grouping will
>> get worse almost no matter what we do.
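
Re the question about the hole left by 'last_cycles': the useful property of
the padded groups is that a hole stays local to its own group, i.e. padding
inside one PADDED_MEMBERS block can never push the next block off its
cacheline boundary, so the cost of an imperfect placement is a few wasted pad
bytes rather than members straddling cachelines. Below is a minimal,
simplified sketch of that idea; the macro and names here are made up for
illustration and are not OVS's actual PADDED_MEMBERS from lib/util.h. The
resulting placement of 'ctx' is in the modified layout further below.
---------------------------------------------------------------------------------------------------
#include <stddef.h>
#include <stdint.h>

#define MY_CACHE_LINE_SIZE 64

/* Simplified stand-in for OVS's PADDED_MEMBERS: the union rounds each group
 * of members up to a whole number of cacheline-sized units. */
#define MY_PADDED_MEMBERS(UNIT, ...)                                        \
    union {                                                                 \
        struct { __VA_ARGS__ };                                             \
        uint8_t pad[((sizeof(struct { __VA_ARGS__ }) + (UNIT) - 1)          \
                     / (UNIT)) * (UNIT)];                                   \
    }

struct example {
    MY_PADDED_MEMBERS(MY_CACHE_LINE_SIZE,
        uint64_t a;   /* 8 bytes. */
        uint32_t b;   /* 4 bytes; the remaining 52 bytes of this group
                       * become padding inside the group only. */
    );
    MY_PADDED_MEMBERS(MY_CACHE_LINE_SIZE,
        uint64_t c;   /* Starts on the second cacheline regardless of the
                       * hole left in the first group. */
    );
};

/* The hole in the first group does not shift the second group. */
_Static_assert(offsetof(struct example, c) == MY_CACHE_LINE_SIZE,
               "second group must start on the second cacheline");
---------------------------------------------------------------------------------------------------
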
>
> Aah, I realized now that the batching series doesn't apply cleanly on master.
> Let me check this and will send across the changes that should fix this.
>
I see that two patches of the output batching series touch this structure, so I
modified the structure to factor in the below changes introduced by the
batching series:
- Include the dp_netdev_pmd_thread_ctx structure.
- Include the n_output_batches variable.
- Change in the sizes of the dp_netdev_pmd_stats struct and stats_zero.
- ovs_mutex size (48 bytes on x86 vs 56 bytes on ARM).
I also carried out some testing and found no performance impact with the
changes below.
---------------------------------------------------------------------------------------------------
struct dp_netdev_pmd_thread {
    PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
        struct dp_netdev *dp;
        struct cmap_node node;          /* In 'dp->poll_threads'. */
        pthread_cond_t cond;            /* For synchronizing pmd thread
                                           reload. */
    );

    PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
        struct ovs_mutex cond_mutex;    /* Mutex for condition variable. */
        pthread_t thread;
    );

    /* Per thread exact-match cache. Note, the instance for cpu core
     * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
     * need to be protected by 'non_pmd_mutex'. Every other instance
     * will only be accessed by its own pmd thread. */
    PADDED_MEMBERS(CACHE_LINE_SIZE,
        OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
    );

    /* Flow-Table and classifiers
     *
     * Writers of 'flow_table' must take the 'flow_mutex'. Corresponding
     * changes to 'classifiers' must be made while still holding the
     * 'flow_mutex'.
     */
    PADDED_MEMBERS(CACHE_LINE_SIZE,
        struct ovs_mutex flow_mutex;
    );

    PADDED_MEMBERS(CACHE_LINE_SIZE,
        struct cmap flow_table OVS_GUARDED; /* Flow table. */
        /* One classifier per in_port polled by the pmd */
        struct cmap classifiers;
        /* Periodically sort subtable vectors according to hit frequencies */
        long long int next_optimization;
        /* End of the next time interval for which processing cycles
         * are stored for each polled rxq. */
        long long int rxq_next_cycle_store;
        /* Cycles counters */
        struct dp_netdev_pmd_cycles cycles;
        /* Current context of the PMD thread. */
        struct dp_netdev_pmd_thread_ctx ctx;
    );

    PADDED_MEMBERS(CACHE_LINE_SIZE,
        /* Statistics. */
        struct dp_netdev_pmd_stats stats;
        /* 8 pad bytes. */
    );

    PADDED_MEMBERS(CACHE_LINE_SIZE,
        struct latch exit_latch;        /* For terminating the pmd thread. */
        struct seq *reload_seq;
        uint64_t last_reload_seq;
        atomic_bool reload;             /* Do we need to reload ports? */
        /* Set to true if the pmd thread needs to be reloaded. */
        bool need_reload;
        bool isolated;
        struct ovs_refcount ref_cnt;    /* Every reference must be
                                           refcount'ed. */
        /* Queue id used by this pmd thread to send packets on all netdevs if
         * XPS disabled for this netdev. All static_tx_qid's are unique and
         * less than 'cmap_count(dp->poll_threads)'. */
        uint32_t static_tx_qid;
        /* Number of filled output batches. */
        int n_output_batches;
        unsigned core_id;               /* CPU core id of this pmd thread. */
        int numa_id;                    /* numa node id of this pmd thread. */
        /* 16 pad bytes. */
    );

    PADDED_MEMBERS(CACHE_LINE_SIZE,
        struct ovs_mutex port_mutex;    /* Mutex for 'poll_list'
                                           and 'tx_ports'. */
        /* 16 pad bytes. */
    );

    PADDED_MEMBERS(CACHE_LINE_SIZE,
        /* List of rx queues to poll. */
        struct hmap poll_list OVS_GUARDED;
        /* Map of 'tx_port's used for transmission. Written by the main
         * thread, read by the pmd thread. */
        struct hmap tx_ports OVS_GUARDED;
    );

    PADDED_MEMBERS(CACHE_LINE_SIZE,
        /* These are thread-local copies of 'tx_ports'. One contains only
         * tunnel ports (that support push_tunnel/pop_tunnel), the other
         * contains ports with at least one txq (that support send).
         * A port can be in both.
         *
         * There are two separate maps to make sure that we don't try to
         * execute OUTPUT on a device which has 0 txqs or PUSH/POP on a
         * non-tunnel device.
         *
         * The instances for cpu core NON_PMD_CORE_ID can be accessed by
         * multiple threads and thusly need to be protected by
         * 'non_pmd_mutex'. Every other instance will only be accessed by
         * its own pmd thread. */
        struct hmap tnl_port_cache;
        struct hmap send_port_cache;
    );

    PADDED_MEMBERS(CACHE_LINE_SIZE,
        /* Only a pmd thread can write on its own 'cycles' and 'stats'.
         * The main thread keeps 'stats_zero' and 'cycles_zero' as base
         * values and subtracts them from 'stats' and 'cycles' before
         * reporting to the user */
        unsigned long long stats_zero[DP_N_STATS];
        /* 8 pad bytes. */
    );

    PADDED_MEMBERS(CACHE_LINE_SIZE,
        uint64_t cycles_zero[PMD_N_CYCLES];
        /* 48 pad bytes. */
    );
};
---------------------------------------------------------------------------------------------------
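
If it helps the review, the group boundaries could also be pinned down with a
couple of compile-time checks next to the structure definition. This is only
an illustration and not part of the changes above; it assumes
BUILD_ASSERT_DECL and CACHE_LINE_SIZE from lib/util.h and offsetof from
<stddef.h> are available where the checks are placed:
---------------------------------------------------------------------------------------------------
/* Hypothetical compile-time checks, not part of the patch. Each member named
 * below is the first member of its padded group in the layout above, so its
 * offset should be a multiple of CACHE_LINE_SIZE. */
BUILD_ASSERT_DECL(offsetof(struct dp_netdev_pmd_thread, flow_mutex)
                  % CACHE_LINE_SIZE == 0);
BUILD_ASSERT_DECL(offsetof(struct dp_netdev_pmd_thread, stats)
                  % CACHE_LINE_SIZE == 0);
BUILD_ASSERT_DECL(offsetof(struct dp_netdev_pmd_thread, exit_latch)
                  % CACHE_LINE_SIZE == 0);
---------------------------------------------------------------------------------------------------
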
- Bhanuprakash.