[copying netdev for centralized development]
Hi all,
some updates to acx-20060111:
- add some cache prefetching at critical places, but still unsure whether it
helps (some rdtscl() testing hasn't shown much yet),
thus make it configurable
- add recommended cpu_relax() to busy-wait loops
- use "counter % 8" instead of "counter % 5" for easier ASM calculation
- add ACX_IE_HDR__TYPE_LEN define for IE struct header variables used
everywhere
- reorder struct wlandevice_t for better(??) cache use
- kill superfluous result variable in conv.c
- misc. small cleanup
This patch is rediffed from my modified acx-20060109 tar, NOT compile-tested!
Andreas Mohr
diff -urN acx-20060111.orig/acx_config.h acx-20060111/acx_config.h
--- acx-20060111.orig/acx_config.h 2006-01-08 19:51:28.000000000 +0100
+++ acx-20060111/acx_config.h 2006-01-12 11:21:08.000000000 +0100
@@ -31,6 +31,9 @@
/* if you want very early packet fragmentation bits and pieces */
#define ACX_FRAGMENTATION 0
+/* not sure whether this helps, so make it configurable for testing */
+#define ACX_CACHE_PREFETCH 1
+
/* set to 1 if you want to have 1 driver per card instead of 1 single driver
* managing all cards (of a particular bus type) in your system
* Useful e.g. if you need to reinitialize single cards from time to time
diff -urN acx-20060111.orig/acx_func.h acx-20060111/acx_func.h
--- acx-20060111.orig/acx_func.h 2006-01-10 07:50:33.000000000 +0100
+++ acx-20060111/acx_func.h 2006-01-12 11:21:08.000000000 +0100
@@ -120,6 +120,14 @@
#endif /* ACX_DEBUG */
+#if ACX_CACHE_PREFETCH
+#define ACX_PREFETCH(p) prefetch(p)
+#define ACX_PREFETCHW(p) prefetchw(p)
+#else
+#define ACX_PREFETCH(p)
+#define ACX_PREFETCHW(p)
+#endif
+
void acx_print_mac(const char *head, const u8 *mac, const char *tail);
/* Optimized out to nothing in non-debug build */
@@ -171,7 +179,7 @@
static inline int
mac_is_bcast(const u8 *mac)
{
- /* AND together 4 first bytes with sign-entended 2 last bytes
+ /* AND together 4 first bytes with sign-extended 2 last bytes
** Only bcast address gives 0xffffffff. +1 gives 0 */
return ( *(s32*)mac & ((s16*)mac)[2] ) + 1 == 0;
}
@@ -254,7 +262,7 @@
** acx_s_xxxx - potentially sleeping functions. Do not ever call under lock!
** acx_l_xxxx - functions which expect lock to be already taken.
** rest - non-sleeping functions which do not require locking
-** but may be run inder lock
+** but may be run under lock
**
** A small number of local helpers do not have acx_[eisl]_ prefix.
** They are always close to caller and are to be revieved locally.
diff -urN acx-20060111.orig/acx_struct.h acx-20060111/acx_struct.h
--- acx-20060111.orig/acx_struct.h 2006-01-10 07:49:27.000000000 +0100
+++ acx-20060111/acx_struct.h 2006-01-12 11:21:08.000000000 +0100
@@ -1005,6 +1005,10 @@
#endif /* ACX_USB */
+#define ACX_IE_HDR__TYPE_LEN \
+ u16 type ACX_PACKED; \
+ u16 len ACX_PACKED;
+
/* Config Option structs */
typedef struct co_antennas {
@@ -1061,8 +1065,7 @@
} co_fixed_t;
typedef struct acx111_ie_configoption {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
/* Do not access below members directly, they are in fact variable length */
co_fixed_t fixed ACX_PACKED;
co_antennas_t antennas ACX_PACKED;
@@ -1225,9 +1228,9 @@
client_t *sta_hash_tab[64]; /* hash collisions are not
likely */
client_t *ap_client; /* this one is our AP (STA mode
only) */
- unsigned long dup_msg_expiry;
int dup_count;
int nondup_count;
+ unsigned long dup_msg_expiry;
u16 last_seq_ctrl; /* duplicate packet detection */
/* 802.11 power save mode */
@@ -1289,15 +1292,15 @@
wep_key_t wep_keys[DOT11_MAX_DEFAULT_WEP_KEYS]; /* the default
WEP keys */
key_struct_t wep_key_struct[10];
+ /*** Unknown ***/
+ u8 dtim_interval;
+
/*** Card Rx/Tx management ***/
u16 rx_config_1;
u16 rx_config_2;
u16 memblocksize;
- int tx_free;
- int tx_head;
-
- /*** Unknown ***/
- u8 dtim_interval;
+ unsigned int tx_free;
+ unsigned int tx_head; /* keep as close as possible to Tx stuff below
(cache line) */
/*************************************************************************
*** PCI/USB/... must be last or else hw agnostic code breaks horribly ***
@@ -1310,31 +1313,31 @@
#ifdef ACX_PCI
/* pointers to tx buffers, tx host descriptors (in host memory)
** and tx descs in device memory */
+ unsigned int tx_tail;
u8 *txbuf_start;
txhostdesc_t *txhostdesc_start;
txdesc_t *txdesc_start; /* points to PCI-mapped memory */
- /* same for rx */
- rxbuffer_t *rxbuf_start;
- rxhostdesc_t *rxhostdesc_start;
- rxdesc_t *rxdesc_start;
- /* physical addresses of above host memory areas */
- dma_addr_t rxbuf_startphy;
- /* dma_addr_t rxhostdesc_startphy; */
dma_addr_t txbuf_startphy;
dma_addr_t txhostdesc_startphy;
/* sizes of above host memory areas */
unsigned int txbuf_area_size;
unsigned int txhostdesc_area_size;
- unsigned int rxbuf_area_size;
- unsigned int rxhostdesc_area_size;
unsigned int txdesc_size; /* size of txdesc; ACX111 = ACX100 + 4
*/
- unsigned int tx_tail;
- unsigned int rx_tail;
-
client_t *txc[TX_CNT];
u16 txr[TX_CNT];
+ /* same for rx */
+ unsigned int rx_tail;
+ rxbuffer_t *rxbuf_start;
+ rxhostdesc_t *rxhostdesc_start;
+ rxdesc_t *rxdesc_start;
+ /* physical addresses of above host memory areas */
+ dma_addr_t rxbuf_startphy;
+ /* dma_addr_t rxhostdesc_startphy; */
+ unsigned int rxbuf_area_size;
+ unsigned int rxhostdesc_area_size;
+
u8 need_radio_fw;
u8 irqs_active; /* whether irq sending is activated */
@@ -1499,14 +1502,12 @@
/***********************************************************************
*/
typedef struct acx100_ie_memblocksize {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u16 size ACX_PACKED;
} acx100_ie_memblocksize_t;
typedef struct acx100_ie_queueconfig {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u32 AreaSize ACX_PACKED;
u32 RxQueueStart ACX_PACKED;
u8 QueueOptions ACX_PACKED;
@@ -1522,8 +1523,7 @@
} acx100_ie_queueconfig_t;
typedef struct acx111_ie_queueconfig {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u32 tx_memory_block_address ACX_PACKED;
u32 rx_memory_block_address ACX_PACKED;
u32 rx1_queue_address ACX_PACKED;
@@ -1535,8 +1535,7 @@
} acx111_ie_queueconfig_t;
typedef struct acx100_ie_memconfigoption {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u32 DMA_config ACX_PACKED;
acx_ptr pRxHostDesc ACX_PACKED;
u32 rx_mem ACX_PACKED;
@@ -1546,8 +1545,7 @@
} acx100_ie_memconfigoption_t;
typedef struct acx111_ie_memoryconfig {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u16 no_of_stations ACX_PACKED;
u16 memory_block_size ACX_PACKED;
u8 tx_rx_memory_block_allocation ACX_PACKED;
@@ -1575,8 +1573,7 @@
} acx111_ie_memoryconfig_t;
typedef struct acx_ie_memmap {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u32 CodeStart ACX_PACKED;
u32 CodeEnd ACX_PACKED;
u32 WEPCacheStart ACX_PACKED;
@@ -1590,15 +1587,13 @@
} acx_ie_memmap_t;
typedef struct acx111_ie_feature_config {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u32 feature_options ACX_PACKED;
u32 data_flow_options ACX_PACKED;
} acx111_ie_feature_config_t;
typedef struct acx111_ie_tx_level {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u8 level ACX_PACKED;
} acx111_ie_tx_level_t;
@@ -1617,8 +1612,7 @@
#define PS_OPT_STILL_RCV_BCASTS 0x01
typedef struct acx100_ie_powermgmt {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u8 wakeup_cfg ACX_PACKED;
u8 listen_interval ACX_PACKED; /* for EACH_ITVL: wake up every
"beacon units" interval */
u8 options ACX_PACKED;
@@ -1627,8 +1621,7 @@
} acx100_ie_powermgmt_t;
typedef struct acx111_ie_powermgmt {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u8 wakeup_cfg ACX_PACKED;
u8 listen_interval ACX_PACKED; /* for EACH_ITVL: wake up every
"beacon units" interval */
u8 options ACX_PACKED;
@@ -1858,16 +1851,14 @@
} acx_cmd_radioinit_t;
typedef struct acx100_ie_wep_options {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u16 NumKeys ACX_PACKED; /* max # of keys */
u8 WEPOption ACX_PACKED; /* 0 == decrypt default key only, 1 ==
override decrypt */
u8 Pad ACX_PACKED; /* used only for acx111 */
} acx100_ie_wep_options_t;
typedef struct ie_dot11WEPDefaultKey {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u8 action ACX_PACKED;
u8 keySize ACX_PACKED;
u8 defaultKeyNum ACX_PACKED;
@@ -1887,8 +1878,7 @@
} acx111WEPDefaultKey_t;
typedef struct ie_dot11WEPDefaultKeyID {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
u8 KeyID ACX_PACKED;
} ie_dot11WEPDefaultKeyID_t;
@@ -1906,8 +1896,7 @@
*/
typedef struct acx_ie_generic {
- u16 type ACX_PACKED;
- u16 len ACX_PACKED;
+ ACX_IE_HDR__TYPE_LEN
union {
/* struct wep wp ACX_PACKED; */
/* Association ID IE: just a 16bit value: */
diff -urN acx-20060111.orig/common.c acx-20060111/common.c
--- acx-20060111.orig/common.c 2006-01-10 08:20:39.000000000 +0100
+++ acx-20060111/common.c 2006-01-12 11:21:08.000000000 +0100
@@ -155,7 +155,7 @@
void
acx_lock_debug(wlandevice_t *priv, const char* where)
{
- int count = 100*1000*1000;
+ unsigned int count = 100*1000*1000;
where = sanitize_str(where);
while (--count) {
if (!spin_is_locked(&priv->lock)) break;
@@ -179,7 +179,7 @@
}
#endif
if (acx_debug & L_LOCK) {
- unsigned diff;
+ unsigned long diff;
rdtscl(diff);
diff -= priv->lock_time;
if (diff > max_lock_time) {
@@ -228,7 +228,7 @@
dump_stack();
}
if (acx_debug & L_LOCK) {
- unsigned diff = jiffies - priv->sem_time;
+ unsigned long diff = jiffies - priv->sem_time;
if (diff > max_sem_time) {
where = sanitize_str(where);
printk("max sem hold time %d jiffies from %s "
@@ -353,7 +353,7 @@
"STOPPED", "SCANNING", "WAIT_AUTH",
"AUTHENTICATED", "ASSOCIATED", "INVALID??"
};
- if (status >= VEC_SIZE(str))
+ if (status > VEC_SIZE(str)-1)
status = VEC_SIZE(str)-1;
return str[status];
@@ -2621,6 +2621,7 @@
* be expressed in dBm, or it's some pretty complicated
* calculation. */
+ PREFETCHW(&priv->wstats.qual);
#ifdef FROM_SCAN_SOURCE_ONLY
/* only consider packets originating from the MAC
* address of the device that's managing our BSSID.
@@ -2968,9 +2969,11 @@
acx_l_sta_list_get(wlandevice_t *priv, const u8 *address)
{
client_t *client;
+
FN_ENTER;
client = acx_l_sta_list_get_from_hash(priv, address);
while (client) {
+ PREFETCH(client->next);
if (mac_is_equal(address, client->address)) {
client->mtime = jiffies;
break;
@@ -2995,6 +2998,7 @@
/* tricky. next = client on first iteration only,
** on all other iters next = client->next */
while (next) {
+ PREFETCH(client->next);
if (next == victim) {
client->next = victim->next;
/* Overkill */
@@ -3729,7 +3733,7 @@
priv->netdev->name, *req->reason,
acx_wlan_reason_str(*req->reason));
- /* Chk: is ta is verified to be from our AP? */
+ /* Chk: is ta verified to be from our AP? */
if (mac_is_equal(priv->dev_addr, req->hdr->a1)) {
log(L_DEBUG, "AP sent us deauth packet\n");
SET_BIT(priv->set_mask, GETSET_RESCAN);
@@ -3755,6 +3759,7 @@
skb = acx_rxbuf_to_ether(priv, rxbuf);
if (likely(skb)) {
netif_rx(skb);
+ PREFETCHW(&priv->stats);
priv->netdev->last_rx = jiffies;
priv->stats.rx_packets++;
priv->stats.rx_bytes += skb->len;
@@ -5094,7 +5099,7 @@
printk("%s: no matching station found in range, "
"generating our own IBSS instead\n",
priv->netdev->name);
- /* we do it hostap way: */
+ /* we do it the HostAP way: */
MAC_COPY(priv->bssid, priv->dev_addr);
priv->bssid[0] |= 0x02; /* 'local assigned addr' bit */
/* add IBSS bit to our caps... */
diff -urN acx-20060111.orig/conv.c acx-20060111/conv.c
--- acx-20060111.orig/conv.c 2006-01-10 08:02:48.000000000 +0100
+++ acx-20060111/conv.c 2006-01-12 11:21:08.000000000 +0100
@@ -138,8 +138,7 @@
struct wlan_llc *e_llc;
struct wlan_snap *e_snap;
const u8 *a1, *a3;
- int header_len, payload_len;
- int result = -1;
+ int header_len, payload_len = -1;
/* protocol type or data length, depending on whether
* DIX or 802.3 ethernet format */
u16 proto;
@@ -164,7 +163,7 @@
goto end;
}
memcpy(w_hdr, skb->data, skb->len);
- result = skb->len;
+ payload_len = skb->len;
goto end;
}
@@ -206,7 +205,6 @@
/* TODO: can we just let acx DMA payload from skb instead? */
memcpy((u8*)txbuf + header_len, skb->data + sizeof(wlan_ethhdr_t),
payload_len);
payload_len += header_len;
- result = payload_len;
/* Set up the 802.11 header */
switch (priv->mode) {
@@ -228,7 +226,7 @@
default:
printk("%s: error - converting eth to wlan in unknown mode\n",
priv->netdev->name);
- result = -1;
+ payload_len = -1;
goto end;
}
if (priv->wep_enabled)
@@ -251,8 +249,8 @@
#endif
end:
- FN_EXIT1(result);
- return result;
+ FN_EXIT1(payload_len);
+ return payload_len;
}
@@ -330,8 +328,8 @@
e_hdr = (wlan_ethhdr_t*) ((u8*) w_hdr + payload_offset);
e_llc = (wlan_llc_t*) e_hdr;
e_snap = (wlan_snap_t*) (e_llc + 1);
- e_payload = (u8*) (e_snap + 1);
mtu = priv->netdev->mtu;
+ e_payload = (u8*) (e_snap + 1);
log(L_DATA, "rx: payload_offset %d, payload_length %d\n",
payload_offset, payload_length);
diff -urN acx-20060111.orig/pci.c acx-20060111/pci.c
--- acx-20060111.orig/pci.c 2006-01-10 08:03:59.000000000 +0100
+++ acx-20060111/pci.c 2006-01-12 11:21:08.000000000 +0100
@@ -223,7 +223,10 @@
static inline client_t*
get_txc(wlandevice_t* priv, txdesc_t* txdesc)
{
- int index = (u8*)txdesc - (u8*)priv->txdesc_start;
+ int index;
+
+ ACX_PREFETCH(priv->txc);
+ index = (u8*)txdesc - (u8*)priv->txdesc_start;
if (unlikely(ACX_DEBUG && (index % priv->txdesc_size))) {
printk("bad txdesc ptr %p\n", txdesc);
return NULL;
@@ -239,7 +242,10 @@
static inline u16
get_txr(wlandevice_t* priv, txdesc_t* txdesc)
{
- int index = (u8*)txdesc - (u8*)priv->txdesc_start;
+ int index;
+
+ ACX_PREFETCH(priv->txr);
+ index = (u8*)txdesc - (u8*)priv->txdesc_start;
index /= priv->txdesc_size;
return priv->txr[index];
}
@@ -247,7 +253,11 @@
static inline void
put_txcr(wlandevice_t* priv, txdesc_t* txdesc, client_t* c, u16 r111)
{
- int index = (u8*)txdesc - (u8*)priv->txdesc_start;
+ int index;
+
+ ACX_PREFETCHW(priv->txc);
+ ACX_PREFETCHW(priv->txr);
+ index = (u8*)txdesc - (u8*)priv->txdesc_start;
if (unlikely(ACX_DEBUG && (index % priv->txdesc_size))) {
printk("bad txdesc ptr %p\n", txdesc);
return;
@@ -302,6 +312,7 @@
result = NOT_OK;
goto fail;
}
+ cpu_relax();
}
*charbuf = read_reg8(priv, IO_ACX_EEPROM_DATA);
@@ -369,6 +380,7 @@
"Timeout waiting for EEPROM write\n");
goto end;
}
+ cpu_relax();
}
}
@@ -389,6 +401,7 @@
printk("timeout waiting for EEPROM read\n");
goto end;
}
+ cpu_relax();
}
data_verify[i] = read_reg16(priv, IO_ACX_EEPROM_DATA);
@@ -435,6 +448,7 @@
*charbuf = 0;
goto fail;
}
+ cpu_relax();
}
log(L_DEBUG, "count was %u\n", count);
@@ -1029,6 +1043,7 @@
FN_ENTER;
+ ACX_PREFETCH(buffer);
devname = priv->netdev->name;
if (!devname || !devname[0] || devname[4]=='%')
devname = "acx";
@@ -1057,14 +1072,16 @@
/* Test for IDLE state */
if (!cmd_status)
break;
- if (counter % 5 == 0) {
+ if (counter % 8 == 0) {
if (time_after(jiffies, timeout)) {
counter = 0;
break;
}
- /* we waited 5 iterations, no luck. Sleep 5 ms */
- acx_s_msleep(5);
+ /* we waited 8 iterations, no luck. Sleep 8 ms */
+ acx_s_msleep(8);
}
+ else
+ cpu_relax();
} while (likely(--counter));
if (!counter) {
@@ -1078,6 +1095,7 @@
}
/* now write the parameters of the command if needed */
+ ACX_PREFETCHW(priv->cmd_area);
if (buffer && buflen) {
/* if it's an INTERROGATE command, just pass the length
* of parameters to read, as data */
@@ -1123,14 +1141,16 @@
break;
}
- if (counter % 5 == 0) {
+ if (counter % 8 == 0) {
if (time_after(jiffies, timeout)) {
counter = 0;
break;
}
- /* we waited 5 iterations, no luck. Sleep 5 ms */
- acx_s_msleep(5);
+ /* we waited 8 iterations, no luck. Sleep 8 ms */
+ acx_s_msleep(8);
}
+ else
+ cpu_relax();
} while (likely(--counter));
/* save state for debugging */
@@ -2319,6 +2339,8 @@
register rxhostdesc_t *hostdesc;
int count, tail;
+ ACX_PREFETCH(&priv->rx_tail);
+
FN_ENTER;
if (unlikely(acx_debug & L_BUFR))
@@ -2331,6 +2353,8 @@
count = RX_CNT;
while (1) {
hostdesc = &priv->rxhostdesc_start[tail];
+
+ ACX_PREFETCH(hostdesc);
/* advance tail regardless of outcome of the below test */
tail = (tail + 1) % RX_CNT;
@@ -2344,6 +2368,8 @@
/* now process descriptors, starting with the first we figured out */
while (1) {
+ ACX_PREFETCH(hostdesc->data);
+
log(L_BUFR, "rx: tail=%u Ctl_16=%04X Status=%08X\n",
tail, hostdesc->Ctl_16, hostdesc->Status);
@@ -3103,7 +3129,9 @@
void*
acxpci_l_get_txbuf(wlandevice_t *priv, tx_t* tx_opaque)
{
- return get_txhostdesc(priv, (txdesc_t*)tx_opaque)->data;
+ void *buf = get_txhostdesc(priv, (txdesc_t*)tx_opaque)->data;
+ ACX_PREFETCHW(buf);
+ return buf;
}
@@ -3415,6 +3443,8 @@
u16 r111;
u8 error, ack_failures, rts_failures, rts_ok, r100;
+ ACX_PREFETCH(&priv->tx_tail);
+
FN_ENTER;
if (unlikely(acx_debug & L_DEBUG))
diff -urN acx-20060111.orig/usb.c acx-20060111/usb.c
--- acx-20060111.orig/usb.c 2006-01-10 08:03:08.000000000 +0100
+++ acx-20060111/usb.c 2006-01-12 11:21:08.000000000 +0100
@@ -1618,7 +1618,7 @@
printk("%d ", usbdev->ep_in[i]->desc.wMaxPacketSize);
printk("\n");
printk(" ep_out wMaxPacketSize: ");
- for (i = 0; i < 15; ++i)
+ for (i = 0; i < 16; ++i)
printk("%d ", usbdev->ep_out[i]->desc.wMaxPacketSize);
printk("\n");
#else