diff --git a/LINUX/bsd_glue.h b/LINUX/bsd_glue.h index 4d5192eb2..341346444 100644 --- a/LINUX/bsd_glue.h +++ b/LINUX/bsd_glue.h @@ -325,10 +325,15 @@ static inline int ilog2(uint64_t n) } #endif /* ilog2 */ +/* + * TODO: ugly hack: abuse that contigmalloc() is only called from + * netmap_finalize_obj_allocator(), and numanode exists in that + * context. + */ #define contigmalloc(sz, ty, flags, a, b, pgsz, c) ({ \ unsigned int order_ = \ ilog2(roundup_pow_of_two(sz)/PAGE_SIZE); \ - struct page *p_ = alloc_pages(GFP_ATOMIC | __GFP_ZERO, \ + struct page *p_ = alloc_pages_node(numanode, GFP_ATOMIC | __GFP_ZERO, \ order_); \ if (p_ != NULL) \ split_page(p_, order_); \ diff --git a/private/LINUX/bnx2x_netmap_linux.h b/private/LINUX/bnx2x_netmap_linux.h deleted file mode 100644 index a8b94bb20..000000000 --- a/private/LINUX/bnx2x_netmap_linux.h +++ /dev/null @@ -1,595 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id: bnx2x_netmap_linux.h $ - * - * netmap support for bnx2x (LINUX version) - * - * The programming manual is publicly available at - * http://www.broadcom.com/collateral/pg/57710_57711-PG200-R.pdf - * http://www.broadcom.com/collateral/pg/57XX-PG105-R.pdf - * but they do not match the code in the Linux or FreeBSD driversi (bnx2x, bxe). - * The FreeBSD driver has a number of comments in the code that explain a lot - * of the constraints in the firmware. - * - * Of particular relevance: - -The buffer descriptor (bd) and packet (pkt) indexes handled by -the firmware are 16-bit values, no matter how big the rings are. -The current driver then has a number of BD slots which is also -a power of 2 so truncation does the right thing when accessing the arrays. -Conversion of these indexes to NIC ring indexes should be done -using TX_BD() and RX_BD() macros - -In the linux driver, NUM_TX_RINGS and NUM_RX_RINGS do not indicate -NIC rings but the number of 4K pages used to store the rings. -NIC rings are made of 8(rx) or 16(tx) byte entries, with the -last 16 bytes in each page containing the pointer to the next page. -Hence index increment should use the NEXT_TX_IDX() and NEXT_RX_IDX() -macros to skip the link entries. - -RX completions and other events are reported through a Request Completion Queue -(RCQ) with 16-byte entries, again linked with the usual scheme. -Navigate through them with the NEXT_RCQ_IDX() macro, and truncate -the values with RCQ_BD() - -The TX ring REQUIRES at least two BD per packet even though the -programming manual says differently. - -For each Class Of Service (COS) we have NUM_TX_BD slots in total. - - */ - - -#include -#include -#include -#define SOFTC_T bnx2x - -int bnx2x_netmap_config(struct SOFTC_T *adapter); - -#ifdef NETMAP_BNX2X_MAIN -static inline void -nm_pkt_dump(int i, char *buf, int len) -{ - uint8_t *s = buf+6, *d = buf; - RD(10, "%d len %4d %02x:%02x:%02x:%02x:%02x:%02x -> %02x:%02x:%02x:%02x:%02x:%02x", - i, - len, - s[0], s[1], s[2], s[3], s[4], s[5], - d[0], d[1], d[2], d[3], d[4], d[5]); -} - -/* - * Some diagnostic to figure out the configuration. - */ -static inline void -bnx2x_netmap_diag(struct ifnet *ifp) -{ - struct SOFTC_T *bp = netdev_priv(ifp); - struct bnx2x_fastpath *fp = &bp->fp[0]; - struct bnx2x_fp_txdata *txdata = &fp->txdata[0]; - int i; - - D("---- device %s ---- fp0 %p txdata %p q %d txq %d rxq %d -------", - ifp->name, fp, txdata, BNX2X_NUM_QUEUES(bp), - ifp->num_tx_queues, ifp->num_rx_queues); - // txq is actually 48, whereas rxq is a reasonable number. - for (i = 0; i < BNX2X_NUM_QUEUES(bp); i++) { - fp = &bp->fp[i]; - txdata = &fp->txdata[0]; - D("TX%2d: desc_ring %p %p cid %d txq_index %d cons_sb %p", i, - txdata->tx_desc_ring, - &txdata->tx_desc_ring[10].start_bd, - txdata->cid, txdata->txq_index, - txdata->tx_cons_sb); - } -} - -/* - * Register/unregister. We are already under (netmap) core lock. - * Only called on the first register or the last unregister. - */ -static int -bnx2x_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *adapter = netdev_priv(ifp); - int error = 0, need_load = 0; - - /* - * On enable, flush pending ops, set flag and reinit rings. - * On disable, flush again, and restart the interface. - */ - D("setting netmap mode for %s to %s", na->name, onoff ? "ON" : "OFF"); - // bnx2x_netmap_diag(ifp); - - rtnl_lock(); // required by bnx2x_nic_unload() - if (netif_running(ifp)) { - D("unloading the nic"); - bnx2x_nic_unload(adapter, UNLOAD_NORMAL); - need_load = 1; - } - -if (0) // only load/unload - error = EINVAL; -else - if (onoff) { /* enable netmap mode */ - nm_set_native_flags(na); - D("-------------- set the SKIP_INTR flag"); - // XXX na->na_flags |= NAF_SKIP_INTR; /* during load, use regular interrupts */ - } else { /* reset normal mode */ - nm_clear_native_flags(na); - } - if (need_load) { - D("loading the NIC"); - bnx2x_nic_load(adapter, LOAD_NORMAL); - } - rtnl_unlock(); - return (error); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - -Broadcom: the tx routine is bnx2x_start_xmit() - -The card has 16 hardware queues ("fastpath contexts"), -each possibly with several "Class of Service" (COS) queues. -(the data sheet says up to 16 COS, but the software seems to use 4). -The linux driver numbers queues 0..15 for COS=0, 16..31 for COS=1, -and so on. The low 4 bits are used to indicate the fastpath context. - -The tx ring is made of one or more pages containing Buffer Descriptors (BD) -stored in fp->tx_desc_ring[], -each 16-byte long (NOTE: different from the rx side). The last BD in a page -(also 16 bytes) points to the next page (8 for physical address + 8 reserved bytes). -These page are presumably contiguous in virtual address space so all it takes -is to skip the reserved entries when we reach the last entry on the page -(MAX_TX_DESC_CNT - 1, or 255). - -The driver differs from the documentation. In particular the END_BD flag -seems not to exist anymore, presumably the firmware can derive the number -of buffers from the START_BD flag plus nbd. -It is unclear from the docs whether we can have only one BD per packet -The field to initialize are (all in LE format) - addr_lo, addr_hi LE32, physical buffer address - nbytes LE16, packet size - vlan LE16 ?? producer index ??? - nbd L8 2 seems the min required - bd_flags.as_bitfield L8 START_BD XXX no END_BD - general_data L8 0 0..5: header_nbd; 6-7: addr type - -and once we are done 'ring the doorbell' (write to a register) -to tell the NIC the first empty slot in the queue. - - struct bnx2x_fastpath *fp = &bp->fp[ring_nr % 16]; - struct bnx2x_fp_txdata *txdata = &fp->txdata[ring_nr / 16]; - -In txdata, The HOST ring is tx_buf_ring, and the NIC RING tx_desc_ring, -cid is the 'context id' or ring_nr % 16 . - -We operate under the assumption that we use only the first -set of queues. - - */ -static int -bnx2x_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) -{ - struct ifnet *ifp = na->ifp; - struct netmap_kring *kring = &na->tx_rings[ring_nr]; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* - * interrupts on every tx packet are expensive so request - * them every half ring, or where NS_REPORT is set - */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - struct SOFTC_T *adapter = netdev_priv(ifp); - struct bnx2x_fastpath *fp = &adapter->fp[ring_nr]; - struct bnx2x_fp_txdata *txdata = &fp->txdata[0]; - int error = 0; - - if (!netif_carrier_ok(ifp)) { - goto out; - } - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - if (txdata->tx_desc_ring == NULL) { - D("------------------- bad! tx_desc_ring not set"); - error = EINVAL; - goto err; - } - nic_i = txdata->tx_bd_prod; - ND(10,"=======>========== send from %d to %d at bd %d", j, k, l); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint16_t len = slot->len; - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - /* device-specific */ - struct eth_tx_start_bd *bd = - &txdata->tx_desc_ring[TX_BD(nic_i)].start_bd; - uint16_t mac_type = UNICAST_ADDRESS; - - // nm_pkt_dump(j, addr, len); - ND(5, "start_bd j %d l %d is %p", j, l, bd); - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, unload and reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - /* - * Fill the slot in the NIC ring. FreeBSD's if_bxe.c has - * a lot of notes including: - * - min number of nbd is 2 even if the parsing bd is not used, - * otherwise we get an MC assert! error - * - if vlan is not used, firmware expect a packet number there. - * - do we care for mac-type ? - */ - - bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; - bd->vlan_or_ethertype = cpu_to_le16(txdata->tx_pkt_prod); - - bd->addr_lo = cpu_to_le32(U64_LO(paddr)); - bd->addr_hi = cpu_to_le32(U64_HI(paddr)); - bd->nbytes = cpu_to_le16(len); - bd->nbd = cpu_to_le16(2); - if (unlikely(is_multicast_ether_addr(addr))) { - if (is_broadcast_ether_addr(addr)) - mac_type = BROADCAST_ADDRESS; - else - mac_type = MULTICAST_ADDRESS; - } - SET_FLAG(bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE, mac_type); - SET_FLAG(bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1 /* XXX */ ); - - nm_i = nm_next(nm_i, lim); - txdata->tx_pkt_prod++; - nic_i = NEXT_TX_IDX(nic_i); // skip link fields. - /* clear the parsing block */ - bzero(&txdata->tx_desc_ring[TX_BD(nic_i)], sizeof(*bd)); - nic_i = NEXT_TX_IDX(nic_i); // skip link fields. - } - kring->nr_hwcur = head; - /* decrease avail by # of packets sent minus previous ones */ - - /* XXX Check how to deal with nkr_hwofs */ - /* these two are always in sync. */ - txdata->tx_bd_prod = nic_i; - txdata->tx_db.data.prod = nic_i; // update doorbell - - wmb(); /* synchronize writes to the NIC ring */ - barrier(); // XXX - /* (re)start the transmitter up to slot l (excluded) */ - ND(5, "doorbell cid %d data 0x%x", txdata->cid, txdata->tx_db.raw); - DOORBELL(adapter, ring_nr, txdata->tx_db.raw); - } - - /* - * Second part: reclaim buffers for completed transmissions. - * - * Reclaim buffers for completed transmissions, as in bnx2x_tx_int(). - * Maybe we could do it lazily. - */ - for (n=0;n < 5;n++) { - /* - * Record completed transmissions. - * The card writes the current (pkt ?) index in memory in - * le16_to_cpu(*txdata->tx_cons_sb); - * This seems to be a sequential index with no skips modulo 2^16 - * irrespective of the actual ring size. - * We need to adjust buffer and packet indexes. - * In netmap we can use 1 pkt/1bd so the pkt_cons - * is an index in the netmap buffer. The bd_index - * however should be computed with some trick. - * We (re)use the driver's txr->tx_pkt_cons to keep - * track of the most recently completed transmission. - */ - nic_i = le16_to_cpu(*txdata->tx_cons_sb); - if (nic_i != txdata->tx_pkt_cons) { // XXX buffers, not slots - ND(5, "txr %d completed %d packets", ring_nr, delta); - /* some tx completed, advance hwtail. */ - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - /* XXX lazy solution - consume 2 buffers */ - for (;txdata->tx_pkt_cons != nic_i; txdata->tx_pkt_cons++) { - txdata->tx_bd_cons = NEXT_TX_IDX(txdata->tx_bd_cons); - txdata->tx_bd_cons = NEXT_TX_IDX(txdata->tx_bd_cons); - } - } - } - if (txdata->tx_pkt_cons != txdata->tx_pkt_prod) { - // XXX kick the sender, does not seem to help. - wmb(); /* synchronize writes to the NIC ring */ - barrier(); // XXX - /* (re)start the transmitter up to slot l (excluded) */ - ND(5, "doorbell cid %d data 0x%x", txdata->cid, txdata->tx_db.raw); - DOORBELL(adapter, ring_nr, txdata->tx_db.raw); - } -out: - return 0; -err: - if (error) - return netmap_ring_reinit(kring); - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - -Broadcom: - -see bnx2x_cmn.c :: bnx2x_rx_int() - -the software keeps two sets of producer and consumer indexes: -one in the completion queue (fp->rx_comp_cons, fp->rx_comp_prod) -and one in the buffer descriptors (fp->rx_bd_cons, fp->rx_bd_prod). - -The processing loop iterates on the completion queue, and -buffers are consumed only after 'fastpath' events. - -The hardware reports the first empty slot through -(*fp->rx_cons_sb) (skipping the link field). - -20120913 -The code in bnx2x_rx_int() has a strange thing, it keeps -two running counters bd_prod and bd_prod_fw which are -apparently the same. - - - */ -static int -bnx2x_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) -{ - struct ifnet *ifp = na->ifp; - struct netmap_kring *kring = &na->rx_rings[ring_nr]; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - struct SOFTC_T *adapter = netdev_priv(ifp); - struct bnx2x_fastpath *rxr = &adapter->fp[ring_nr]; - uint16_t hw_comp_cons, sw_comp_cons; - -return 0; // XXX unsupported now - - if (!netif_carrier_ok(ifp)) - return 0; - - if (head > lim) - return netmap_ring_reinit(kring); - - rmb(); - /* - * First part, import newly received packets into the netmap ring. - * - * rxr->next_to_check is set to 0 on a ring reinit - */ - - /* scan the completion queue to see what is going on. - * Note that we do not use l here. - */ - sw_comp_cons = RCQ_BD(rxr->rx_comp_cons); - nic_i = rxr->rx_bd_cons; - nm_i = netmap_idx_n2k(kring, nic_i); - hw_comp_cons = le16_to_cpu(*rxr->rx_cons_sb); - if ((hw_comp_cons & MAX_RCQ_DESC_CNT) == MAX_RCQ_DESC_CNT) - hw_comp_cons++; - - rmb(); // XXX -ND("start ring %d k %d lim %d hw_comp_cons %d", ring_nr, k, lim, hw_comp_cons); -goto done; // XXX debugging - - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - for (n = 0; sw_comp_cons != hw_comp_cons; sw_comp_cons = RCQ_BD(NEXT_RCQ_IDX(sw_comp_cons)) ) { - union eth_rx_cqe *cqe = &rxr->rx_comp_ring[l]; - struct eth_fast_path_rx_cqe *cqe_fp = &cqe->fast_path_cqe; - // XXX fetch event, process slowpath as in the main driver, - if (1 /* slowpath */) - continue; - ring->slot[nm_i].len = le16_to_cpu(cqe_fp->pkt_len_or_gro_seg_len); - ring->slot[nm_i].flags = slot_flags; - - nic_i = NEXT_RX_IDX(nic_i); - nm_i = nm_next(nic_i, lim) - n++; - } - if (n) { /* update the state variables */ - rxr->rx_comp_cons = sw_comp_cons; // XXX adjust nkr_hwofs - rxr->rx_bd_cons = nic_i; // XXX adjust nkr_hwofs - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* userspace has released some packets. */ - uint16_t sw_comp_prod = 0; // XXX - - nic_i = netmap_idx_k2n(kring, nic_i); - for (n = 0; nm_i != head; n++) { -#if 0 // XXX receive code still incomplete - struct netmap_slot *slot = &ring->slot[nm_i]; - union ixgbe_adv_rx_desc *curr = IXGBE_RX_DESC_ADV(rxr, nic_i); - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->wb.upper.status_error = 0; - curr->read.pkt_addr = htole64(paddr); -#endif // XXX - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - // XXXX cons = ... - wmb(); - /* Update producers */ - bnx2x_update_rx_prod(adapter, rxr, nic_i, sw_comp_prod, - rxr->rx_sge_prod); - } -done: - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * If in netmap mode, attach the netmap buffers to the ring and return true. - * Otherwise return false. - * Called at the end of bnx2x_alloc_fp_mem_at(), sets both tx and rx - * buffer entries. At init time we allocate the max number of entries - * for the card, but at runtime the card might use a smaller number, - * so be careful on where we fetch the information. - */ -int -bnx2x_netmap_config(struct SOFTC_T *bp) -{ - struct netmap_adapter *na = NA(bp->dev); - struct netmap_slot *slot; - struct bnx2x_fastpath *fp; - struct bnx2x_fp_txdata *txdata; - int j, ring_nr; - int nq; /* number of queues to use */ - - slot = netmap_reset(na, NR_TX, 0, 0); // quick test on first ring - if (!slot) - return 0; // not in native mode - nq = na->num_rx_rings; - D("# queues: tx %d rx %d act %d %d", - bp->dev->num_tx_queues, bp->dev->num_rx_queues, - BNX2X_NUM_QUEUES(bp), nq ); - if (BNX2X_NUM_QUEUES(bp) < nq) { - nq = BNX2X_NUM_QUEUES(bp); - D("******** wartning, truncate to %d rings", nq); - } - D("allocate memory, tx/rx slots: %d %d max %d %d", - (int)bp->tx_ring_size, (int)bp->rx_ring_size, - na->num_tx_desc, na->num_rx_desc); - for (ring_nr = 0; ring_nr < nq; ring_nr++) { - netmap_reset(na, NR_TX, ring_nr, 0); - } - /* - * Do nothing on the tx ring, addresses are set up at tx time. - */ - fp = &bp->fp[0]; - txdata = &fp->txdata[0]; - ND("tx: pkt cons/prod %d -> %d, bd cons/prod %d -> %d, cons_sb %p", - txdata->tx_pkt_cons, txdata->tx_pkt_prod, - txdata->tx_bd_cons, txdata->tx_bd_prod, - txdata->tx_cons_sb ); - /* - * on the receive ring, must set buf addresses into the slots. - */ - for (ring_nr = 0; ring_nr < nq; ring_nr++) { - slot = netmap_reset(na, NR_RX, ring_nr, 0); - fp = &bp->fp[ring_nr]; - txdata = &fp->txdata[0]; - ND("rx: comp cons/prod %d -> %d, bd cons/prod %d -> %d, cons_sb %p", - fp->rx_comp_cons, fp->rx_comp_prod, - fp->rx_bd_cons, fp->rx_bd_prod, - fp->rx_cons_sb ); - for (j = 0; j < na->num_rx_desc; j++) { - uint64_t paddr; - void *addr = PNMB(na, slot + j, &paddr); - // XXX to be completed - } - } - /* now use regular interrupts */ - D("------------- clear the SKIP_INTR flag"); - // XXX na->na_flags &= ~NAF_SKIP_INTR; - return 1; -} - - -/* - * The attach routine, called near the end of bnx2x_init_one(), - * fills the parameters for netmap_attach() and calls it. - * It cannot fail, in the worst case (such as no memory) - * netmap mode will be disabled and the driver will only - * operate in standard mode. - */ -static void -bnx2x_netmap_attach(struct SOFTC_T *adapter) -{ - struct netmap_adapter na; - struct net_device *dev = adapter->dev; - - bzero(&na, sizeof(na)); - - na.ifp = dev; - na.pdev = &adapter->pdev->dev; - /* The ring size is the number of tx bd, but since we use 2 per - * packet, make the tx ring shorter. - * Let's see what to do with the - * skipping those continuation blocks. - */ - na.num_tx_desc = adapter->tx_ring_size / 2 - 10; - na.num_rx_desc = na.num_tx_desc; // XXX see above - na.nm_txsync = bnx2x_netmap_txsync; - na.nm_rxsync = bnx2x_netmap_rxsync; - na.nm_register = bnx2x_netmap_reg; - /* same number of tx and rx queues. queue 0 is somewhat special - * but we still cosider it. If FCOE is supported, the last hw - * queue is used for it. - */ - na.num_tx_rings = na.num_rx_rings = BNX2X_NUM_ETH_QUEUES(adapter); - netmap_attach(&na); - D("%d queues, tx: %d rx %d slots", na.num_rx_rings, - na.num_tx_desc, na.num_rx_desc); -} -#endif /* NETMAP_BNX2X_MAIN */ -/* end of file */ diff --git a/private/LINUX/mlx4_netmap_linux.h b/private/LINUX/mlx4_netmap_linux.h deleted file mode 100644 index 8a8e6544a..000000000 --- a/private/LINUX/mlx4_netmap_linux.h +++ /dev/null @@ -1,726 +0,0 @@ -/* - * Copyright (C) 2012-2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id: mlx4_netmap_linux.h $ - * - * netmap support for mlx4 (LINUX version) - * - */ - - -#include -#include -#include -#define SOFTC_T mlx4_en_priv - -/* - * This driver is split in multiple small files. - * The main device descriptor has type struct mlx4_en_priv *priv; - * and we attach to the device in mlx4_en_init_netdev() - * (do port numbers start from 1 ?) - * - * The reconfig routine is in mlx4_en_start_port() (also here) - * which is called on a mlx4_en_restart() (watchdog), open and set-mtu. - * - * priv->num_frags ?? - * DS_SIZE ?? - * apparently each rx desc is followed by frag.descriptors - * and the rx desc is rounded up to a power of 2. - * - * Receive code is in en_rx.c - * priv->rx_ring_num number of rx rings - * rxr = prov->rx_ring[ring_ind] rx ring descriptor - * rxr->size number of slots - * rxr->prod producer - * probably written into a mmio reg at *rxr->wqres.db.db - * trimmed to 16 bits. - * - * Rx init routine: - * mlx4_en_activate_rx_rings() - * mlx4_en_init_rx_desc() - * Transmit code is in en_tx.c - */ - -int mlx4_netmap_rx_config(struct SOFTC_T *priv, int ring_nr); -int mlx4_netmap_tx_config(struct SOFTC_T *priv, int ring_nr); - -int mlx4_tx_desc_dump(struct mlx4_en_tx_desc *tx_desc); - -#ifdef NETMAP_MLX4_MAIN -static inline void -nm_pkt_dump(int i, char *buf, int len) -{ - uint8_t *s __attribute__((unused)) = buf+6, *d __attribute__((unused)) = buf; - - RD(10, "%d len %4d %02x:%02x:%02x:%02x:%02x:%02x -> %02x:%02x:%02x:%02x:%02x:%02x", - i, - len, - s[0], s[1], s[2], s[3], s[4], s[5], - d[0], d[1], d[2], d[3], d[4], d[5]); -} - -/* show the content of the descriptor. Only the first block is printed - * to make sure we do not fail on wraparounds (otherwise we would need - * base, index and ring size). - */ -int -mlx4_tx_desc_dump(struct mlx4_en_tx_desc *tx_desc) -{ - struct mlx4_wqe_ctrl_seg *ctrl = &tx_desc->ctrl; - uint32_t *p = (uint32_t *)tx_desc; - int i, l = ctrl->fence_size; - - RD(5,"------- txdesc %p size 0x%x", tx_desc, ctrl->fence_size); - if (l > 4) - l = 4; - for (i = 0; i < l; i++) { - RD(20, "[%2d]: 0x%08x 0x%08x 0x%08x 0x%08x", i, - ntohl(p[0]), ntohl(p[1]), ntohl(p[2]), ntohl(p[3])); - p += 4; - } - return 0; -} - - -/* - * Register/unregister. We are already under (netmap) core lock. - * Only called on the first register or the last unregister. - */ -static int -mlx4_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct SOFTC_T *priv = netdev_priv(ifp); - int error = 0, need_load = 0; - struct mlx4_en_dev *mdev = priv->mdev; - - /* - * On enable, flush pending ops, set flag and reinit rings. - * On disable, flush again, and restart the interface. - */ - D("setting netmap mode for %s to %s", na->name, onoff ? "ON" : "OFF"); - // rtnl_lock(); // ??? - if (netif_running(ifp)) { - D("unloading %s", na->name); - //double_mutex_state_lock(mdev); - mutex_lock(&mdev->state_lock); - if (onoff == 0) { - int i; - /* coming from netmap mode, clean up the ring pointers - * so we do not crash in mlx4_en_free_tx_buf() - * XXX should STAMP the txdesc value to pretend the hw got there - * 0x7fffffff plus the bit set to - * !!(ring->cons & ring->size) - */ - for (i = 0; i < na->num_tx_rings; i++) { - struct mlx4_en_tx_ring *txr = &priv->tx_ring[i]; - ND("txr %d : cons %d prod %d txbb %d", i, txr->cons, txr->prod, txr->last_nr_txbb); - txr->cons += txr->last_nr_txbb; // XXX should be 1 - for (;txr->cons != txr->prod; txr->cons++) { - uint16_t j = txr->cons & txr->size_mask; - uint32_t new_val, *ptr = (uint32_t *)(txr->buf + j * TXBB_SIZE); - new_val = cpu_to_be32(STAMP_VAL | (!!(txr->cons & txr->size) << STAMP_SHIFT)); - ND(10, "old 0x%08x new 0x%08x", *ptr, new_val); - *ptr = new_val; - } - } - } - mlx4_en_stop_port(ifp); - need_load = 1; - } - -retry: - if (onoff) { /* enable netmap mode */ - nm_set_native_flags(na); - } else { /* reset normal mode */ - nm_clear_native_flags(na); - } - if (need_load) { - D("loading %s", na->name); - error = mlx4_en_start_port(ifp); - D("start_port returns %d", error); - if (error && onoff) { - onoff = 0; - goto retry; - } - mutex_unlock(&mdev->state_lock); - //double_mutex_state_unlock(mdev); - } - // rtnl_unlock(); - return (error); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - * This routine might be called frequently so it must be efficient. - * - -OUTGOING (txr->prod) -Tx packets need to fill a 64-byte block with one control block and -one descriptor (both 16-byte). Probably we need to fill the other -two data entries in the block with NULL entries as done in rx_config(). -One can request completion reports (intr) on all entries or only -on selected ones. The std. driver reports every 16 packets. - -txr->prod points to the first available slot to send. - -COMPLETION (txr->cons) -TX events are reported through a Completion Queue (CQ) whose entries -can be 32 or 64 bytes. In case of 64 bytes, the interesting part is -at odd indexes. The "factor" variable does the addressing. - -txr->cons points to the last completed block (XXX note so it is 1 behind) - -There is no link back from the txring to the completion -queue so we need to track it ourselves. HOWEVER mlx4_en_alloc_resources() -uses the same index for cq and ring so tx_cq and tx_ring correspond, -same for rx_cq and rx_ring. - - */ -static int -mlx4_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags) -{ - struct ifnet *ifp = na->ifp; - struct netmap_kring *kring = &na->tx_rings[ring_nr]; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* - * interrupts on every tx packet are expensive so request - * them every half ring, or where NS_REPORT is set - */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - struct SOFTC_T *priv = netdev_priv(ifp); - int error = 0; - - if (!netif_carrier_ok(ifp)) { - goto out; - } - - // XXX debugging, only print if sending something - n = (txr->prod - txr->cons - 1) & 0xffffff; // should be modulo 2^24 ? - if (n >= txr->size) { - RD(5, "XXXXXXXXXXX txr %d overflow: cons %u prod %u size %d delta %d", - ring_nr, txr->cons, txr->prod, txr->size, n); - } - - /* - * First part: process new packets to send. - */ - nm_i = kring->nr_hwcur; - // XXX debugging, assuming lim is 2^x-1 - n = 0; // XXX debugging - if (nm_i != head) { /* we have new packets to send */ - ND(5,"START: txr %u cons %u prod %u hwcur %u head %u tail %d send %d", - ring_nr, txr->cons, txr->prod, kring->nr_hwcur, ring->head, kring->nr_hwtail, - (head - nm_i) & lim); - - // XXX see en_tx.c :: mlx4_en_xmit() - /* - * In netmap the descriptor has one control segment - * and one data segment. The control segment is 16 bytes, - * the data segment is another 16 bytes mlx4_wqe_data_seg. - * The alignment is TXBB_SIZE (64 bytes) though, so we are - * forced to use 64 bytes each. - */ - - ND(10,"=======>========== send from %d to %d at bd %d", j, k, txr->prod); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - /* device-specific */ - uint32_t l = txr->prod & txr->size_mask; - struct mlx4_en_tx_desc *tx_desc = txr->buf + l * TXBB_SIZE; - struct mlx4_wqe_ctrl_seg *ctrl = &tx_desc->ctrl; - - NM_CHECK_ADDR_LEN(addr, len); - - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, unload and reload map */ - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - /* - * Fill the slot in the NIC ring. - */ - ctrl->vlan_tag = 0; // not used - ctrl->ins_vlan = 0; // NO - ctrl->fence_size = 2; // used descriptor size in 16byte blocks - // request notification. XXX later report only if NS_REPORT or not too often. - ctrl->srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | - MLX4_WQE_CTRL_SOLICITED); - - // XXX do we need to copy the mac dst address ? - if (1) { // XXX do we need this ? - uint64_t mac = mlx4_en_mac_to_u64(addr); - uint32_t mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); - uint32_t mac_l = (u32) (mac & 0xffffffff); - - ctrl->srcrb_flags |= cpu_to_be32(mac_h); - ctrl->imm = cpu_to_be32(mac_l); - } - - tx_desc->data.addr = cpu_to_be64(paddr); - tx_desc->data.lkey = cpu_to_be32(priv->mdev->mr.key); - wmb(); // XXX why here ? - tx_desc->data.byte_count = cpu_to_be32(len); // XXX crc corrupt ? - wmb(); - ctrl->owner_opcode = cpu_to_be32( - MLX4_OPCODE_SEND | - ((txr->prod & txr->size) ? MLX4_EN_BIT_DESC_OWN : 0) ); - txr->prod++; - nm_i = nm_next(nm_i, lim); - } - kring->nr_hwcur = head; - - /* XXX Check how to deal with nkr_hwofs */ - /* these two are always in sync. */ - wmb(); /* synchronize writes to the NIC ring */ - /* (re)start the transmitter up to slot l (excluded) */ - ND(5, "doorbell cid %d data 0x%x", txdata->cid, txdata->tx_db.raw); - // XXX is this doorbell correct ? - iowrite32be(txr->doorbell_qpn, txr->bf.uar->map + MLX4_SEND_DOORBELL); - } - // XXX debugging, only print if sent something - if (n) - ND(5, "SENT: txr %d cons %u prod %u hwcur %u cur %u tail %d sent %d", - ring_nr, txr->cons, txr->prod, kring->nr_hwcur, ring->cur, kring->nr_hwtail, n); - - /* - * Second part: reclaim buffers for completed transmissions. - */ - - { - struct mlx4_en_cq *cq = &priv->tx_cq[ring_nr]; - struct mlx4_cq *mcq = &cq->mcq; - - int size = cq->size; // number of entries - struct mlx4_cqe *buf = cq->buf; // base of cq entries - uint32_t size_mask = txr->size_mask; // same in txq and cq ?....... - uint16_t new_index, ring_index; - int factor = priv->cqe_factor; // 1 for 64 bytes, 0 for 32 bytes - - /* - * Reclaim buffers for completed transmissions. The CQE tells us - * where the consumer (NIC) is. Bit 7 of the owner_sr_opcode - * is the ownership bit. It toggles up and down so the - * non-bitwise XNOR trick lets us detect toggles as the ring - * wraps around. On even rounds, the second operand is 0 so - * we exit when the MLX4_CQE_OWNER_MASK bit is 1, viceversa - * on odd rounds. - */ - new_index = ring_index = txr->cons & size_mask; - - for (n = 0; n < 2*lim; n++) { - uint16_t index = mcq->cons_index & size_mask; - struct mlx4_cqe *cqe = &buf[(index << factor) + factor]; - - if (!XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, - mcq->cons_index & size)) - break; - /* - * make sure we read the CQE after we read the - * ownership bit - */ - rmb(); - - /* Skip over last polled CQE */ - new_index = be16_to_cpu(cqe->wqe_index) & size_mask; - ND(5, "txq %d new_index %d", ring_nr, new_index); - mcq->cons_index++; - } - if (n > lim) { - D("XXXXXXXXXXX too many notifications %d", n); - } - /* now we have updated cons-index, notify the card. */ - /* XXX can we make it conditional ? */ - wmb(); - mlx4_cq_set_ci(mcq); - // XXX the following enables interrupts... */ - // mlx4_en_arm_cq(priv, cq); // XXX always ? - wmb(); - /* XXX unsigned arithmetic below */ - n = (new_index - ring_index) & size_mask; - if (n) { - ND(5, "txr %d completed %d packets", ring_nr, n); - txr->cons += n; - /* XXX watch out, index is probably modulo */ - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, (new_index & size_mask)), lim); - } - if (nm_kr_txempty(kring)) { - mlx4_en_arm_cq(priv, cq); - } - } - -out: - return 0; - -err: - if (error) - return netmap_ring_reinit(kring); - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - -MELLANOX: - -the ring has prod and cons indexes, the size is a power of 2, -size and actual_size indicate how many entries can be allocated, -stride is the size of each entry. - -mlx4_en_update_rx_prod_db() tells the NIC where it can go -(to be used when new buffers are freed). - - */ -static int -mlx4_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags) -{ - struct ifnet *ifp = na->ifp; - struct netmap_kring *kring = &na->rx_rings[ring_nr]; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - struct SOFTC_T *priv = netdev_priv(ifp); - struct mlx4_en_rx_ring *rxr = &priv->rx_ring[ring_nr]; - - if (!priv->port_up) // XXX as in mlx4_en_process_rx_cq() - return 0; - - if (!netif_carrier_ok(ifp)) // XXX maybe above is redundant ? - return 0; - - if (head > lim) - return netmap_ring_reinit(kring); - - ND(5, "START rxr %d cons %d prod %d kcur %d ktail %d cur %d tail %d", - ring_nr, rxr->cons, rxr->prod, kring->nr_hwcur, kring->nr_hwtail, ring->cur, ring->tail); - - /* - * First part, import newly received packets. - */ - - /* scan the completion queue to see what is going on. - * The mapping is 1:1. The hardware toggles the OWNER bit in the - * descriptor at mcq->cons_index & size_mask, which is mapped 1:1 - * to an entry in the RXR. - * XXX there are two notifications sent to the hw: - * mlx4_cq_set_ci(struct mlx4_cq *cq); - * *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff); - * mlx4_en_update_rx_prod_db(rxr); - * *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); - * apparently they point to the same memory word - * (see mlx4_en_activate_cq() ) and are initialized to 0 - * DB is the doorbell page (sec.15.1.2 ?) - * wqres is set in mlx4_alloc_hwq_res() - * and in turn mlx4_alloc_hwq_res() - */ - if (1 || netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - struct mlx4_en_cq *cq = &priv->rx_cq[ring_nr]; - struct mlx4_cq *mcq = &cq->mcq; - int factor = priv->cqe_factor; - uint32_t size_mask = rxr->size_mask; - int size = cq->size; - struct mlx4_cqe *buf = cq->buf; - - nm_i = kring->nr_hwtail; - - /* Process all completed CQEs, use same logic as in TX */ - for (n = 0; n <= 2*lim ; n++) { - int index = mcq->cons_index & size_mask; - struct mlx4_cqe *cqe = &buf[(index << factor) + factor]; - prefetch(cqe+1); - if (!XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, mcq->cons_index & size)) - break; - - rmb(); /* make sure data is up to date */ - ring->slot[nm_i].len = be32_to_cpu(cqe->byte_cnt) - rxr->fcs_del; - ring->slot[nm_i].flags = slot_flags; - mcq->cons_index++; - nm_i = nm_next(nm_i, lim); - } - if (n) { /* update the state variables */ - if (n >= 2*lim) - D("XXXXXXXXXXXXX too many received packets %d", n); - ND(5, "received %d packets", n); - kring->nr_hwtail = nm_i; - rxr->cons += n; - ND(5, "RECVD %d rxr %d cons %d prod %d kcur %d ktail %d cur %d tail %d", - n, - ring_nr, rxr->cons, rxr->prod, kring->nr_hwcur, kring->nr_hwtail, ring->cur, ring->tail); - - /* XXX ack completion queue */ - mlx4_cq_set_ci(mcq); - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; /* netmap ring index */ - if (nm_i != head) { /* userspace has released some packets. */ - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - /* collect per-slot info, with similar validations - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - struct mlx4_en_rx_desc *rx_desc = rxr->buf + (nic_i * rxr->stride); - - if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - // netmap_reload_map(pdev, DMA_TO_DEVICE, old_addr, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - - /* XXX - * The rx descriptor only contains buffer descriptors, - * probably only the length is changed or not even that one. - */ - // see mlx4_en_prepare_rx_desc() and mlx4_en_alloc_frag() - rx_desc->data[0].addr = cpu_to_be64(paddr); - rx_desc->data[0].byte_count = cpu_to_be32(NETMAP_BUF_SIZE); - rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key); - -#if 0 - int jj, possible_frags; - /* we only use one fragment, so the rest is padding */ - possible_frags = (rxr->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; - for (jj = 1; jj < possible_frags; jj++) { - rx_desc->data[jj].byte_count = 0; - rx_desc->data[jj].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); - rx_desc->data[jj].addr = 0; - } -#endif - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - - /* XXX note that mcq->cons_index and ring->cons are not in sync */ - wmb(); - rxr->prod += n; - kring->nr_hwcur = head; - - /* and now tell the system that there are more buffers available. - * should use mlx4_en_update_rx_prod_db(rxr) but it is static in - * en_rx.c so we do not see it here - */ - *rxr->wqres.db.db = cpu_to_be32(rxr->prod & 0xffff); - - ND(5, "FREED rxr %d cons %d prod %d kcur %d ktail %d", - ring_nr, rxr->cons, rxr->prod, - kring->nr_hwcur, kring->nr_hwtail); - } - - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * If in netmap mode, attach the netmap buffers to the ring and return true. - * Otherwise return false. - * Called at the end of mlx4_en_start_port(). - * XXX TODO: still incomplete. - */ -int -mlx4_netmap_tx_config(struct SOFTC_T *priv, int ring_nr) -{ - struct netmap_adapter *na = NA(priv->dev); - struct netmap_slot *slot; - struct mlx4_en_cq *cq; - - ND(5, "priv %p ring_nr %d", priv, ring_nr); - -/* - CONFIGURE TX RINGS IN NETMAP MODE - little if anything to do - The main code does - mlx4_en_activate_cq() - mlx4_en_activate_tx_ring() - - - */ - slot = netmap_reset(na, NR_TX, ring_nr, 0); - if (!slot) - return 0; // not in netmap native mode; - ND(5, "init tx ring %d with %d slots (driver %d)", ring_nr, - na->num_tx_desc, - priv->tx_ring[ring_nr].size); - /* enable interrupts on the netmap queues */ - cq = &priv->tx_cq[ring_nr]; // derive from the txring - - return 1; -} - -int -mlx4_netmap_rx_config(struct SOFTC_T *priv, int ring_nr) -{ - struct netmap_adapter *na = NA(priv->dev); - struct netmap_slot *slot; - struct mlx4_en_rx_ring *rxr; - struct netmap_kring *kring; - int i, j, possible_frags; - - /* - * on the receive ring, must set buf addresses into the slots. - - The ring is activated by mlx4_en_activate_rx_rings(), near the end - the rx ring is also 'started' with mlx4_en_update_rx_prod_db() - so we patch into that routine. - - */ - slot = netmap_reset(na, NR_RX, ring_nr, 0); - if (!slot) - return 0; // not in native netmap mode - kring = &na->rx_rings[ring_nr]; - rxr = &priv->rx_ring[ring_nr]; - ND(20, "ring %d slots %d (driver says %d) frags %d stride %d", ring_nr, - kring->nkr_num_slots, rxr->actual_size, priv->num_frags, rxr->stride); - rxr->prod--; // XXX avoid wraparounds ? - if (kring->nkr_num_slots != rxr->actual_size) { - D("mismatch between slots and actual size, %d vs %d", - kring->nkr_num_slots, rxr->actual_size); - return 1; // XXX error - } - possible_frags = (rxr->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; - RD(1, "stride %d possible frags %d descsize %d DS_SIZE %d", rxr->stride, possible_frags, (int)sizeof(struct mlx4_en_rx_desc), (int)DS_SIZE ); - /* then fill the slots with our entries */ - for (i = 0; i < kring->nkr_num_slots; i++) { - uint64_t paddr; - struct mlx4_en_rx_desc *rx_desc = rxr->buf + (i * rxr->stride); - - PNMB(na, slot + i, &paddr); - - // see mlx4_en_prepare_rx_desc() and mlx4_en_alloc_frag() - rx_desc->data[0].addr = cpu_to_be64(paddr); - rx_desc->data[0].byte_count = cpu_to_be32(NETMAP_BUF_SIZE); - rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key); - - /* we only use one fragment, so the rest is padding */ - for (j = 1; j < possible_frags; j++) { - rx_desc->data[j].byte_count = 0; - rx_desc->data[j].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); - rx_desc->data[j].addr = 0; - } - } - RD(5, "ring %d done", ring_nr); - return 1; -} - -static int -mlx4_netmap_config(struct netmap_adapter *na, - u_int *txr, u_int *txd, u_int *rxr, u_int *rxd) -{ - struct net_device *ifp = na->ifp; - struct SOFTC_T *priv = netdev_priv(ifp); - - *txr = priv->tx_ring_num; - *txd = priv->tx_ring[0].size; - - - *rxr = priv->rx_ring_num; - if (*txr > *rxr) { - D("using only %d out of %d tx queues", *rxr, *txr); - *txr = *rxr; - } - *rxd = priv->rx_ring[0].size; - D("txr %d txd %d bufsize %d -- rxr %d rxd %d act %d bufsize %d", - *txr, *txd, priv->tx_ring[0].buf_size, - *rxr, *rxd, priv->rx_ring[0].actual_size, - priv->rx_ring[0].buf_size); - return 0; -} - - -/* - * The attach routine, called near the end of mlx4_en_init_netdev(), - * fills the parameters for netmap_attach() and calls it. - * It cannot fail, in the worst case (such as no memory) - * netmap mode will be disabled and the driver will only - * operate in standard mode. - * - * XXX TODO: - * at the moment use a single lock, and only init a max of 4 queues. - */ -static void -mlx4_netmap_attach(struct SOFTC_T *priv) -{ - struct netmap_adapter na; - struct net_device *dev = priv->dev; - int rxq, txq; - - bzero(&na, sizeof(na)); - - na.ifp = dev; - na.pdev = &priv->pdev->dev; - rxq = priv->rx_ring_num; - txq = priv->tx_ring_num; - /* this card has 1k tx queues, so better limit the number */ - if (rxq > 16) - rxq = 16; - if (txq > rxq) - txq = rxq; - if (txq < 1 && rxq < 1) - txq = rxq = 1; - na.num_tx_rings = txq; - na.num_rx_rings = rxq; - na.num_tx_desc = priv->tx_ring[0].size; - na.num_rx_desc = priv->rx_ring[0].size; - na.nm_txsync = mlx4_netmap_txsync; - na.nm_rxsync = mlx4_netmap_rxsync; - na.nm_register = mlx4_netmap_reg; - na.nm_config = mlx4_netmap_config; - netmap_attach(&na); -} -#endif /* NETMAP_MLX4_MAIN */ -/* end of file */ diff --git a/private/LINUX/vhost-port/Makefile b/private/LINUX/vhost-port/Makefile deleted file mode 100644 index d19dbb8cd..000000000 --- a/private/LINUX/vhost-port/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -obj-m += v1000_net.o -v1000_net-objs := v1000.o net.o - - -KSRC=/lib/modules/$(shell uname -r)/build - -all: module test tags - -module: - make -C $(KSRC) M=$(PWD) modules - -install: - make -C $(KSRC) INSTALL_MOD_DIR=extramodules M=$(PWD) modules_install - -test: test.o tun_alloc.o buildpkt.o - gcc -Wall -g -o test test.o tun_alloc.o buildpkt.o -lpthread - -test.o: test.c v1000_user.h tun_alloc.h - gcc -Wall -g -c test.c - -tun_alloc.o: tun_alloc.c tun_alloc.h - gcc -Wall -g -c tun_alloc.c - -buildpkt.o: - gcc -Wall -g -c buildpkt.c - -clean: - make -C $(KSRC) M=$(PWD) clean - -rm test *.o tags cscope.out - -tags: v1000.c v1000.h net.c test.c - ctags -R diff --git a/private/LINUX/vhost-port/buildpkt.c b/private/LINUX/vhost-port/buildpkt.c deleted file mode 100644 index 870cd1ab4..000000000 --- a/private/LINUX/vhost-port/buildpkt.c +++ /dev/null @@ -1,216 +0,0 @@ -#include "buildpkt.h" - - -#include -#include -#include -#include -#include -#include - -#include /* if_nametoindex() */ -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - - - -/* Program arguments */ -struct arguments { - u_int8_t dst_mac[6]; /* User specified destination MAC. */ - u_int8_t src_mac[6]; /* User specified source MAC. */ - struct in_addr dst_ip; /* User specified destination IP. */ - struct in_addr src_ip; /* User specified source IP. */ - int dst_port; /* User specified destination port. */ - int src_port; /* User specified source port. */ - int packet_len; /* User specified frame length. */ - uint32_t seed; /* Used to fill the UDP payload. */ - int checksum; /* Do we calculate the UDP checksum? */ - void *packet; /* Packet buffer pointer. */ -}; - -/* Compute the checksum of the given ip header. */ -static uint16_t checksum(const void *data, uint16_t len, uint32_t sum) -{ - const uint8_t *addr = data; - uint32_t i; - - /* Checksum all the pairs of bytes first... */ - for (i = 0; i < (len & ~1U); i += 2) { - sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); - if (sum > 0xFFFF) - sum -= 0xFFFF; - } - /* - * If there's a single byte left over, checksum it, too. - * Network byte order is big-endian, so the remaining byte is - * the high byte. - */ - if (i < len) { - sum += addr[i] << 8; - if (sum > 0xFFFF) - sum -= 0xFFFF; - } - return sum; -} - -static u_int16_t wrapsum(u_int32_t sum) -{ - sum = ~sum & 0xFFFF; - return (htons(sum)); -} - -static void initialize_packet(struct arguments *a) -{ - struct pkt *pkt = a->packet; - struct ether_header *eh; - struct ip *ip; - struct udphdr *udp; - uint16_t paylen = a->packet_len - sizeof(*eh) - sizeof(struct ip); - int i, l, l0 = sizeof(a->seed); - - for (i = 0; i < paylen;) { - l = l0 < paylen - i ? l0 : paylen - i; - bcopy(&a->seed, pkt->body + i, l); - i += l; - } - ip = &pkt->ip; - - ip->ip_v = IPVERSION; - ip->ip_hl = 5; - ip->ip_id = 0; - ip->ip_tos = IPTOS_LOWDELAY; - ip->ip_len = ntohs(a->packet_len - sizeof(*eh)); - ip->ip_id = 0; - ip->ip_off = htons(IP_DF); /* Don't fragment */ - ip->ip_ttl = IPDEFTTL; - ip->ip_p = IPPROTO_UDP; - ip->ip_dst.s_addr = a->dst_ip.s_addr; - ip->ip_src.s_addr = a->src_ip.s_addr; - ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); - - - udp = &pkt->udp; - udp->uh_sport = htons(a->src_port); - udp->uh_dport = htons(a->dst_port); - udp->uh_ulen = htons(paylen); - if (a->checksum) { - /* Magic: taken from sbin/dhclient/packet.c */ - udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), - checksum(pkt->body, - paylen - sizeof(*udp), - checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), - IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) - ) - ) - )); - } else - udp->uh_sum = 0; - - eh = &pkt->eh; - bcopy(a->src_mac, eh->ether_shost, 6); - bcopy(a->dst_mac, eh->ether_dhost, 6); - eh->ether_type = htons(ETHERTYPE_IP); -} - - -static int parse_mac(char *buf, u_int8_t mac[6]) -{ - char *p = buf, *q; - int i = 0; - long int tmp; - - for (i = 0; i < 6; i++) { - tmp = strtol(p, &q, 16); - if ( (i < 5 && *q != ':') || (i == 5 && *q) || tmp < 0 || tmp > 255 ) { - return -1; - } - mac[i] = tmp; - p = q + 1; - } - return 0; -} - -static int fill_arguments(int argc, char *argv[], struct arguments * a) -{ - long payloadsize, srcport, dstport; - char * dummy; - - /* MAC addresses. */ - if (parse_mac(argv[0], a->dst_mac) < 0) { - fprintf(stderr, "invalid macaddr: %s\n", argv[0]); - return -1; - } - - if (parse_mac(argv[1], a->src_mac) < 0) { - fprintf(stderr, "invalid macaddr: %s\n", argv[1]); - return -1; - } - - /* IP addresses. */ - if (inet_aton(argv[2], &a->dst_ip) == 0) { - fprintf(stderr, "invalid destination IP: %s\n", argv[2]); - return -1; - } - - if (inet_aton(argv[3], &a->src_ip) == 0) { - fprintf(stderr, "invalid source IP: %s\n", argv[3]); - return -1; - } - - /* UDP ports. */ - dstport = strtoul(argv[4], &dummy, 10); - if (dstport < 0 || dstport > 65536 || *dummy != '\0') { - fprintf(stderr, "invalid destination port\n"); - return -1; - } - a->dst_port = dstport; - - srcport = strtoul(argv[5], &dummy, 10); - if (srcport < 0 || srcport > 65536 || *dummy != '\0') { - fprintf(stderr, "invalid source port\n"); - return -1; - } - a->src_port = srcport; - - /* Ethernet frame size. */ - payloadsize = strtoul(argv[6], &dummy, 10); - if (payloadsize < 46 || *dummy != '\0') { - fprintf(stderr, "payloadsize < 46\n"); - return -1; - } - if (payloadsize > 70000) { - fprintf(stderr, "payloadsize > 1500\n"); - return -1; - } - a->packet_len = payloadsize; - - return 0; -} - - -void build_packet_from_args(int argc, char *argv[], struct pkt * p, - uint32_t seed, int checksum) -{ - struct arguments a; - - fill_arguments(argc, argv, &a); - - a.packet = p; /* Memory for *p is allocated from the user. */ - a.seed = seed; - a.checksum = checksum; - - initialize_packet(&a); -} diff --git a/private/LINUX/vhost-port/buildpkt.h b/private/LINUX/vhost-port/buildpkt.h deleted file mode 100644 index 67f174912..000000000 --- a/private/LINUX/vhost-port/buildpkt.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __BUILD__PACKET__HH -#define __BUILD__PACKET__HH - -#define _BSD_SOURCE - - -#include -#include -#include -#include - - - -/* An UDP packet. */ -struct pkt { - struct ether_header eh; - struct ip ip; - struct udphdr udp; - uint8_t body[2048]; -} __attribute__((__packed__)); - - -void build_packet_from_args(int argc, char *argv[], struct pkt * p, - uint32_t seed, int checksum); - -#endif diff --git a/private/LINUX/vhost-port/clean.sh b/private/LINUX/vhost-port/clean.sh deleted file mode 100755 index 86b2559eb..000000000 --- a/private/LINUX/vhost-port/clean.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -set -x - -sudo rmmod v1000_net.ko -#sudo rm /dev/v1000 - -mv .test.c test.c - -sudo arp -d "10.1.1.1" -sudo arp -d "10.1.1.2" - -sudo ip link set br1 down -sudo brctl delbr br1 -sudo ip link set tap1 up -sudo ip link set tap2 up -sudo ip tuntap del mode tap name tap1 -sudo ip tuntap del mode tap name tap2 diff --git a/private/LINUX/vhost-port/e1000_regs.h b/private/LINUX/vhost-port/e1000_regs.h deleted file mode 100644 index c9cb79e64..000000000 --- a/private/LINUX/vhost-port/e1000_regs.h +++ /dev/null @@ -1,893 +0,0 @@ -/******************************************************************************* - - Intel PRO/1000 Linux driver - Copyright(c) 1999 - 2006 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, see . - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - Linux NICS - e1000-devel Mailing List - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ - -/* e1000_hw.h - * Structures, enums, and macros for the MAC - */ - -#ifndef _E1000_HW_H_ -#define _E1000_HW_H_ - - -/* PCI Device IDs */ -#define E1000_DEV_ID_82542 0x1000 -#define E1000_DEV_ID_82543GC_FIBER 0x1001 -#define E1000_DEV_ID_82543GC_COPPER 0x1004 -#define E1000_DEV_ID_82544EI_COPPER 0x1008 -#define E1000_DEV_ID_82544EI_FIBER 0x1009 -#define E1000_DEV_ID_82544GC_COPPER 0x100C -#define E1000_DEV_ID_82544GC_LOM 0x100D -#define E1000_DEV_ID_82540EM 0x100E -#define E1000_DEV_ID_82540EM_LOM 0x1015 -#define E1000_DEV_ID_82540EP_LOM 0x1016 -#define E1000_DEV_ID_82540EP 0x1017 -#define E1000_DEV_ID_82540EP_LP 0x101E -#define E1000_DEV_ID_82545EM_COPPER 0x100F -#define E1000_DEV_ID_82545EM_FIBER 0x1011 -#define E1000_DEV_ID_82545GM_COPPER 0x1026 -#define E1000_DEV_ID_82545GM_FIBER 0x1027 -#define E1000_DEV_ID_82545GM_SERDES 0x1028 -#define E1000_DEV_ID_82546EB_COPPER 0x1010 -#define E1000_DEV_ID_82546EB_FIBER 0x1012 -#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D -#define E1000_DEV_ID_82541EI 0x1013 -#define E1000_DEV_ID_82541EI_MOBILE 0x1018 -#define E1000_DEV_ID_82541ER_LOM 0x1014 -#define E1000_DEV_ID_82541ER 0x1078 -#define E1000_DEV_ID_82547GI 0x1075 -#define E1000_DEV_ID_82541GI 0x1076 -#define E1000_DEV_ID_82541GI_MOBILE 0x1077 -#define E1000_DEV_ID_82541GI_LF 0x107C -#define E1000_DEV_ID_82546GB_COPPER 0x1079 -#define E1000_DEV_ID_82546GB_FIBER 0x107A -#define E1000_DEV_ID_82546GB_SERDES 0x107B -#define E1000_DEV_ID_82546GB_PCIE 0x108A -#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 -#define E1000_DEV_ID_82547EI 0x1019 -#define E1000_DEV_ID_82547EI_MOBILE 0x101A -#define E1000_DEV_ID_82571EB_COPPER 0x105E -#define E1000_DEV_ID_82571EB_FIBER 0x105F -#define E1000_DEV_ID_82571EB_SERDES 0x1060 -#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 -#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 -#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 -#define E1000_DEV_ID_82571EB_QUAD_COPPER_LOWPROFILE 0x10BC -#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 -#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA -#define E1000_DEV_ID_82572EI_COPPER 0x107D -#define E1000_DEV_ID_82572EI_FIBER 0x107E -#define E1000_DEV_ID_82572EI_SERDES 0x107F -#define E1000_DEV_ID_82572EI 0x10B9 -#define E1000_DEV_ID_82573E 0x108B -#define E1000_DEV_ID_82573E_IAMT 0x108C -#define E1000_DEV_ID_82573L 0x109A -#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 -#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 -#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 -#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA -#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB - -#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 -#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A -#define E1000_DEV_ID_ICH8_IGP_C 0x104B -#define E1000_DEV_ID_ICH8_IFE 0x104C -#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 -#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 -#define E1000_DEV_ID_ICH8_IGP_M 0x104D - -/* Register Set. (82543, 82544) - * - * Registers are defined to be 32 bits and should be accessed as 32 bit values. - * These registers are physically located on the NIC, but are mapped into the - * host memory address space. - * - * RW - register is both readable and writable - * RO - register is read only - * WO - register is write only - * R/clr - register is read only and is cleared when read - * A - register array - */ -#define E1000_CTRL 0x00000 /* Device Control - RW */ -#define E1000_CTRL_DUP 0x00004 /* Device Control Duplicate (Shadow) - RW */ -#define E1000_STATUS 0x00008 /* Device Status - RO */ -#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ -#define E1000_EERD 0x00014 /* EEPROM Read - RW */ -#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ -#define E1000_FLA 0x0001C /* Flash Access - RW */ -#define E1000_MDIC 0x00020 /* MDI Control - RW */ -#define E1000_SCTL 0x00024 /* SerDes Control - RW */ -#define E1000_FEXTNVM 0x00028 /* Future Extended NVM register */ -#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ -#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ -#define E1000_FCT 0x00030 /* Flow Control Type - RW */ -#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ -#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ -#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ -#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ -#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ -#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ -#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ -#define E1000_RCTL 0x00100 /* RX Control - RW */ -#define E1000_RDTR1 0x02820 /* RX Delay Timer (1) - RW */ -#define E1000_RDBAL1 0x02900 /* RX Descriptor Base Address Low (1) - RW */ -#define E1000_RDBAH1 0x02904 /* RX Descriptor Base Address High (1) - RW */ -#define E1000_RDLEN1 0x02908 /* RX Descriptor Length (1) - RW */ -#define E1000_RDH1 0x02910 /* RX Descriptor Head (1) - RW */ -#define E1000_RDT1 0x02918 /* RX Descriptor Tail (1) - RW */ -#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ -#define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ -#define E1000_RXCW 0x00180 /* RX Configuration Word - RO */ -#define E1000_TCTL 0x00400 /* TX Control - RW */ -#define E1000_TCTL_EXT 0x00404 /* Extended TX Control - RW */ -#define E1000_TIPG 0x00410 /* TX Inter-packet gap -RW */ -#define E1000_TBT 0x00448 /* TX Burst Timer - RW */ -#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ -#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ -#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ -#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ -#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ -#define FEXTNVM_SW_CONFIG 0x0001 -#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ -#define E1000_PBS 0x01008 /* Packet Buffer Size */ -#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ -#define E1000_FLASH_UPDATES 1000 -#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ -#define E1000_FLASHT 0x01028 /* FLASH Timer Register */ -#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ -#define E1000_FLSWCTL 0x01030 /* FLASH control register */ -#define E1000_FLSWDATA 0x01034 /* FLASH data register */ -#define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ -#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ -#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ -#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ -#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ -#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ -#define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */ -#define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */ -#define E1000_RDLEN 0x02808 /* RX Descriptor Length - RW */ -#define E1000_RDH 0x02810 /* RX Descriptor Head - RW */ -#define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */ -#define E1000_RDTR 0x02820 /* RX Delay Timer - RW */ -#define E1000_RDBAL0 E1000_RDBAL /* RX Desc Base Address Low (0) - RW */ -#define E1000_RDBAH0 E1000_RDBAH /* RX Desc Base Address High (0) - RW */ -#define E1000_RDLEN0 E1000_RDLEN /* RX Desc Length (0) - RW */ -#define E1000_RDH0 E1000_RDH /* RX Desc Head (0) - RW */ -#define E1000_RDT0 E1000_RDT /* RX Desc Tail (0) - RW */ -#define E1000_RDTR0 E1000_RDTR /* RX Delay Timer (0) - RW */ -#define E1000_RXDCTL 0x02828 /* RX Descriptor Control queue 0 - RW */ -#define E1000_RXDCTL1 0x02928 /* RX Descriptor Control queue 1 - RW */ -#define E1000_RADV 0x0282C /* RX Interrupt Absolute Delay Timer - RW */ -#define E1000_RSRPD 0x02C00 /* RX Small Packet Detect - RW */ -#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ -#define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */ -#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ -#define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ -#define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ -#define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ -#define E1000_TDFTS 0x03428 /* TX Data FIFO Tail Saved - RW */ -#define E1000_TDFPC 0x03430 /* TX Data FIFO Packet Count - RW */ -#define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */ -#define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */ -#define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */ -#define E1000_TDH 0x03810 /* TX Descriptor Head - RW */ -#define E1000_TDT 0x03818 /* TX Descripotr Tail - RW */ -#define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */ -#define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */ -#define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */ -#define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ -#define E1000_TARC0 0x03840 /* TX Arbitration Count (0) */ -#define E1000_TDBAL1 0x03900 /* TX Desc Base Address Low (1) - RW */ -#define E1000_TDBAH1 0x03904 /* TX Desc Base Address High (1) - RW */ -#define E1000_TDLEN1 0x03908 /* TX Desc Length (1) - RW */ -#define E1000_TDH1 0x03910 /* TX Desc Head (1) - RW */ -#define E1000_TDT1 0x03918 /* TX Desc Tail (1) - RW */ -#define E1000_TXDCTL1 0x03928 /* TX Descriptor Control (1) - RW */ -#define E1000_TARC1 0x03940 /* TX Arbitration Count (1) */ -#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ -#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ -#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ -#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ -#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ -#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ -#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ -#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ -#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ -#define E1000_COLC 0x04028 /* Collision Count - R/clr */ -#define E1000_DC 0x04030 /* Defer Count - R/clr */ -#define E1000_TNCRS 0x04034 /* TX-No CRS - R/clr */ -#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ -#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ -#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ -#define E1000_XONRXC 0x04048 /* XON RX Count - R/clr */ -#define E1000_XONTXC 0x0404C /* XON TX Count - R/clr */ -#define E1000_XOFFRXC 0x04050 /* XOFF RX Count - R/clr */ -#define E1000_XOFFTXC 0x04054 /* XOFF TX Count - R/clr */ -#define E1000_FCRUC 0x04058 /* Flow Control RX Unsupported Count- R/clr */ -#define E1000_PRC64 0x0405C /* Packets RX (64 bytes) - R/clr */ -#define E1000_PRC127 0x04060 /* Packets RX (65-127 bytes) - R/clr */ -#define E1000_PRC255 0x04064 /* Packets RX (128-255 bytes) - R/clr */ -#define E1000_PRC511 0x04068 /* Packets RX (255-511 bytes) - R/clr */ -#define E1000_PRC1023 0x0406C /* Packets RX (512-1023 bytes) - R/clr */ -#define E1000_PRC1522 0x04070 /* Packets RX (1024-1522 bytes) - R/clr */ -#define E1000_GPRC 0x04074 /* Good Packets RX Count - R/clr */ -#define E1000_BPRC 0x04078 /* Broadcast Packets RX Count - R/clr */ -#define E1000_MPRC 0x0407C /* Multicast Packets RX Count - R/clr */ -#define E1000_GPTC 0x04080 /* Good Packets TX Count - R/clr */ -#define E1000_GORCL 0x04088 /* Good Octets RX Count Low - R/clr */ -#define E1000_GORCH 0x0408C /* Good Octets RX Count High - R/clr */ -#define E1000_GOTCL 0x04090 /* Good Octets TX Count Low - R/clr */ -#define E1000_GOTCH 0x04094 /* Good Octets TX Count High - R/clr */ -#define E1000_RNBC 0x040A0 /* RX No Buffers Count - R/clr */ -#define E1000_RUC 0x040A4 /* RX Undersize Count - R/clr */ -#define E1000_RFC 0x040A8 /* RX Fragment Count - R/clr */ -#define E1000_ROC 0x040AC /* RX Oversize Count - R/clr */ -#define E1000_RJC 0x040B0 /* RX Jabber Count - R/clr */ -#define E1000_MGTPRC 0x040B4 /* Management Packets RX Count - R/clr */ -#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ -#define E1000_MGTPTC 0x040BC /* Management Packets TX Count - R/clr */ -#define E1000_TORL 0x040C0 /* Total Octets RX Low - R/clr */ -#define E1000_TORH 0x040C4 /* Total Octets RX High - R/clr */ -#define E1000_TOTL 0x040C8 /* Total Octets TX Low - R/clr */ -#define E1000_TOTH 0x040CC /* Total Octets TX High - R/clr */ -#define E1000_TPR 0x040D0 /* Total Packets RX - R/clr */ -#define E1000_TPT 0x040D4 /* Total Packets TX - R/clr */ -#define E1000_PTC64 0x040D8 /* Packets TX (64 bytes) - R/clr */ -#define E1000_PTC127 0x040DC /* Packets TX (65-127 bytes) - R/clr */ -#define E1000_PTC255 0x040E0 /* Packets TX (128-255 bytes) - R/clr */ -#define E1000_PTC511 0x040E4 /* Packets TX (256-511 bytes) - R/clr */ -#define E1000_PTC1023 0x040E8 /* Packets TX (512-1023 bytes) - R/clr */ -#define E1000_PTC1522 0x040EC /* Packets TX (1024-1522 Bytes) - R/clr */ -#define E1000_MPTC 0x040F0 /* Multicast Packets TX Count - R/clr */ -#define E1000_BPTC 0x040F4 /* Broadcast Packets TX Count - R/clr */ -#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context TX - R/clr */ -#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context TX Fail - R/clr */ -#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ -#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Packet Timer Expire Count */ -#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Absolute Timer Expire Count */ -#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Packet Timer Expire Count */ -#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Absolute Timer Expire Count */ -#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ -#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Minimum Threshold Count */ -#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Descriptor Minimum Threshold Count */ -#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ -#define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ -#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ -#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ -#define E1000_RA 0x05400 /* Receive Address - RW Array */ -#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ -#define E1000_WUC 0x05800 /* Wakeup Control - RW */ -#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ -#define E1000_WUS 0x05810 /* Wakeup Status - RO */ -#define E1000_MANC 0x05820 /* Management Control - RW */ -#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ -#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ -#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ -#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ -#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ -#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ -#define E1000_HOST_IF 0x08800 /* Host Interface */ -#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ -#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ - -#define E1000_KUMCTRLSTA 0x00034 /* MAC-PHY interface - RW */ -#define E1000_MDPHYA 0x0003C /* PHY address - RW */ -#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ -#define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ - -#define E1000_GCR 0x05B00 /* PCI-Ex Control */ -#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ -#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ -#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ -#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ -#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ -#define E1000_SWSM 0x05B50 /* SW Semaphore */ -#define E1000_FWSM 0x05B54 /* FW Semaphore */ -#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ -#define E1000_HICR 0x08F00 /* Host Inteface Control */ - -/* RSS registers */ -#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ -#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ -#define E1000_RETA 0x05C00 /* Redirection Table - RW Array */ -#define E1000_RSSRK 0x05C80 /* RSS Random Key - RW Array */ -#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ -#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ - -/* PHY 1000 MII Register/Bit Definitions */ -/* PHY Registers defined by IEEE */ -#define PHY_CTRL 0x00 /* Control Register */ -#define PHY_STATUS 0x01 /* Status Regiser */ -#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ -#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ -#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ -#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ -#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ -#define PHY_NEXT_PAGE_TX 0x07 /* Next Page TX */ -#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ -#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ -#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ -#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ - -#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ -#define MAX_PHY_MULTI_PAGE_REG 0xF /* Registers equal on all pages */ - -/* M88E1000 Specific Registers */ -#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */ -#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Register */ -#define M88E1000_INT_ENABLE 0x12 /* Interrupt Enable Register */ -#define M88E1000_INT_STATUS 0x13 /* Interrupt Status Register */ -#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Control */ -#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ - -#define M88E1000_PHY_EXT_CTRL 0x1A /* PHY extend control register */ -#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for page number setting */ -#define M88E1000_PHY_GEN_CONTROL 0x1E /* Its meaning depends on reg 29 */ -#define M88E1000_PHY_VCO_REG_BIT8 0x100 /* Bits 8 & 11 are adjusted for */ -#define M88E1000_PHY_VCO_REG_BIT11 0x800 /* improved BER performance */ - -/* PHY Control Register */ -#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ -#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ -#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ -#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ -#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ -#define MII_CR_POWER_DOWN 0x0800 /* Power down */ -#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ -#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ -#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ -#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ - -/* PHY Status Register */ -#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ -#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ -#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ -#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ -#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ -#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ -#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ -#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ -#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ -#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ -#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ -#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ -#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ -#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ -#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ - -/* Interrupt Cause Read */ -#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ -#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ -#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ -#define E1000_ICR_RXSEQ 0x00000008 /* rx sequence error */ -#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */ -#define E1000_ICR_RXO 0x00000040 /* rx overrun */ -#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ -#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */ -#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */ -#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ -#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ -#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ -#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ -#define E1000_ICR_TXD_LOW 0x00008000 -#define E1000_ICR_SRPD 0x00010000 -#define E1000_ICR_ACK 0x00020000 /* Receive Ack frame */ -#define E1000_ICR_MNG 0x00040000 /* Manageability event */ -#define E1000_ICR_DOCK 0x00080000 /* Dock/Undock */ -#define E1000_ICR_INT_ASSERTED 0x80000000 /* If this bit asserted, the driver should claim the interrupt */ -#define E1000_ICR_RXD_FIFO_PAR0 0x00100000 /* queue 0 Rx descriptor FIFO parity error */ -#define E1000_ICR_TXD_FIFO_PAR0 0x00200000 /* queue 0 Tx descriptor FIFO parity error */ -#define E1000_ICR_HOST_ARB_PAR 0x00400000 /* host arb read buffer parity error */ -#define E1000_ICR_PB_PAR 0x00800000 /* packet buffer parity error */ -#define E1000_ICR_RXD_FIFO_PAR1 0x01000000 /* queue 1 Rx descriptor FIFO parity error */ -#define E1000_ICR_TXD_FIFO_PAR1 0x02000000 /* queue 1 Tx descriptor FIFO parity error */ -#define E1000_ICR_ALL_PARITY 0x03F00000 /* all parity error bits */ -#define E1000_ICR_DSW 0x00000020 /* FW changed the status of DISSW bit in the FWSM */ -#define E1000_ICR_PHYINT 0x00001000 /* LAN connected device generates an interrupt */ -#define E1000_ICR_EPRST 0x00100000 /* ME handware reset occurs */ - -/* Interrupt Cause Set */ -#define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ -#define E1000_ICS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ -#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ -#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ -#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ -#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */ -#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ -#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */ -#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ -#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ -#define E1000_ICS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ -#define E1000_ICS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ -#define E1000_ICS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ -#define E1000_ICS_TXD_LOW E1000_ICR_TXD_LOW -#define E1000_ICS_SRPD E1000_ICR_SRPD -#define E1000_ICS_ACK E1000_ICR_ACK /* Receive Ack frame */ -#define E1000_ICS_MNG E1000_ICR_MNG /* Manageability event */ -#define E1000_ICS_DOCK E1000_ICR_DOCK /* Dock/Undock */ -#define E1000_ICS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ -#define E1000_ICS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ -#define E1000_ICS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ -#define E1000_ICS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ -#define E1000_ICS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ -#define E1000_ICS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ -#define E1000_ICS_DSW E1000_ICR_DSW -#define E1000_ICS_PHYINT E1000_ICR_PHYINT -#define E1000_ICS_EPRST E1000_ICR_EPRST - -/* Interrupt Mask Set */ -#define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ -#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ -#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ -#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ -#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ -#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */ -#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ -#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */ -#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ -#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ -#define E1000_IMS_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ -#define E1000_IMS_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ -#define E1000_IMS_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ -#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW -#define E1000_IMS_SRPD E1000_ICR_SRPD -#define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ -#define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ -#define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ -#define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ -#define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ -#define E1000_IMS_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ -#define E1000_IMS_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ -#define E1000_IMS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ -#define E1000_IMS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ -#define E1000_IMS_DSW E1000_ICR_DSW -#define E1000_IMS_PHYINT E1000_ICR_PHYINT -#define E1000_IMS_EPRST E1000_ICR_EPRST - -/* Interrupt Mask Clear */ -#define E1000_IMC_TXDW E1000_ICR_TXDW /* Transmit desc written back */ -#define E1000_IMC_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ -#define E1000_IMC_LSC E1000_ICR_LSC /* Link Status Change */ -#define E1000_IMC_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ -#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ -#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */ -#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */ -#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */ -#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */ -#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */ -#define E1000_IMC_GPI_EN1 E1000_ICR_GPI_EN1 /* GP Int 1 */ -#define E1000_IMC_GPI_EN2 E1000_ICR_GPI_EN2 /* GP Int 2 */ -#define E1000_IMC_GPI_EN3 E1000_ICR_GPI_EN3 /* GP Int 3 */ -#define E1000_IMC_TXD_LOW E1000_ICR_TXD_LOW -#define E1000_IMC_SRPD E1000_ICR_SRPD -#define E1000_IMC_ACK E1000_ICR_ACK /* Receive Ack frame */ -#define E1000_IMC_MNG E1000_ICR_MNG /* Manageability event */ -#define E1000_IMC_DOCK E1000_ICR_DOCK /* Dock/Undock */ -#define E1000_IMC_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ -#define E1000_IMC_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ -#define E1000_IMC_HOST_ARB_PAR E1000_ICR_HOST_ARB_PAR /* host arb read buffer parity error */ -#define E1000_IMC_PB_PAR E1000_ICR_PB_PAR /* packet buffer parity error */ -#define E1000_IMC_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1 /* queue 1 Rx descriptor FIFO parity error */ -#define E1000_IMC_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1 /* queue 1 Tx descriptor FIFO parity error */ -#define E1000_IMC_DSW E1000_ICR_DSW -#define E1000_IMC_PHYINT E1000_ICR_PHYINT -#define E1000_IMC_EPRST E1000_ICR_EPRST - -/* Receive Control */ -#define E1000_RCTL_RST 0x00000001 /* Software reset */ -#define E1000_RCTL_EN 0x00000002 /* enable */ -#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ -#define E1000_RCTL_UPE 0x00000008 /* unicast promiscuous enable */ -#define E1000_RCTL_MPE 0x00000010 /* multicast promiscuous enab */ -#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ -#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ -#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ -#define E1000_RCTL_LBM_SLP 0x00000080 /* serial link loopback mode */ -#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ -#define E1000_RCTL_DTYP_MASK 0x00000C00 /* Descriptor type mask */ -#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ -#define E1000_RCTL_RDMTS_HALF 0x00000000 /* rx desc min threshold size */ -#define E1000_RCTL_RDMTS_QUAT 0x00000100 /* rx desc min threshold size */ -#define E1000_RCTL_RDMTS_EIGTH 0x00000200 /* rx desc min threshold size */ -#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ -#define E1000_RCTL_MO_0 0x00000000 /* multicast offset 11:0 */ -#define E1000_RCTL_MO_1 0x00001000 /* multicast offset 12:1 */ -#define E1000_RCTL_MO_2 0x00002000 /* multicast offset 13:2 */ -#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ -#define E1000_RCTL_MDR 0x00004000 /* multicast desc ring 0 */ -#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ -/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ -#define E1000_RCTL_SZ_2048 0x00000000 /* rx buffer size 2048 */ -#define E1000_RCTL_SZ_1024 0x00010000 /* rx buffer size 1024 */ -#define E1000_RCTL_SZ_512 0x00020000 /* rx buffer size 512 */ -#define E1000_RCTL_SZ_256 0x00030000 /* rx buffer size 256 */ -/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ -#define E1000_RCTL_SZ_16384 0x00010000 /* rx buffer size 16384 */ -#define E1000_RCTL_SZ_8192 0x00020000 /* rx buffer size 8192 */ -#define E1000_RCTL_SZ_4096 0x00030000 /* rx buffer size 4096 */ -#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ -#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ -#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ -#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ -#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ -#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ -#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ -#define E1000_RCTL_FLXBUF_MASK 0x78000000 /* Flexible buffer size */ -#define E1000_RCTL_FLXBUF_SHIFT 27 /* Flexible buffer shift */ - - -#define E1000_EEPROM_SWDPIN0 0x0001 /* SWDPIN 0 EEPROM Value */ -#define E1000_EEPROM_LED_LOGIC 0x0020 /* Led Logic Word */ -#define E1000_EEPROM_RW_REG_DATA 16 /* Offset to data in EEPROM read/write registers */ -#define E1000_EEPROM_RW_REG_DONE 0x10 /* Offset to READ/WRITE done bit */ -#define E1000_EEPROM_RW_REG_START 1 /* First bit for telling part to start operation */ -#define E1000_EEPROM_RW_ADDR_SHIFT 8 /* Shift to the address bits */ -#define E1000_EEPROM_POLL_WRITE 1 /* Flag for polling for write complete */ -#define E1000_EEPROM_POLL_READ 0 /* Flag for polling for read complete */ -/* Register Bit Masks */ -/* Device Control */ -#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ -#define E1000_CTRL_BEM 0x00000002 /* Endian Mode.0=little,1=big */ -#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ -#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */ -#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ -#define E1000_CTRL_TME 0x00000010 /* Test mode. 0=normal,1=test */ -#define E1000_CTRL_SLE 0x00000020 /* Serial Link on 0=dis,1=en */ -#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ -#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ -#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ -#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ -#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ -#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ -#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ -#define E1000_CTRL_BEM32 0x00000400 /* Big Endian 32 mode */ -#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ -#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ -#define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ -#define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ -#define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ -#define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ -#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ -#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ -#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ -#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ -#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ -#define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ -#define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ -#define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ -#define E1000_CTRL_RST 0x04000000 /* Global reset */ -#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ -#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ -#define E1000_CTRL_RTE 0x20000000 /* Routing tag enable */ -#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ -#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ -#define E1000_CTRL_SW2FW_INT 0x02000000 /* Initiate an interrupt to manageability engine */ - -/* Device Status */ -#define E1000_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ -#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ -#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ -#define E1000_STATUS_FUNC_SHIFT 2 -#define E1000_STATUS_FUNC_0 0x00000000 /* Function 0 */ -#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ -#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ -#define E1000_STATUS_TBIMODE 0x00000020 /* TBI mode */ -#define E1000_STATUS_SPEED_MASK 0x000000C0 -#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ -#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ -#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ -#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion - by EEPROM/Flash */ -#define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ -#define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */ -#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. */ -#define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ -#define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ -#define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ -#define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ -#define E1000_STATUS_PCIX_SPEED 0x0000C000 /* PCI-X bus speed */ -#define E1000_STATUS_BMC_SKU_0 0x00100000 /* BMC USB redirect disabled */ -#define E1000_STATUS_BMC_SKU_1 0x00200000 /* BMC SRAM disabled */ -#define E1000_STATUS_BMC_SKU_2 0x00400000 /* BMC SDRAM disabled */ -#define E1000_STATUS_BMC_CRYPTO 0x00800000 /* BMC crypto disabled */ -#define E1000_STATUS_BMC_LITE 0x01000000 /* BMC external code execution disabled */ -#define E1000_STATUS_RGMII_ENABLE 0x02000000 /* RGMII disabled */ -#define E1000_STATUS_FUSE_8 0x04000000 -#define E1000_STATUS_FUSE_9 0x08000000 -#define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ -#define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ - -/* EEPROM/Flash Control */ -#define E1000_EECD_SK 0x00000001 /* EEPROM Clock */ -#define E1000_EECD_CS 0x00000002 /* EEPROM Chip Select */ -#define E1000_EECD_DI 0x00000004 /* EEPROM Data In */ -#define E1000_EECD_DO 0x00000008 /* EEPROM Data Out */ -#define E1000_EECD_FWE_MASK 0x00000030 -#define E1000_EECD_FWE_DIS 0x00000010 /* Disable FLASH writes */ -#define E1000_EECD_FWE_EN 0x00000020 /* Enable FLASH writes */ -#define E1000_EECD_FWE_SHIFT 4 -#define E1000_EECD_REQ 0x00000040 /* EEPROM Access Request */ -#define E1000_EECD_GNT 0x00000080 /* EEPROM Access Grant */ -#define E1000_EECD_PRES 0x00000100 /* EEPROM Present */ -#define E1000_EECD_SIZE 0x00000200 /* EEPROM Size (0=64 word 1=256 word) */ -#define E1000_EECD_ADDR_BITS 0x00000400 /* EEPROM Addressing bits based on type - * (0-small, 1-large) */ -#define E1000_EECD_TYPE 0x00002000 /* EEPROM Type (1-SPI, 0-Microwire) */ -#ifndef E1000_EEPROM_GRANT_ATTEMPTS -#define E1000_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM # attempts to gain grant */ -#endif -#define E1000_EECD_AUTO_RD 0x00000200 /* EEPROM Auto Read done */ -#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* EEprom Size */ -#define E1000_EECD_SIZE_EX_SHIFT 11 -#define E1000_EECD_NVADDS 0x00018000 /* NVM Address Size */ -#define E1000_EECD_SELSHAD 0x00020000 /* Select Shadow RAM */ -#define E1000_EECD_INITSRAM 0x00040000 /* Initialize Shadow RAM */ -#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ -#define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ -#define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ -#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ -#define E1000_EECD_SECVAL_SHIFT 22 -#define E1000_STM_OPCODE 0xDB00 -#define E1000_HICR_FW_RESET 0xC0 - -#define E1000_SHADOW_RAM_WORDS 2048 -#define E1000_ICH_NVM_SIG_WORD 0x13 -#define E1000_ICH_NVM_SIG_MASK 0xC0 - -/* MDI Control */ -#define E1000_MDIC_DATA_MASK 0x0000FFFF -#define E1000_MDIC_REG_MASK 0x001F0000 -#define E1000_MDIC_REG_SHIFT 16 -#define E1000_MDIC_PHY_MASK 0x03E00000 -#define E1000_MDIC_PHY_SHIFT 21 -#define E1000_MDIC_OP_WRITE 0x04000000 -#define E1000_MDIC_OP_READ 0x08000000 -#define E1000_MDIC_READY 0x10000000 -#define E1000_MDIC_INT_EN 0x20000000 -#define E1000_MDIC_ERROR 0x40000000 - -/* EEPROM Commands - Microwire */ -#define EEPROM_READ_OPCODE_MICROWIRE 0x6 /* EEPROM read opcode */ -#define EEPROM_WRITE_OPCODE_MICROWIRE 0x5 /* EEPROM write opcode */ -#define EEPROM_ERASE_OPCODE_MICROWIRE 0x7 /* EEPROM erase opcode */ -#define EEPROM_EWEN_OPCODE_MICROWIRE 0x13 /* EEPROM erase/write enable */ -#define EEPROM_EWDS_OPCODE_MICROWIRE 0x10 /* EEPROM erast/write disable */ - -/* EEPROM Word Offsets */ -#define EEPROM_COMPAT 0x0003 -#define EEPROM_ID_LED_SETTINGS 0x0004 -#define EEPROM_VERSION 0x0005 -#define EEPROM_SERDES_AMPLITUDE 0x0006 /* For SERDES output amplitude adjustment. */ -#define EEPROM_PHY_CLASS_WORD 0x0007 -#define EEPROM_INIT_CONTROL1_REG 0x000A -#define EEPROM_INIT_CONTROL2_REG 0x000F -#define EEPROM_SWDEF_PINS_CTRL_PORT_1 0x0010 -#define EEPROM_INIT_CONTROL3_PORT_B 0x0014 -#define EEPROM_INIT_3GIO_3 0x001A -#define EEPROM_SWDEF_PINS_CTRL_PORT_0 0x0020 -#define EEPROM_INIT_CONTROL3_PORT_A 0x0024 -#define EEPROM_CFG 0x0012 -#define EEPROM_FLASH_VERSION 0x0032 -#define EEPROM_CHECKSUM_REG 0x003F - -#define E1000_EEPROM_CFG_DONE 0x00040000 /* MNG config cycle done */ -#define E1000_EEPROM_CFG_DONE_PORT_1 0x00080000 /* ...for second port */ - -/* Transmit Descriptor */ -struct e1000_tx_desc { - uint64_t buffer_addr; /* Address of the descriptor's data buffer */ - union { - uint32_t data; - struct { - uint16_t length; /* Data buffer length */ - uint8_t cso; /* Checksum offset */ - uint8_t cmd; /* Descriptor control */ - } flags; - } lower; - union { - uint32_t data; - struct { - uint8_t status; /* Descriptor status */ - uint8_t css; /* Checksum start */ - uint16_t special; - } fields; - } upper; -}; - -/* Transmit Descriptor bit definitions */ -#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ -#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ -#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ -#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ -#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ -#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ -#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ -#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ -#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ -#define E1000_TXD_CMD_DEXT 0x20000000 /* Descriptor extension (0 = legacy) */ -#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ -#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ -#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ -#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ -#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ -#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ -#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ -#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ -#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ -#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ - -/* Transmit Control */ -#define E1000_TCTL_RST 0x00000001 /* software reset */ -#define E1000_TCTL_EN 0x00000002 /* enable tx */ -#define E1000_TCTL_BCE 0x00000004 /* busy check enable */ -#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ -#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ -#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ -#define E1000_TCTL_SWXOFF 0x00400000 /* SW Xoff transmission */ -#define E1000_TCTL_PBE 0x00800000 /* Packet Burst Enable */ -#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ -#define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ -#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ - -/* Receive Descriptor */ -struct e1000_rx_desc { - uint64_t buffer_addr; /* Address of the descriptor's data buffer */ - uint16_t length; /* Length of data DMAed into data buffer */ - uint16_t csum; /* Packet checksum */ - uint8_t status; /* Descriptor status */ - uint8_t errors; /* Descriptor Errors */ - uint16_t special; -}; - -/* Receive Descriptor bit definitions */ -#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ -#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ -#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ -#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ -#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum caculated */ -#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ -#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ -#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ -#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ -#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ -#define E1000_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ -#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ -#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ -#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ -#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ -#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ -#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ -#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ -#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ -#define E1000_RXD_SPC_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ -#define E1000_RXD_SPC_PRI_SHIFT 13 -#define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ -#define E1000_RXD_SPC_CFI_SHIFT 12 - -#define E1000_RXDEXT_STATERR_CE 0x01000000 -#define E1000_RXDEXT_STATERR_SE 0x02000000 -#define E1000_RXDEXT_STATERR_SEQ 0x04000000 -#define E1000_RXDEXT_STATERR_CXE 0x10000000 -#define E1000_RXDEXT_STATERR_TCPE 0x20000000 -#define E1000_RXDEXT_STATERR_IPE 0x40000000 -#define E1000_RXDEXT_STATERR_RXE 0x80000000 - -#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 -#define E1000_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF - -/* Receive Address */ -#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ - -/* Offload Context Descriptor */ -struct e1000_context_desc { - union { - uint32_t ip_config; - struct { - uint8_t ipcss; /* IP checksum start */ - uint8_t ipcso; /* IP checksum offset */ - uint16_t ipcse; /* IP checksum end */ - } ip_fields; - } lower_setup; - union { - uint32_t tcp_config; - struct { - uint8_t tucss; /* TCP checksum start */ - uint8_t tucso; /* TCP checksum offset */ - uint16_t tucse; /* TCP checksum end */ - } tcp_fields; - } upper_setup; - uint32_t cmd_and_length; /* */ - union { - uint32_t data; - struct { - uint8_t status; /* Descriptor status */ - uint8_t hdr_len; /* Header length */ - uint16_t mss; /* Maximum segment size */ - } fields; - } tcp_seg_setup; -}; - -/* Offload data descriptor */ -struct e1000_data_desc { - uint64_t buffer_addr; /* Address of the descriptor's buffer address */ - union { - uint32_t data; - struct { - uint16_t length; /* Data buffer length */ - uint8_t typ_len_ext; /* */ - uint8_t cmd; /* */ - } flags; - } lower; - union { - uint32_t data; - struct { - uint8_t status; /* Descriptor status */ - uint8_t popts; /* Packet Options */ - uint16_t special; /* */ - } fields; - } upper; -}; - -/* Management Control */ -#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ -#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ -#define E1000_MANC_R_ON_FORCE 0x00000004 /* Reset on Force TCO - RO */ -#define E1000_MANC_RMCP_EN 0x00000100 /* Enable RCMP 026Fh Filtering */ -#define E1000_MANC_0298_EN 0x00000200 /* Enable RCMP 0298h Filtering */ -#define E1000_MANC_IPV4_EN 0x00000400 /* Enable IPv4 */ -#define E1000_MANC_IPV6_EN 0x00000800 /* Enable IPv6 */ -#define E1000_MANC_SNAP_EN 0x00001000 /* Accept LLC/SNAP */ -#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ -#define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery - * Filtering */ -#define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */ -#define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ -#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ -#define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */ -#define E1000_MANC_RCV_ALL 0x00080000 /* Receive All Enabled */ -#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ -#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 /* Enable MAC address - * filtering */ -#define E1000_MANC_EN_MNG2HOST 0x00200000 /* Enable MNG packets to host - * memory */ -#define E1000_MANC_EN_IP_ADDR_FILTER 0x00400000 /* Enable IP address - * filtering */ -#define E1000_MANC_EN_XSUM_FILTER 0x00800000 /* Enable checksum filtering */ -#define E1000_MANC_BR_EN 0x01000000 /* Enable broadcast filtering */ -#define E1000_MANC_SMB_REQ 0x01000000 /* SMBus Request */ -#define E1000_MANC_SMB_GNT 0x02000000 /* SMBus Grant */ -#define E1000_MANC_SMB_CLK_IN 0x04000000 /* SMBus Clock In */ -#define E1000_MANC_SMB_DATA_IN 0x08000000 /* SMBus Data In */ -#define E1000_MANC_SMB_DATA_OUT 0x10000000 /* SMBus Data Out */ -#define E1000_MANC_SMB_CLK_OUT 0x20000000 /* SMBus Clock Out */ - -#define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ -#define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ - -/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */ -#define EEPROM_SUM 0xBABA - -#endif /* _E1000_HW_H_ */ diff --git a/private/LINUX/vhost-port/init.sh b/private/LINUX/vhost-port/init.sh deleted file mode 100755 index 766be7ca1..000000000 --- a/private/LINUX/vhost-port/init.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -set -x - -sudo modprobe tun -sudo modprobe macvtap - -sudo ip tuntap add mode tap name tap1 -sudo ip tuntap add mode tap name tap2 -sudo ip link set tap1 up -sudo ip link set tap2 up -sudo brctl addbr br1 -sudo brctl addif br1 tap1 -sudo brctl addif br1 tap2 -sudo ip link set br1 up -sudo ip addr add 10.1.1.200/24 dev br1 - -sudo arp -s "10.1.1.1" "00:aa:bb:cc:de:1" -sudo arp -s "10.1.1.2" "00:aa:bb:cc:de:2" - -cp test.c .test.c -BR0MAC=$(ip link | tail -1 | awk '{print $2}') -sed -i "s|BR0MAC|${BR0MAC}|" test.c -make "test" - - -sudo insmod v1000_net.ko -sudo chmod a+rw /dev/v1000 - diff --git a/private/LINUX/vhost-port/net.c b/private/LINUX/vhost-port/net.c deleted file mode 100644 index 8a617d005..000000000 --- a/private/LINUX/vhost-port/net.c +++ /dev/null @@ -1,1279 +0,0 @@ -/* Author: Vincenzo Maffione - * - * Based on the vhost/vhost-net work. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * e1000-paravirt server in host kernel. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include "paravirt.h" -#include "v1000.h" - - -//#define DEBUG /* Enables communication debugging. */ -#ifdef DEBUG -#define DBG(x) x -#else -#define DBG(x) -#endif - -//#define RATE /* Enables communication statistics. */ -#ifdef RATE -#define IFRATE(x) x -struct rate_stats { - unsigned long gtxk; /* Guest --> Host Tx kicks. */ - unsigned long grxk; /* Guest --> Host Rx kicks. */ - unsigned long htxk; /* Host --> Guest Tx kicks. */ - unsigned long hrxk; /* Host --> Guest Rx Kicks. */ - unsigned long btxwu; /* Backend Tx wake-up. */ - unsigned long brxwu; /* Backend Rx wake-up. */ - unsigned long txpkts; /* Transmitted packets. */ - unsigned long rxpkts; /* Received packets. */ - unsigned long txfl; /* TX flushes requests. */ -}; - -struct rate_context { - struct timer_list timer; - struct rate_stats new; - struct rate_stats old; -}; - -#define RATE_PERIOD 2 -static void rate_callback(unsigned long arg) -{ - struct rate_context * ctx = (struct rate_context *)arg; - struct rate_stats cur = ctx->new; - int r; - - printk("txp = %lu Hz\n", (cur.txpkts - ctx->old.txpkts)/RATE_PERIOD); - printk("gtxk = %lu Hz\n", (cur.gtxk - ctx->old.gtxk)/RATE_PERIOD); - printk("htxk = %lu Hz\n", (cur.htxk - ctx->old.htxk)/RATE_PERIOD); - printk("btxw = %lu Hz\n", (cur.btxwu - ctx->old.btxwu)/RATE_PERIOD); - printk("rxp = %lu Hz\n", (cur.rxpkts - ctx->old.rxpkts)/RATE_PERIOD); - printk("grxk = %lu Hz\n", (cur.grxk - ctx->old.grxk)/RATE_PERIOD); - printk("hrxk = %lu Hz\n", (cur.hrxk - ctx->old.hrxk)/RATE_PERIOD); - printk("brxw = %lu Hz\n", (cur.brxwu - ctx->old.brxwu)/RATE_PERIOD); - printk("txfl = %lu Hz\n", (cur.txfl - ctx->old.txfl)/RATE_PERIOD); - printk("\n"); - - ctx->old = cur; - r = mod_timer(&ctx->timer, jiffies + - msecs_to_jiffies(RATE_PERIOD * 1000)); - if (unlikely(r)) - printk("[v1000] Error: mod_timer()\n"); -} -#else -#define IFRATE(x) -#endif - - -struct e1000_tx_context { - bool vlan_needed; - uint8_t ipcss; - uint8_t ipcso; - uint16_t ipcse; - uint32_t paylen; - bool tcp; -}; - -struct e1000_state { - uint32_t tdt; - uint32_t tdh; - uint32_t rdt; - uint32_t rdh; - struct e1000_tx_context txc; - uint32_t txnum; /* Number of TX descriptors. */ - uint32_t rxnum; /* Number of RX descriptors. */ -}; - -/* Max number of bytes transferred before requeueing the job. - * Using this limit prevents one virtqueue from starving others. */ -#define V1000_NET_WEIGHT 0x80000 - -/* A set of callbacks through wich the v1000 frontend interacts - with a backend (socket or netmap). */ -struct v1000_backend { - /* Get the file struct attached to the backend. */ - struct file *(*get_file)(void *opaque); - /* Send a packet to the backend. */ - int (*sendmsg)(void *opaque, struct msghdr *msg, size_t iovlen, unsigned flags); - /* Get the length of the next rx buffer ready into the backend. */ - int (*peek_head_len)(void *opaque); - /* Receive a packet from the backend. */ - int (*recvmsg)(void *opaque, struct msghdr *msg, size_t len); -}; - -struct v1000_net { - struct v1000_dev dev; - struct v1000_ring tx_ring, rx_ring; - struct v1000_poll tx_poll, rx_poll; - - struct V1000Config config; - bool configured; - struct e1000_tx_desc __user * tx_desc; - struct e1000_rx_desc __user * rx_desc; - struct paravirt_csb __user * csb; - struct virtio_net_hdr __user * tx_hdr; - struct virtio_net_hdr __user * rx_hdr; - struct e1000_state state; - bool broken; - struct v1000_backend backend; - IFRATE(struct rate_context rate_ctx); -}; - -/* #################### SOCKET BACKEND CALLBACKS ################### */ -static struct file *socket_backend_get_file(void *opaque) -{ - struct socket *sock = (struct socket *)opaque; - - return sock->file; -} - -static int socket_backend_sendmsg(void *opaque, struct msghdr *msg, - size_t iovlen, unsigned flags) -{ - struct socket *sock = (struct socket *)opaque; - - return sock->ops->sendmsg(NULL, sock, msg, iovlen); -} - -static int socket_backend_peek_head_len(void *opaque) -{ - struct socket *sock = (struct socket *)opaque; - struct sock *sk = sock->sk; - struct sk_buff *head; - int len = 0; - unsigned long flags; - - spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); - head = skb_peek(&sk->sk_receive_queue); - if (likely(head)) { - len = head->len; - if (vlan_tx_tag_present(head)) - len += VLAN_HLEN; - } - - spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); - return len; -} - -static int socket_backend_recvmsg(void *opaque, struct msghdr *msg, - size_t len) -{ - struct socket *sock = (struct socket *)opaque; - - return sock->ops->recvmsg(NULL, sock, msg, len, - MSG_DONTWAIT | MSG_TRUNC); -} - -#ifdef DEBUG -/* Print the translation table. */ -static void print_translations(struct v1000_net * net) -{ - struct V1000Translation * tr = &net->config.tr; - int i; - - printk("Translation table.%p: (#%u)\n", net, tr->num); - for (i=0; inum; i++) { - printk(" idx=%d, pa=%llu, l=%llu, va=%p\n", i, - tr->table[i].phy, - tr->table[i].length, - tr->table[i].virt); - } - printk("\n"); -} -#endif - -static void * lookup_translation(struct v1000_net * net, uint64_t address, uint64_t length) -{ - struct V1000Translation * tr = &net->config.tr; - int i; - - for (i=0; inum; i++) { - /* printk("(%llu,%llu) against (%llu,%llu,%p)\n", address, length, - tr->table[i].phy, tr->table[i].length, tr->table[i].virt); */ - if (address >= tr->table[i].phy && address + length <= - tr->table[i].phy + tr->table[i].length) { - /* The requested address range is completely included - in this traslated memory chunk. We have a (complete) - hit. */ - return (void *)(((uint64_t)tr->table[i].virt + address) - tr->table[i].phy); - } - } - - return NULL; -} - -#define CSB_READ(csb, field, r) \ - do { \ - if (get_user(r, &csb->field)) { \ - r = -EFAULT; \ - } \ - } while (0) - -#define CSB_WRITE(csb, field, v) \ - do { \ - if (put_user(v, &csb->field)) { \ - v = -EFAULT; \ - } \ - } while (0) - -static inline void v1000_set_txkick(struct v1000_net *net, bool enable) -{ - uint32_t v = enable ? 1 : 0; - - CSB_WRITE(net->csb, host_need_txkick, v); -} - -static inline bool v1000_tx_interrupts_enabled(struct v1000_net * net) -{ - uint32_t v; - - CSB_READ(net->csb, guest_need_txkick, v); - - return v; -} - -/* Expects to be always run from workqueue - which acts as - * read-size critical section for our kind of RCU. */ -static void handle_tx(struct v1000_net *net) -{ - struct v1000_ring *vr = &net->tx_ring; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_control = NULL, - .msg_controllen = 0, - .msg_iov = vr->iov, - .msg_flags = MSG_DONTWAIT, - }; - struct virtio_net_hdr hdr; - size_t iovlen, total_len = 0; - int err; - void *opaque; - struct e1000_state * st = &net->state; - struct e1000_tx_desc desc; - struct e1000_context_desc * ctxdp = - (struct e1000_context_desc *)&desc; - void __user * va; - unsigned iovcnt, wbcnt, i; - bool work = false; - uint16_t len; - uint32_t desc_type; - bool eop; - uint32_t next_tdh; - - mutex_lock(&vr->mutex); - opaque = vr->private_data; - if (unlikely(!opaque || net->broken)) { - printk("[v1000] Broken device\n"); - goto leave; - } - - /* Disable notifications. */ - v1000_set_txkick(net, false); - - next_tdh = st->tdh + 1; - if (unlikely(next_tdh == st->txnum)) { - next_tdh = 0; - } - - smp_mb(); - CSB_READ(net->csb, guest_tdt, st->tdt); - if (unlikely(st->tdt >= st->txnum)) { - net->broken = true; - goto leave; - } - for (;;) { - /* Nothing new? Wait for eventfd to tell us they refilled. */ - if (st->tdt == st->tdh) { - /* Reenable notifications. */ - v1000_set_txkick(net, true); - /* Doublecheck. */ - smp_mb(); - CSB_READ(net->csb, guest_tdt, st->tdt); - if (unlikely(st->tdt >= st->txnum)) { - net->broken = true; - goto leave; - } - if (unlikely(st->tdt != st->tdh)) { - v1000_set_txkick(net, false); - continue; - } - break; - } - - /* Use the first iovec slot for the virtio-net header. */ - vr->iov[0].iov_base = net->tx_hdr + st->tdh; - vr->iov[0].iov_len = sizeof(struct virtio_net_hdr); - memset(&hdr, 0, sizeof(struct virtio_net_hdr)); -#if VIRTIO_NET_HDR_GSO_NONE - hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; -#endif - /* TODO refer to a global zero (null) vnet-hdr to avoid the - copy_to_user when the header is zero. */ - - /* Collect the TX descriptors. */ - iovcnt = 1; - wbcnt = 0; - eop = false; - do { - if (unlikely(st->tdh == st->tdt)) { - printk("[v1000] Broken TX descriptor chain: expected EOP\n"); - break; - } - - /* Read a descriptor. */ - if (unlikely(copy_from_user(&desc, net->tx_desc + st->tdh, - sizeof(struct e1000_tx_desc)))) { - printk("copy_from_user(txdesc) FAILED!!!\n"); - net->broken = true; - goto leave; - } - - /* Process the descriptor. */ - if (desc.lower.data & E1000_TXD_CMD_RS) { - /* Register a writeback operation. */ - vr->wb[wbcnt].addr = (uint8_t *)&net->tx_desc[st->tdh].upper.data; - vr->wb[wbcnt].value = E1000_TXD_STAT_DD; - wbcnt++; - } - desc_type = desc.lower.data & (E1000_TXD_CMD_DEXT - | E1000_TXD_DTYP_D); - if (desc_type == E1000_TXD_CMD_DEXT) { /* Context descriptor. */ - if (unlikely(iovcnt != 1)) { - printk("[v1000] Warning: TX context descriptor in the" - "middle of a packet. Discarding %d data" - "descriptors\n", iovcnt - 1); - eop = true; - } - st->txc.ipcss = ctxdp->lower_setup.ip_fields.ipcss; - st->txc.ipcso = ctxdp->lower_setup.ip_fields.ipcso; - st->txc.ipcse = ctxdp->lower_setup.ip_fields.ipcse; - st->txc.paylen = ctxdp->cmd_and_length & 0xfffff; - st->txc.tcp = ((ctxdp->cmd_and_length & - E1000_TXD_CMD_TCP) != 0); - - if ((ctxdp->cmd_and_length & E1000_TXD_CMD_TSE)) { - hdr.gso_type = (ctxdp->cmd_and_length & - E1000_TXD_CMD_IP) ? VIRTIO_NET_HDR_GSO_TCPV4: - VIRTIO_NET_HDR_GSO_TCPV6; - hdr.gso_size = ctxdp->tcp_seg_setup.fields.mss; - hdr.hdr_len = ctxdp->tcp_seg_setup.fields.hdr_len; - } - - hdr.csum_start = ctxdp->upper_setup.tcp_fields.tucss; - hdr.csum_offset = ctxdp->upper_setup.tcp_fields.tucso - ctxdp->upper_setup.tcp_fields.tucss; - if (unlikely(ctxdp->upper_setup.tcp_fields.tucse)) { - printk("[v1000] Warning: Checksum on partial payload\n"); - } - } else { /* Data descriptor. */ - if (desc_type == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) { - /* Extended descriptor. */ - if (iovcnt == 1) { - if (desc.upper.data & (E1000_TXD_POPTS_TXSM << 8)) - hdr.flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; - /* Don't check for IP checksumming, it's already computed - by the guest kernel. - if (desc.upper.data & (E1000_TXD_POPTS_IXSM << 8)) { - } */ - } - if (unlikely(hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE && - !(desc.lower.data & E1000_TXD_CMD_TSE))) { - printk("[v1000] TCP segmentation error\n"); - goto next; - } - } else { - /* Legacy descriptor. */ - } - - len = desc.lower.data & 0xffff; - va = lookup_translation(net, desc.buffer_addr, len); - if (unlikely(!va)) { - printk("Address translation FAILED: tdh=%u, phy=%llu, len=%u\n", st->tdh, desc.buffer_addr, len); - net->broken = true; - goto leave; - } - DBG(printk("tx: phy=%llu,len=%u,virt=%p,TDH=%u,TDT=%u\n", desc.buffer_addr, len, va, st->tdh, st->tdt)); - vr->iov[iovcnt].iov_base = va; - vr->iov[iovcnt].iov_len = len; - iovcnt++; - - if (desc.lower.data & E1000_TXD_CMD_EOP) { - eop = true; - /* Insert virtio-net header. */ - DBG(printk("hdr: flags=%X, cs=%u, co=%u, gso_t=%u, gso_s=%u, hlen=%u\n", hdr.flags, hdr.csum_start, hdr.csum_offset, hdr.gso_type, hdr.gso_size, hdr.hdr_len)); - if (unlikely(copy_to_user(vr->iov->iov_base, &hdr, sizeof(hdr)))) { - printk("copy_to_user(vnet_hdr)\n"); - net->broken = true; - break; - } - - /* Reset the TX context. */ - st->txc.vlan_needed = 0; - - /* Once we have collected all the frame fragments, - we can send it through the backend. */ - msg.msg_iovlen = iovcnt; - /* TODO compute iovlen during the cycle */ - iovlen = iov_length(vr->iov, iovcnt); - err = net->backend.sendmsg(opaque, &msg, iovlen, - st->tdt == next_tdh ? 0 : MSG_MORE); - IFRATE(if (st->tdt == next_tdh) net->rate_ctx.new.txfl++); - if (unlikely(err < 0)) { - printk("sendmsg() err!!\n"); - goto leave; // XXX - } - if (unlikely(err != iovlen)) - pr_debug("Truncated TX packet\n"); - total_len += iovlen; - IFRATE(net->rate_ctx.new.txpkts++); - - smp_wmb(); - for (i=0; iwb[i].value, - vr->wb[i].addr))) { - printk("copy_to_user(tx writeback)\n"); - net->broken = true; - goto leave; - } - } - work = true; - } - } - -next: - st->tdh = next_tdh; - if (unlikely(++next_tdh == st->txnum)) - next_tdh = 0; - } while (!eop); - - if (unlikely(total_len >= V1000_NET_WEIGHT)) { - v1000_poll_queue(&vr->poll); - break; - } - - if (st->tdt == st->tdh) { - /* Reload 'tdt' only when necessary. */ - smp_mb(); - CSB_READ(net->csb, guest_tdt, st->tdt); - if (unlikely(st->tdt >= st->txnum)) { - net->broken = true; - goto leave; - } - } - } - -leave: - if (work && v1000_tx_interrupts_enabled(net)) { - eventfd_signal(vr->call_ctx, 1); - IFRATE(net->rate_ctx.new.htxk++); - } - mutex_unlock(&vr->mutex); - - return; -} - -static inline uint32_t v1000_avail_rx(struct v1000_net * net) -{ - return ((net->state.rxnum + net->state.rdt) - net->state.rdh) % net->state.rxnum; -} - -static uint32_t v1000_avail_rx_bytes(struct v1000_net * net) -{ - return v1000_avail_rx(net) * net->config.rxbuf_size; -} - -static inline void v1000_set_rxkick(struct v1000_net *net, bool enable) -{ - uint32_t v; - - if (enable) { - v = (net->state.rdt + 1 + (net->state.rxnum - v1000_avail_rx(net) - 1) * 3/4) % net->state.rxnum; - } else - v = NET_PARAVIRT_NONE; - CSB_WRITE(net->csb, host_rxkick_at, v); -} - -static inline bool v1000_rx_interrupts_enabled(struct v1000_net * net) -{ - uint32_t v; - - CSB_READ(net->csb, guest_need_rxkick, v); - - return v; -} - -#if 0 -long lj = 0; -long cj; -int c = 0; -#endif - -/* Expects to be always run from workqueue - which acts as - * read-size critical section for our kind of RCU. */ -static void handle_rx(struct v1000_net *net) -{ - struct v1000_ring *vr = &net->rx_ring; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_control = NULL, /* FIXME: get and handle RX aux data. */ - .msg_controllen = 0, - .msg_iov = vr->iov, - .msg_flags = MSG_DONTWAIT, - }; - size_t total_len = 0; - int err; - size_t sock_len; - void *opaque; - struct e1000_state * st = &net->state; - struct e1000_rx_desc desc; - void __user * va; - uint32_t avail_bytes; - unsigned fill; - uint16_t wblen; - uint32_t rdh; - unsigned iovcnt, i; - - DBG(printk("handle_rx()\n")); - - mutex_lock(&vr->mutex); - opaque = vr->private_data; - if (unlikely(!opaque || net->broken)) { - printk("[v1000] Broken device\n"); - goto leave; - } - - /* XXX Disable notification only when NOT using host_rxkick_at. */ - v1000_set_rxkick(net, false); - CSB_READ(net->csb, guest_rdt, st->rdt); - - while ((sock_len = net->backend.peek_head_len(opaque))) { - fill = sock_len; - sock_len += sizeof(struct virtio_net_hdr); - avail_bytes = v1000_avail_rx_bytes(net); - if (avail_bytes < sock_len) { - /* Reload rdt only when necessary. */ - CSB_READ(net->csb, guest_rdt, st->rdt); - avail_bytes = v1000_avail_rx_bytes(net); - if (avail_bytes < sock_len) { - /* If not enough space, reenable notifications. */ - v1000_set_rxkick(net, true); - smp_mb(); - /* Doublecheck. */ - CSB_READ(net->csb, guest_rdt, st->rdt); - avail_bytes = v1000_avail_rx_bytes(net); - if (avail_bytes < sock_len) - break; - v1000_set_rxkick(net, false); - } - } - - rdh = st->rdh; - /* The first slot of the iovec into which receive a new frame - will be used for the virtio-net header. */ - vr->iov[0].iov_base = net->rx_hdr + rdh; - vr->iov[0].iov_len = sizeof(struct virtio_net_hdr); - - /* Use RX descriptors to fill the remainder of vr->iov. */ - iovcnt = 1; - while (fill) { - /* Read the address into the descriptor. */ - if (unlikely(get_user(desc.buffer_addr, - (uint64_t *)(net->rx_desc + rdh)))) { - printk("copy_from_user(rxdesc) FAILED!!!\n"); - net->broken = true; - goto leave; - } - - /* Process the descriptor. */ - va = lookup_translation(net, desc.buffer_addr, - net->config.rxbuf_size); - if (unlikely(!va)) { - printk("Address translation FAILED: rdh=%u, phy=%llu, len=%u\n", rdh, desc.buffer_addr, net->config.rxbuf_size); - net->broken = true; - goto leave; - } - wblen = desc.length = net->config.rxbuf_size; - if (fill <= net->config.rxbuf_size) { - /* Last fragment. */ - desc.length = fill; - wblen = fill + 4; /* FCS aka Ethernet CRC. */ - } - - vr->iov[iovcnt].iov_base = va; - vr->iov[iovcnt].iov_len = desc.length; - vr->wb[iovcnt-1].addr = &net->rx_desc[rdh].status; - vr->wb[iovcnt-1].value = E1000_RXD_STAT_DD; - iovcnt++; - - /* Length writeback. */ - if (unlikely(put_user(wblen, &net->rx_desc[rdh].length))) { - printk("copy_to_user(rx len writeback)\n"); - net->broken = true; - goto leave; - } - - if (unlikely(++rdh == st->rxnum)) - rdh = 0; - fill -= desc.length; - } - vr->wb[iovcnt-2].value |= E1000_RXD_STAT_EOP; - msg.msg_iovlen = iovcnt; - - err = net->backend.recvmsg(opaque, &msg, sock_len); - /* Userspace might have consumed the packet meanwhile: - * it's not supposed to do this usually, but might be hard - * to prevent. Discard data we got (if any) and keep going. */ - if (unlikely(err != sock_len)) { - printk("Discarded rx packet: " - " len %d, expected %zd\n", err, sock_len); - /* Recover the RX descriptors. */ - continue; - } - - smp_mb(); - - /* Descriptors writeback. */ - for (i=0; iwb[i].value, vr->wb[i].addr))) { - printk("copy_to_user(rx writeback)\n"); - net->broken = true; - goto leave; - } - } - - st->rdh = rdh; - IFRATE(net->rate_ctx.new.rxpkts++); - - DBG(printk("received packet [len=%u,iovcnt=%u,rdh=%u,rdt=%u,avail=%u]\n", (unsigned)sock_len, iovcnt, st->rdh, st->rdt, avail_bytes)); -#if 0 - if (++c == 100000) { - cj = jiffies; - if (cj != lj) - printk("%lu pps\n", 300 * c / (cj - lj)); - c = 0; - lj = jiffies; - } -#endif - - total_len += sock_len; - if (unlikely(total_len >= V1000_NET_WEIGHT)) { - v1000_poll_queue(&vr->poll); - break; - } - } - -leave: - if (v1000_rx_interrupts_enabled(net)) { - eventfd_signal(vr->call_ctx, 1); - IFRATE(net->rate_ctx.new.hrxk++); - } - mutex_unlock(&vr->mutex); - DBG(printk("rxintr=%d\n", v1000_rx_interrupts_enabled(net))); -} - -static void handle_tx_kick(struct v1000_work *work) -{ - struct v1000_ring *vr = container_of(work, struct v1000_ring, - poll.work); - struct v1000_net *net = container_of(vr->dev, struct v1000_net, dev); - - IFRATE(net->rate_ctx.new.gtxk++); - handle_tx(net); -} - -static void handle_rx_kick(struct v1000_work *work) -{ - struct v1000_ring *vr = container_of(work, struct v1000_ring, - poll.work); - struct v1000_net *net = container_of(vr->dev, struct v1000_net, dev); - - IFRATE(net->rate_ctx.new.grxk++); - handle_rx(net); -} - -static void handle_tx_net(struct v1000_work *work) -{ - struct v1000_net *net = container_of(work, struct v1000_net, - tx_poll.work); - - IFRATE(net->rate_ctx.new.btxwu++); - handle_tx(net); -} - -static void handle_rx_net(struct v1000_work *work) -{ - struct v1000_net *net = container_of(work, struct v1000_net, - rx_poll.work); - - IFRATE(net->rate_ctx.new.brxwu++); - handle_rx(net); -} - -static int v1000_open(struct inode *inode, struct file *f) -{ - struct v1000_net *n = kmalloc(sizeof *n, GFP_KERNEL); - struct v1000_dev *dev; - int r; - - printk("%p.OPEN()\n", n); - if (!n) - return -ENOMEM; - n->configured = n->broken = false; - memset(&n->state, 0, sizeof(struct e1000_state)); - - dev = &n->dev; - n->tx_ring.handle_kick = handle_tx_kick; - n->rx_ring.handle_kick = handle_rx_kick; - r = v1000_dev_init(dev, &n->tx_ring, &n->rx_ring); - if (r < 0) { - kfree(n); - return r; - } - - v1000_poll_init(&n->tx_poll, handle_tx_net, POLLOUT, dev); - v1000_poll_init(&n->rx_poll, handle_rx_net, POLLIN, dev); - - f->private_data = n; - -#ifdef RATE - memset(&n->rate_ctx, 0, sizeof(n->rate_ctx)); - setup_timer(&n->rate_ctx.timer, &rate_callback, - (unsigned long)&n->rate_ctx); - r = mod_timer(&n->rate_ctx.timer, jiffies + msecs_to_jiffies(1500)); - if (r) - printk("[v1000] Error: mod_timer()\n"); -#endif - - printk("%p.OPEN_END()\n", n); - - return 0; -} - -static void v1000_net_disable_vr(struct v1000_net *n, - struct v1000_ring *vr) -{ - if (!vr->private_data) - return; - if (vr == &n->tx_ring) - v1000_poll_stop(&n->tx_poll); - else - v1000_poll_stop(&n->rx_poll); -} - -static int v1000_net_enable_vr(struct v1000_net *n, - struct v1000_ring *vr) -{ - void *opaque; - int ret; - - opaque = vr->private_data; - if (!opaque) - return 0; - if (vr == &n->tx_ring) { - ret = v1000_poll_start(&n->tx_poll, n->backend.get_file(opaque)); - } else - ret = v1000_poll_start(&n->rx_poll, n->backend.get_file(opaque)); - - return ret; -} - -static void *v1000_net_stop_vr(struct v1000_net *n, - struct v1000_ring *vr) -{ - void *opaque; - - mutex_lock(&vr->mutex); - opaque = vr->private_data; - v1000_net_disable_vr(n, vr); - vr->private_data = NULL; - mutex_unlock(&vr->mutex); - return opaque; -} - -static void v1000_net_stop(struct v1000_net *n, void **tx_opaque, - void **rx_opaque) -{ - *tx_opaque = v1000_net_stop_vr(n, &n->tx_ring); - *rx_opaque = v1000_net_stop_vr(n, &n->rx_ring); -} - -static void v1000_net_flush(struct v1000_net *n) -{ - v1000_poll_flush(&n->rx_poll); - v1000_poll_flush(&n->dev.rx_ring->poll); - v1000_poll_flush(&n->tx_poll); - v1000_poll_flush(&n->dev.tx_ring->poll); -} - -static int v1000_release(struct inode *inode, struct file *f) -{ - struct v1000_net *n = f->private_data; - void *tx_opaque; - void *rx_opaque; - - printk("%p.RELEASE()\n", n); - v1000_net_stop(n, &tx_opaque, &rx_opaque); - v1000_net_flush(n); - v1000_dev_stop(&n->dev); - v1000_dev_cleanup(&n->dev); - if (tx_opaque) - fput(n->backend.get_file(tx_opaque)); - if (rx_opaque) - fput(n->backend.get_file(rx_opaque)); - /* We do an extra flush before freeing memory, - * since jobs can re-queue themselves. */ - v1000_net_flush(n); - - IFRATE(del_timer(&n->rate_ctx.timer)); - kfree(n); - printk("%p.RELEASE_END()\n", n); - - return 0; -} - -static struct socket *get_raw_socket(int fd) -{ - struct { - struct sockaddr_ll sa; - char buf[MAX_ADDR_LEN]; - } uaddr; - int uaddr_len = sizeof uaddr, r; - struct socket *sock = sockfd_lookup(fd, &r); - - if (!sock) - return ERR_PTR(-ENOTSOCK); - - /* Parameter checking */ - if (sock->sk->sk_type != SOCK_RAW) { - r = -ESOCKTNOSUPPORT; - goto err; - } - - r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, - &uaddr_len, 0); - if (r) - goto err; - - if (uaddr.sa.sll_family != AF_PACKET) { - r = -EPFNOSUPPORT; - goto err; - } - return sock; -err: - fput(sock->file); - return ERR_PTR(r); -} - -static struct socket *get_tap_socket(int fd) -{ - struct file *file = fget(fd); - struct socket *sock; - - if (!file) - return ERR_PTR(-EBADF); - sock = tun_get_socket(file); - if (!IS_ERR(sock)) - return sock; - sock = macvtap_get_socket(file); - if (IS_ERR(sock)) - fput(file); - return sock; -} - -struct socket *get_netmap_socket(int fd); -void *netmap_get_backend(int fd); -struct file *netmap_backend_get_file(void *opaque); -int netmap_backend_sendmsg(void *opaque, struct msghdr *m, size_t len, - unsigned flags); -int netmap_backend_peek_head_len(void *opaque); -int netmap_backend_recvmsg(void *opaque, struct msghdr *m, size_t len); - -static struct socket *get_socket(int fd) -{ - struct socket *sock; - - /* special case to disable backend */ - if (fd == -1) - return NULL; - sock = get_raw_socket(fd); - if (!IS_ERR(sock)) - return sock; - sock = get_tap_socket(fd); - if (!IS_ERR(sock)) - return sock; - sock = get_netmap_socket(fd); - if (!IS_ERR(sock)) - return sock; - return ERR_PTR(-ENOTSOCK); -} - -static void *get_backend(struct v1000_net *n, int fd) -{ - /* Probe for the netmap backend first. */ - void *ret = netmap_get_backend(fd); - - if (!IS_ERR(ret)) { - /* Set the netmap backend ops. */ - n->backend.get_file = &netmap_backend_get_file; - n->backend.sendmsg = &netmap_backend_sendmsg; - n->backend.peek_head_len = &netmap_backend_peek_head_len; - n->backend.recvmsg = &netmap_backend_recvmsg; - printk("[v1000] netmap backend selected\n"); - return ret; - } - - /* Probe for a socket backend. */ - ret = get_socket(fd); - if (!IS_ERR(ret)) { - /* Set the socket backend ops. */ - n->backend.get_file = &socket_backend_get_file; - n->backend.sendmsg = &socket_backend_sendmsg; - n->backend.peek_head_len = &socket_backend_peek_head_len; - n->backend.recvmsg = &socket_backend_recvmsg; - printk("[v1000] socket backend selected\n"); - } else { - printk("[v1000] no backend found\n"); - } - - return ret; -} - -static long v1000_net_set_backend(struct v1000_net *n, struct v1000_ring *vr, int fd) -{ - void *opaque; - int r = 0; - - mutex_lock(&vr->mutex); - - opaque = get_backend(n, fd); - if (IS_ERR(opaque)) { - r = PTR_ERR(opaque); - goto err_vr; - } - - /* start polling new backend */ - //v1000_net_disable_vr(n, vr); - vr->private_data = opaque; - if (r) - goto err_used; - //r = v1000_net_enable_vr(n, vr); - if (r) - goto err_used; - - mutex_unlock(&vr->mutex); - - return 0; - -err_used: - v1000_net_enable_vr(n, vr); - fput(n->backend.get_file(opaque)); -err_vr: - mutex_unlock(&vr->mutex); - return r; -} - -static ssize_t v1000_read(struct file* file_ptr, char __user * buffer, - size_t n, loff_t * offset_ptr) -{ - n = 0; - *offset_ptr += n; - - return n; -} - -static int v1000_set_memory(struct v1000_net * net) -{ - struct V1000Translation *newmem, *oldmem; - - /* Use the new table to translate rings and csb memory. */ - if (!(net->tx_desc = lookup_translation(net, net->config.tx_ring.phy, - net->config.tx_ring.num * sizeof(struct e1000_tx_desc)))) - return -EFAULT; - if (!(net->rx_desc = lookup_translation(net, net->config.rx_ring.phy, - net->config.rx_ring.num * sizeof(struct e1000_rx_desc)))) - return -EFAULT; - net->tx_hdr = net->config.tx_ring.hdr.virt; - net->rx_hdr = lookup_translation(net, net->config.rx_ring.hdr.phy, - net->config.rx_ring.num * sizeof(struct virtio_net_hdr)); - if (!net->rx_hdr) - return -EFAULT; - if (!(net->csb = lookup_translation(net, net->config.csb_phy, - sizeof(struct paravirt_csb)))) - return -EFAULT; - - printk("[v1000] virtuals: tx=%p, rx=%p, tx_hdr=%p, rx_hdr=%p, csb=%p\n", - net->tx_desc, net->rx_desc, net->tx_hdr, net->rx_hdr, net->csb); - - newmem = kmalloc(sizeof(struct V1000Translation), GFP_KERNEL); - if (!newmem) - return -ENOMEM; - - memcpy(newmem, &net->config.tr, sizeof(struct V1000Translation)); - - oldmem = rcu_dereference_protected(net->dev.memory, - lockdep_is_held(&net->dev->mutex)); - rcu_assign_pointer(net->dev.memory, newmem); - synchronize_rcu(); - kfree(oldmem); - - return 0; -} - -static int v1000_set_eventfds_ring(struct v1000_ring * vr, struct V1000RingConfig * vrc) -{ - vr->kick = eventfd_fget(vrc->ioeventfd); - if (IS_ERR(vr->kick)) - return PTR_ERR(vr->kick); - - vr->call = eventfd_fget(vrc->irqfd); - if (IS_ERR(vr->call)) - return PTR_ERR(vr->call); - vr->call_ctx = eventfd_ctx_fileget(vr->call); - - if (vrc->resamplefd != ~0U) { - vr->resample = eventfd_fget(vrc->resamplefd); - if (IS_ERR(vr->resample)) - return PTR_ERR(vr->resample); - vr->resample_ctx = eventfd_ctx_fileget(vr->resample); - } else { - vr->resample = NULL; - vr->resample_ctx = NULL; - } - - return 0; -} - -static int v1000_set_eventfds(struct v1000_net * net) -{ - int r; - - if ((r = v1000_set_eventfds_ring(&net->tx_ring, &net->config.tx_ring))) - return r; - if ((r = v1000_set_eventfds_ring(&net->rx_ring, &net->config.rx_ring))) - return r; - - return 0; -} - -static void v1000_print_configuration(struct v1000_net * net) -{ - int i; - struct V1000Config * cfg = &net->config; - - printk("[v1000] configuration:\n"); - printk("TX: phy=%llu, num=%u, hdr.virt=%p, io=%u, irq=%u, resample=%u\n", - cfg->tx_ring.phy, cfg->tx_ring.num, - cfg->tx_ring.hdr.virt, cfg->tx_ring.ioeventfd, - cfg->tx_ring.irqfd, cfg->tx_ring.resamplefd); - printk("RX: phy=%llu, num=%u, hdr.phy=%llu, io=%u, irq=%u, resample=%u\n", - cfg->rx_ring.phy, cfg->rx_ring.num, - cfg->rx_ring.hdr.phy, cfg->rx_ring.ioeventfd, - cfg->rx_ring.irqfd, cfg->rx_ring.resamplefd); - printk("rxbuf_size=%u, csb_phy=%llu, tapfd=%d\n", - cfg->rxbuf_size, cfg->csb_phy, cfg->tapfd); - for (i=0; iconfig.tr.num; i++) { - printk(" pa=%llu, len=%llu, va=%p\n", net->config.tr.table[i].phy, - net->config.tr.table[i].length, net->config.tr.table[i].virt); - } -} - -static int v1000_configure(struct v1000_net * net) -{ - int r; - - /* Configure. */ - if ((r = v1000_dev_set_owner(&net->dev))) - return r; - if ((r = v1000_set_memory(net))) - return r; - if ((r = v1000_set_eventfds(net))) - return r; - if ((r = v1000_net_set_backend(net, &net->rx_ring, net->config.tapfd))) - return r; - if ((r = v1000_net_set_backend(net, &net->tx_ring, net->config.tapfd))) - return r; - net->state.txnum = net->config.tx_ring.num; - net->state.rxnum = net->config.rx_ring.num; - - v1000_print_configuration(net); - - /* Start polling. */ - if (net->tx_ring.handle_kick && (r = v1000_poll_start(&net->tx_ring.poll, net->tx_ring.kick))) - return r; - if (net->rx_ring.handle_kick && (r = v1000_poll_start(&net->rx_ring.poll, net->rx_ring.kick))) - return r; - if ((r = v1000_net_enable_vr(net, &net->tx_ring))) - return r; - if ((r = v1000_net_enable_vr(net, &net->rx_ring))) - return r; - - return 0; -} - -static int v1000_access_ok(struct v1000_net * net) -{ - struct V1000Translation * tr = &net->config.tr; - int i; - - for (i=0; inum; i++) { - if (!access_ok(VERIFY_WRITE, tr->table[i].virt, tr->table[i].length)) - return -1; - } - - return !(access_ok(VERIFY_WRITE, net->tx_desc, - net->config.tx_ring.num * sizeof(struct e1000_tx_desc)) - && access_ok(VERIFY_WRITE, net->rx_desc, - net->config.rx_ring.num * sizeof(struct e1000_rx_desc)) - && access_ok(VERIFY_READ, net->tx_hdr, - net->config.tx_ring.num * sizeof(struct virtio_net_hdr)) - && access_ok(VERIFY_WRITE, net->rx_hdr, - net->config.rx_ring.num * sizeof(struct virtio_net_hdr)) - && access_ok(VERIFY_WRITE, net->csb, sizeof(struct paravirt_csb)) - ); -} - -static ssize_t v1000_write(struct file * file_ptr, const char __user * buffer, size_t n, loff_t * offset_ptr) -{ - struct v1000_net * net = (struct v1000_net *)file_ptr->private_data; - int res; - - /* TODO if n->configured?? */ - - mutex_lock(&net->dev.mutex); - - if (n != sizeof(struct V1000Config)) { - n = -EINVAL; - goto leave; - } - - /* Read the configuration from userspace. */ - if (copy_from_user(&net->config, buffer, sizeof(struct V1000Config))) { - printk(KERN_ALERT "v1000_first_write(): copy_from_user()\n"); - n = -EFAULT; - goto leave; - } - - //printk("[v1000] configuration read\n"); - if ((res = v1000_configure(net))) { - n = res; - goto leave; - } - - if ((res = v1000_access_ok(net))) { - n = res; - goto leave; - } - //printk("[v1000] configuration OK\n"); - net->configured = true; - - *offset_ptr += n; - -leave: - mutex_unlock(&net->dev.mutex); - - return n; -} - -static const struct file_operations v1000_fops = { - .owner = THIS_MODULE, - .release = v1000_release, - .open = v1000_open, - .write = v1000_write, - .read = v1000_read, - .llseek = noop_llseek, -}; - - -/* Device number associated to the v1000 char device. */ -static dev_t device_number; -static struct cdev v1000_cdev; -static struct class *cl; - -static int __init v1000_init(void) -{ - int ret; - - printk(KERN_ALERT "[v1000] Module loaded\n"); - - /* Dynamic allocation of a device number */ - if ((ret = alloc_chrdev_region(&device_number, 0, 1, "v1000")) < 0) { - printk(KERN_ALERT "alloc_chrdev_region() failed"); - goto exit_after_error; - } - printk(KERN_INFO "[v1000] Device number allocated = (%d,%d)\n", MAJOR(device_number), MINOR(device_number)); - if ((cl = class_create(THIS_MODULE, "chardrv")) == NULL) { - printk(KERN_ALERT "class_create() failed"); - unregister_chrdev_region(device_number, 1); - goto exit_after_error; - } - if (device_create(cl, NULL, device_number, NULL, "v1000") == NULL) { - printk(KERN_ALERT "device_create() failed"); - class_destroy(cl); - unregister_chrdev_region(device_number, 1); - goto exit_after_error; - } - - /* Registering a char device into the kernel */ - cdev_init(&(v1000_cdev), &v1000_fops); - v1000_cdev.owner = THIS_MODULE; - v1000_cdev.ops = &v1000_fops; - if ((ret = cdev_add(&v1000_cdev, device_number, 1))) { - device_destroy(cl, device_number); - class_destroy(cl); - unregister_chrdev_region(device_number, 1); - printk(KERN_ALERT "cdev_add() failed[%d]!\n", ret); - goto exit_after_error; - } - printk(KERN_INFO "[v1000] Char device added into the kernel\n"); - - return 0; - -exit_after_error: - printk(KERN_ALERT "[v1000] Module loading failed!\n" ); - return ret; -} - -static void v1000_exit(void) // __exit -{ - cdev_del(&v1000_cdev); - device_destroy(cl, device_number); - class_destroy(cl); - unregister_chrdev_region(device_number, 1); - printk(KERN_INFO "[v1000] module unloaded\n"); -} - -module_init(v1000_init); -module_exit(v1000_exit); - -MODULE_VERSION("0.0.1"); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Vincenzo Maffione"); -MODULE_DESCRIPTION("Host kernel accelerator for e1000-paravirt"); -//MODULE_ALIAS_MISCDEV(VHOST_NET_MINOR); -MODULE_ALIAS("devname:v1000"); diff --git a/private/LINUX/vhost-port/paravirt.h b/private/LINUX/vhost-port/paravirt.h deleted file mode 100644 index e8c49cb0b..000000000 --- a/private/LINUX/vhost-port/paravirt.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (C) 2013 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef NET_PARAVIRT_H -#define NET_PARAVIRT_H - -/* - Support for virtio-like communication between host (H) and guest (G) NICs. - - The guest allocates the shared Communication Status Block (csb) and - write its physical address at CSBAL and CSBAH (data is little endian). - csb->csb_on enables the mode. If disabled, the device acts a regular one. - - Notifications for tx and rx are exchanged without vm exits - if possible. In particular (only mentioning csb mode below), - the following actions are performed. In the description below, - "double check" means verifying again the condition that caused - the previous action, and reverting the action if the condition has - changed. The condition typically depends on a variable set by the - other party, and the double check is done to avoid races. E.g. - - // start with A=0 - again: - // do something - if ( cond(C) ) { // C is written by the other side - A = 1; - // barrier - if ( !cond(C) ) { - A = 0; - goto again; - } - } - - TX: start from idle: - H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new - transmissions, G always updates guest_tdt. If host_need_txkick == 1, - G also writes to the TDT, which acts as a kick to H (so pending - writes are always dispatched to H as soon as possible.) - - TX: active state: - On the kick (TDT write) H sets host_need_txkick == 0 (if not - done already by G), and starts an I/O thread trying to consume - packets from TDH to guest_tdt, periodically refreshing host_tdh - and TDH. When host_tdh == guest_tdt, H sets host_need_txkick=1, - and then does the "double check" for race avoidance. - - TX: G runs out of buffers - XXX there are two mechanisms, one boolean (using guest_need_txkick) - and one with a threshold (using guest_txkick_at). They are mutually - exclusive. - BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does - the double check. If H finds guest_need_txkick== 1 on a write - to TDH, it also generates an interrupt. - THRESHOLD: G sets guest_txkick_at to the TDH value for which it - wants to receive an interrupt. When H detects that TDH moves - across guest_txkick_at, it generates an interrupt. - This second mechanism reduces the number of interrupts and - TDT writes on the transmit side when the host is too slow. - - RX: start from idle - G starts with guest_need_rxkick = 1 when the receive ring is empty. - As packets arrive, H updates host_rdh (and RDH) and also generates an - interrupt when guest_need_rxkick == 1 (so incoming packets are - always reported to G as soon as possible, apart from interrupt - moderation delays). It also tracks guest_rdt for new buffers. - - RX: active state - As the interrupt arrives, G sets guest_need_rxkick = 0 and starts - draining packets from the receive ring, while updating guest_rdt - When G runs out of packets it sets guest_need_rxkick=1 and does the - double check. - - RX: H runs out of buffers - XXX there are two mechanisms, one boolean (using host_need_rxkick) - and one with a threshold (using host_xxkick_at). They are mutually - exclusive. - BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the - double check. If G finds host_need_rxkick==1 on updating guest_rdt, - it also writes to RDT causing a kick to H. - THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants - to receive a kick. When G detects that guest_rdt moves across - host_rxkick_at, it writes to RDT thus generates a kick. - This second mechanism reduces the number of kicks and - RDT writes on the receive side when the guest is too slow and - would free only a few buffers at a time. - - */ -struct paravirt_csb { - /* XXX revise the layout to minimize cache bounces. - * Usage is described as follows: - * [GH][RW][+-0] guest/host reads/writes frequently/rarely/almost never - */ - /* these are (mostly) written by the guest */ - uint32_t guest_tdt; /* GW+ HR+ pkt to transmit */ - uint32_t guest_need_txkick; /* GW- HR+ G ran out of tx bufs, request kick */ - uint32_t guest_need_rxkick; /* GW- HR+ G ran out of rx pkts, request kick */ - uint32_t guest_csb_on; /* GW- HR+ enable paravirtual mode */ - uint32_t guest_rdt; /* GW+ HR+ rx buffers available */ - uint32_t guest_txkick_at; /* GW- HR+ tx ring pos. where G expects an intr */ - uint32_t guest_use_msix; /* GW0 HR0 guest uses MSI-X interrupts. */ - uint32_t pad[9]; - - /* these are (mostly) written by the host */ - uint32_t host_tdh; /* GR0 HW- shadow register, mostly unused */ - uint32_t host_need_txkick; /* GR+ HW- start the iothread */ - uint32_t host_txcycles_lim; /* GW- HR- how much to spin before sleep. - * set by the guest */ - uint32_t host_txcycles; /* GR0 HW- counter, but no need to be exported */ - uint32_t host_rdh; /* GR0 HW- shadow register, mostly unused */ - uint32_t host_need_rxkick; /* GR+ HW- flush rx queued packets */ - uint32_t host_isr; /* GR* HW* shadow copy of ISR */ - uint32_t host_rxkick_at; /* GR+ HW- rx ring pos where H expects a kick */ - uint32_t vnet_ring_high; /* Vnet ring physical address high. */ - uint32_t vnet_ring_low; /* Vnet ring physical address low. */ -}; - -#define NET_PARAVIRT_CSB_SIZE 4096 -#define NET_PARAVIRT_NONE (~((uint32_t)0)) - -#ifdef QEMU_PCI_H - -/* - * API functions only available within QEMU - */ - -void paravirt_configure_csb(struct paravirt_csb** csb, uint32_t csbbal, - uint32_t csbbah, QEMUBH* tx_bh, AddressSpace *as); - -#endif /* QEMU_PCI_H */ - -#endif /* NET_PARAVIRT_H */ diff --git a/private/LINUX/vhost-port/test.c b/private/LINUX/vhost-port/test.c deleted file mode 100644 index 41da38f4e..000000000 --- a/private/LINUX/vhost-port/test.c +++ /dev/null @@ -1,682 +0,0 @@ -#include "buildpkt.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#ifndef u16 -#define u16 uint16_t -#endif -#include /* struct virtio_net_hdr */ - -#include "tun_alloc.h" -#include "paravirt.h" -#include "v1000_user.h" - - -/* ========================== Useful macros =============================== */ -typedef unsigned char bool; -#define false 0 -#define true 1 - -/* GCC compiler barrier (from Wikipedia). */ -#define compiler_barrier() do { \ - asm volatile("" ::: "memory"); \ - } while (0) -/* ======================================================================= */ - - -#define RATE /* Enable rating & statistics. */ -bool check_rx_payload = 1; -bool use_resamplefd = 0; -bool tx_context_descriptor = 1; - - - -/* The e1000 TX and RX descriptor rings. */ -#define NUM_DESCRIPTORS 256 -static struct e1000_tx_desc tx_desc_mem[NUM_DESCRIPTORS]; -static struct e1000_rx_desc rx_desc_mem[NUM_DESCRIPTORS]; - -/* TX and RX vnet-hdr rings. The first NUM_DESCRIPTORS are for - transmission, while the others are for reception. */ -static struct virtio_net_hdr vnet_hdr_rings[2*NUM_DESCRIPTORS]; - -/* The packet buffers. The first TX_SKBUFFS are for transmission, - while the others are for reception. */ -#define NUM_SKBUFFS 1000 -#define NUM_TX_SKBUFFS (NUM_SKBUFFS/2) -#define NUM_RX_SKBUFFS (NUM_SKBUFFS-NUM_TX_SKBUFFS) -static struct pkt skbuffs[NUM_SKBUFFS]; -static int skb_tx = 0; -static int skb_rx = 0; -#define ETH_FRAME_SIZE 490U -#define ETH_FRAME_SIZE_STR "490" -#define MAX_NUM_FRAGS 9 - -/* Fake physical base addresses. Please make sure they - don't overlap. */ -#define CSB_PHY 15000000 -#define TXRING_PHY 22000000 -#define RXRING_PHY 68000000 -#define VNET_RING_PHY 101000000 -#define SKBUFFS_PHY 258000000 - - -/* The communication status block. */ -static struct paravirt_csb csb_mem; - - -/* V1000 configuration. */ -struct V1000Config config; - -/* Flag to stop sender and receiver threads. */ -static int stop = 0; - -/* Create a new eventfd. */ -static void new_eventfd(uint32_t * fdp) -{ - int efd; - int initval = 0; - int flags = 0; - - if ((efd = eventfd(initval, flags)) < 0) { - perror("eventfd()\n"); - exit(EXIT_FAILURE); - } - *fdp = (uint32_t)efd; -} - -/* Set the ring parameters. */ -static void configure_ring(struct V1000RingConfig * rc, uint64_t phy, bool physical, uint64_t hdr_phy, void * hdr_virt) -{ - rc->phy = phy; - if (physical) - rc->hdr.phy = hdr_phy; - else - rc->hdr.virt = hdr_virt; - rc->num = NUM_DESCRIPTORS; - new_eventfd(&rc->ioeventfd); - new_eventfd(&rc->irqfd); - if (use_resamplefd) - new_eventfd(&rc->resamplefd); - else - rc->resamplefd = ~0U; -} - -/* Set a row of the translation table. */ -static void configure_table(struct V1000Config * cfg, unsigned idx, uint64_t phy, uint64_t length, void * virt) -{ - if (idx >= MAX_TRANSLATION_ELEMENTS) { - printf("idx too big (%d)\n", idx); - exit(EXIT_FAILURE); - } - - cfg->tr.table[idx].phy = phy; - cfg->tr.table[idx].length = length; - cfg->tr.table[idx].virt = virt; -} - -/* Configure the v1000 device. */ -static void configure(int vfd, const char * si, const char * ri, - struct V1000Config * cfg) -{ - char tapname[IFNAMSIZ]; - int n; - int tfd; - - configure_ring(&cfg->tx_ring, TXRING_PHY, false, 0, &vnet_hdr_rings); - configure_ring(&cfg->rx_ring, RXRING_PHY, true, VNET_RING_PHY, NULL); - - cfg->rxbuf_size = sizeof(struct pkt); - - cfg->csb_phy = CSB_PHY; - - memset(&cfg->tr, 0, sizeof(struct V1000Translation)); - configure_table(cfg, 0, cfg->tx_ring.phy, - cfg->tx_ring.num * sizeof(struct e1000_tx_desc), - &tx_desc_mem[0]); - configure_table(cfg, 1, cfg->rx_ring.phy, - cfg->rx_ring.num * sizeof(struct e1000_rx_desc), - &rx_desc_mem[0]); - configure_table(cfg, 2, SKBUFFS_PHY, NUM_SKBUFFS * sizeof(struct pkt), - &skbuffs[0]); - configure_table(cfg, 3, cfg->csb_phy, NET_PARAVIRT_CSB_SIZE, &csb_mem); - configure_table(cfg, 4, cfg->rx_ring.hdr.phy, - cfg->rx_ring.num * sizeof(struct virtio_net_hdr), - &vnet_hdr_rings[cfg->tx_ring.num]); - cfg->tr.num = 5; - - strcpy(tapname, "tap"); - strcpy(tapname + 3, ri); - tfd = tun_alloc(tapname, IFF_TAP | IFF_NO_PI | IFF_VNET_HDR); - if (tfd < 0) { - perror("tun_alloc()\n"); - exit(EXIT_FAILURE); - } - cfg->tapfd = tfd; - - /* Flush the configuration to the v1000 device. */ - n = write(vfd, cfg, sizeof(struct V1000Config)); - if (n != sizeof(struct V1000Config)) { - perror("v1000 configuration failed!\n"); - printf("write returned %d\n", n); - exit(EXIT_FAILURE); - } -} - -/* Closes all the file descriptors opened. */ -static void cleanup(int vfd, struct V1000Config * cfg) -{ - close(cfg->tx_ring.ioeventfd); - close(cfg->tx_ring.irqfd); - if (use_resamplefd) - close(cfg->tx_ring.resamplefd); - close(cfg->rx_ring.ioeventfd); - close(cfg->rx_ring.irqfd); - if (use_resamplefd) - close(cfg->rx_ring.resamplefd); - close(cfg->tapfd); - close(vfd); -} - -/* CSB initialization. */ -static void csb_init(struct paravirt_csb * csb) -{ - csb->guest_tdt = 0; - csb->guest_need_txkick = 0; - csb->guest_need_rxkick = 1; - csb->guest_csb_on = 1; - csb->guest_rdt = 0; - csb->guest_txkick_at = ~0; - csb->host_tdh = 0; - csb->host_need_txkick = 1; - csb->host_txcycles_lim = 1; - csb->host_txcycles = 0; - csb->host_rdh = 0; - csb->host_need_rxkick = 1; - csb->host_isr = 0; - csb->host_rxkick_at = 0; - csb->vnet_ring_high = 0; //XXX - csb->vnet_ring_low = 0; //XXX -} - -/* Prefill all the TX frames. */ -static void build_tx_frames(const char * si, const char * ri, - struct pkt * frames, int num) -{ -#define NUM_ARGS 10 - char * argv[NUM_ARGS]; - char argc; - uint32_t i; - - memset(frames, 0, num * sizeof(struct pkt)); - - /* Some memory for the string arguments. */ - for (i=0; iguest_tdt; - /* We need space for MAX_NUM_FRAGS and a context descriptor. */ - if (tx_descriptors_avail(tdt, ntc) < MAX_NUM_FRAGS+1) { - //printf("TX ring full\n"); - csb->guest_need_txkick = 1; - compiler_barrier(); - /* Doublecheck. */ - ntc = clean_used_tx_descriptors(tx_desc, ntc); - if (tx_descriptors_avail(tdt, ntc) >= MAX_NUM_FRAGS+1) - goto more_used; - if (read(cfg->tx_ring.irqfd, &event, sizeof(event)) - != sizeof(event)) { - perror("read(tx_ring.irqfd)\n"); - exit(EXIT_FAILURE); - } -more_used: - csb->guest_need_txkick = 0; - compiler_barrier(); - continue; - } - - /* Insert a context descriptor. */ - if (tx_context_descriptor) { - txc_desc[tdt].lower_setup.ip_config = 0; - txc_desc[tdt].upper_setup.tcp_fields.tucss = sizeof(frame->eh) + sizeof(frame->ip); /* 34 */ - txc_desc[tdt].upper_setup.tcp_fields.tucso = txc_desc[tdt].upper_setup.tcp_fields.tucss + 6; - txc_desc[tdt].upper_setup.tcp_fields.tucse = 0; /* Checksum up to the end of the frame. */ - txc_desc[tdt].cmd_and_length = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C - | E1000_TXD_CMD_RS; /* TODO remove this flag when implement next-to-watch */ - txc_desc[tdt].tcp_seg_setup.data = 0; - if (++tdt == NUM_DESCRIPTORS) - tdt = 0; - } - - /* Insert a new frame in the ring, using as many TX descriptors - as needed. */ - offset = 0; - frag_size = ETH_FRAME_SIZE / frags; - if (frag_size > 0xffff) { - printf("Fragment too big (%d)\n", frag_size); - return NULL; - } - while (offset < ETH_FRAME_SIZE) { - tx_desc[tdt].upper.data = 0; - tx_desc[tdt].lower.data = E1000_TXD_DTYP_D | E1000_TXD_CMD_DEXT - | E1000_TXD_CMD_RS; /* TODO remove this flag when implement next-to-watch */ - if (ETH_FRAME_SIZE - offset <= frag_size) { - frag_size = ETH_FRAME_SIZE - offset; - tx_desc[tdt].lower.data |= E1000_TXD_CMD_EOP - | E1000_TXD_CMD_RS; - } - /* Request the NIC to insert a TCP/UDP checksum by setting - the proper bit in the POPTS field of the first data - descriptor packet. */ - if (tx_context_descriptor && offset == 0) - tx_desc[tdt].upper.data |= ((E1000_TXD_POPTS_TXSM) << 8); - - tx_desc[tdt].buffer_addr = SKBUFFS_PHY + skb_tx - * sizeof(struct pkt) + offset; - tx_desc[tdt].lower.data |= frag_size; - offset += frag_size; - //printf("[tdt=%u]: phy=%lu, lower.data=%u\n", tdt, tx_desc[tdt].buffer_addr, tx_desc[tdt].lower.data); - if (++tdt == NUM_DESCRIPTORS) - tdt = 0; - } - rate_txpkts++; - if (++skb_tx == NUM_TX_SKBUFFS) - skb_tx = 0; - - compiler_barrier(); - - /* Kick the v1000 tx frontend (if is the case. */ - csb->guest_tdt = tdt; - if (csb->host_need_txkick) { - write(cfg->tx_ring.ioeventfd, &event, sizeof(event)); - rate_txkicks++; - //printf("TX kick\n"); - } - /*printf("ntc=%d, tdt=%d\n", ntc, tdt); - printf("txpkts=%d, txkicks=%d\n", rate_txpkts, rate_txkicks);*/ - } - - return NULL; -} - -struct sgvec { - uint64_t phy; - uint64_t len; -}; - -/* Prepare new available RX descriptors. */ -static uint32_t prepare_rx_descriptors(volatile struct e1000_rx_desc* desc, - uint32_t idx, int num) -{ - while (num) { - desc[idx].buffer_addr = SKBUFFS_PHY + - (NUM_TX_SKBUFFS + skb_rx) * sizeof(struct pkt); - desc[idx].length = 0; - desc[idx].csum = 0; - desc[idx].status = 0; - desc[idx].errors = 0; - desc[idx].special = 0; - if (++idx == NUM_DESCRIPTORS) - idx = 0; - if (++skb_rx == NUM_RX_SKBUFFS) - skb_rx = 0; - num--; - } - - return idx; -} - -static int check_rx_header(volatile struct virtio_net_hdr * hdr) -{ - return hdr->flags || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE || - hdr->hdr_len || hdr->gso_size || hdr->csum_start || - hdr->csum_offset; -} - -static void print_hex(const char * name, unsigned char * d, int size) -{ - int i; - - printf("%s: ", name); - for (i=0; i= sizeof(p->eh)) - print_hex("ethernet", (unsigned char *)&p->eh, sizeof(p->eh)); - if (len >= sizeof(p->eh) + sizeof(p->ip)) - print_hex("ip", (unsigned char *)&p->ip, sizeof(p->ip)); - if (len >= sizeof(p->eh) + sizeof(p->ip) + sizeof(p->udp)) { - print_hex("udp", (unsigned char *)&p->udp, sizeof(p->udp)); - print_hex("body", (unsigned char *)&p->body, len - - (sizeof(p->eh) + sizeof(p->ip) + sizeof(p->udp))); - } - printf("\n"); -} - -static int check_received_packet(struct sgvec * sg, int sgcnt) -{ - struct pkt packet; - struct pkt * p = &packet; - uint32_t * d; - int i; - int offset = 0; - int hdrlen = sizeof(p->eh) + sizeof(p->ip) + sizeof(p->udp); - - for (i=0; irx_ring.irqfd, &event, sizeof(event)) - != sizeof(event)) { - perror("read(tx_ring.irqfd)\n"); - exit(EXIT_FAILURE); - } - rate_rxintrs++; -again: - /* Disable notifications. */ - csb->guest_need_rxkick = 0; - compiler_barrier(); - - /* Receive and clean used descriptors. */ - while (rx_desc[ntr].status & E1000_RXD_STAT_DD) { - //printf("Received packet [len=%u,ntr=%u,rdt=%u]\n", rx_desc[ntr].length, ntr, csb->guest_rdt); - if (sgcnt == MAX_NUM_FRAGS) { - /* If this happens, there is a bug in the kernel code. */ - printf("BUG: oversized RX descriptors chain.\n"); - sgcnt = 0; /* Force to 0 to avoid buffer overflow. */ - } - if (!sgcnt) { - /* First frame fragment. */ - if (check_rx_header(rx_hdr + ntr)) { - printf("Bad viritio-net header\n"); - } - } - sg[sgcnt].phy = rx_desc[ntr].buffer_addr; - sg[sgcnt].len = rx_desc[ntr].length; - sgcnt++; - if (rx_desc[ntr].status & E1000_RXD_STAT_EOP) { - sg[sgcnt-1].len -= 4; /* Remove FSC/CRC. */ - check_received_packet(&sg[0], sgcnt); - rate_rxpkts++; - sgcnt = 0; -#ifdef RATE - if (rate_rxpkts == overflow) { - /* Rating report. */ - gettimeofday(&te, NULL); - usecs = (te.tv_sec - tb.tv_sec) * 1000000 + - te.tv_usec - tb.tv_usec; - printf("RX: %3.6f Mpps\n", rate_rxpkts/((double)usecs)); - printf("RXINTR: %3.6f Mpps\n", rate_rxintrs/((double)usecs)); - printf("\n"); - rate_rxpkts = rate_rxintrs = 0; - if (usecs < RATE_LB_US) - overflow *= 2; - else if (usecs > RATE_UB_US) - overflow /= 2; - gettimeofday(&tb, NULL); - } -#endif /* RATE */ - } - rx_desc[ntr].status = 0; - csb->guest_rdt = prepare_rx_descriptors(rx_desc, csb->guest_rdt, 1); - compiler_barrier(); - if (csb->host_rxkick_at == ntr) { - write(cfg->rx_ring.ioeventfd, &event, sizeof(event)); - rate_rxkicks++; - //printf("RX kick\n"); - } - if (++ntr == NUM_DESCRIPTORS) - ntr = 0; - } - - /* Reenable notifications. */ - csb->guest_need_rxkick = 1; - compiler_barrier(); - /* Doublecheck. */ - if (rx_desc[ntr].status & E1000_RXD_STAT_DD) { - goto again; - } - } - - return NULL; -} - -void usage() -{ - printf("CMD INDEX [s {INDEX,H}] [r]\n"); - exit(EXIT_FAILURE); -} - -int main(int argc, char ** argv) -{ - pthread_t sender_thread; - pthread_t receiver_thread; - const char * ri; - const char * si; - int enable_sender = 0; - int enable_receiver = 0; - int vfd; - int c; - - /* Program input parsing. */ - if (argc < 2) - usage(); - ri = argv[1]; - si = NULL; - for (c=2; c -#include -#include -#include -#include - -#include "tun_alloc.h" - - -int tun_alloc(char *dev, int flags) { - - struct ifreq ifr; - int fd, err; - char *clonedev = "/dev/net/tun"; - - /* Arguments taken by the function: - * - * char *dev: the name of an interface (or '\0'). MUST have enough - * space to hold the interface name if '\0' is passed - * int flags: interface flags (eg, IFF_TUN etc.) - */ - - /* open the clone device */ - if( (fd = open(clonedev, O_RDWR)) < 0 ) { - perror( "open(/dev/net/tun)" ); - return fd; - } - - /* preparation of the struct ifr, of type "struct ifreq" */ - memset(&ifr, 0, sizeof(ifr)); - - ifr.ifr_flags = flags; /* IFF_TUN or IFF_TAP, plus maybe IFF_NO_PI */ - - if (*dev) { - /* if a device name was specified, put it in the structure; otherwise, - * the kernel will try to allocate the "next" device of the - * specified type */ - strncpy(ifr.ifr_name, dev, IFNAMSIZ); - } - - /* try to create the device */ - //err = ioctl(fd, 0x400454ca, (void *) &ifr); - err = ioctl(fd, TUNSETIFF, (void *) &ifr); - if( err < 0 ) { - perror( "ioctl(TUNSETIFF)" ); - close(fd); - return err; - } - - /* if the operation was successful, write back the name of the - * interface to the variable "dev", so the caller can know - * it. Note that the caller MUST reserve space in *dev (see calling - * code below) */ - strcpy(dev, ifr.ifr_name); - - /* this is the special file descriptor that the caller will use to talk - * with the virtual interface */ - return fd; -} diff --git a/private/LINUX/vhost-port/tun_alloc.h b/private/LINUX/vhost-port/tun_alloc.h deleted file mode 100644 index 664094e46..000000000 --- a/private/LINUX/vhost-port/tun_alloc.h +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include - -int tun_alloc( char *dev, int flags ); - -/* SHELL COMMANDS - per creare ed eliminare interfacce TUN/TAP persistenti (iproute2) - ip tuntap add mode tun name tun0 - ip tuntap add mode tun name tun1 - - per assegnare indirizzi IP alle interfacce - ip link set tun0 up - ip link set tun1 up - ip addr add 10.0.0.1/24 dev tun0 - ip addr add 10.0.0.2/24 dev tun1 - - bridging (non serve in questo caso) - brctl addbr br0 - brctl addif br0 tun0 - brctl addif br0 tun1 - ip addr del 10.0.0.1/24 dev tun0 - ip addr del 10.0.0.2/24 dev tun1 - ip link set br0 up - ip addr add 10.0.0.1/24 dev br0 - ... -*/ diff --git a/private/LINUX/vhost-port/v1000.c b/private/LINUX/vhost-port/v1000.c deleted file mode 100644 index cccd7c4f4..000000000 --- a/private/LINUX/vhost-port/v1000.c +++ /dev/null @@ -1,335 +0,0 @@ -/* Copyright (C) 2009 Red Hat, Inc. - * Copyright (C) 2006 Rusty Russell IBM Corporation - * - * Author: Michael S. Tsirkin - * - * Inspiration, some code, and most witty comments come from - * Documentation/virtual/lguest/lguest.c, by Rusty Russell - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Generic code for virtio server in host kernel. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "v1000.h" - - -static void v1000_poll_func(struct file *file, wait_queue_head_t *wqh, - poll_table *pt) -{ - struct v1000_poll *poll; - - poll = container_of(pt, struct v1000_poll, table); - poll->wqh = wqh; - add_wait_queue(wqh, &poll->wait); -} - -static int v1000_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, - void *key) -{ - struct v1000_poll *poll = container_of(wait, struct v1000_poll, wait); - - //printk("%p.v1000_poll_wakeup(), %lu, %lu\n",poll,(unsigned long)key, poll->mask); - if (!((unsigned long)key & poll->mask)) - return 0; - - v1000_poll_queue(poll); - return 0; -} - -void v1000_work_init(struct v1000_work *work, v1000_work_fn_t fn) -{ - INIT_LIST_HEAD(&work->node); - work->fn = fn; - init_waitqueue_head(&work->done); - work->flushing = 0; - work->queue_seq = work->done_seq = 0; -} - -/* Init poll structure */ -void v1000_poll_init(struct v1000_poll *poll, v1000_work_fn_t fn, - unsigned long mask, struct v1000_dev *dev) -{ - init_waitqueue_func_entry(&poll->wait, v1000_poll_wakeup); - init_poll_funcptr(&poll->table, v1000_poll_func); - poll->mask = mask; - poll->dev = dev; - poll->wqh = NULL; - - v1000_work_init(&poll->work, fn); -} - -/* Start polling a file. We add ourselves to file's wait queue. The caller must - * keep a reference to a file until after v1000_poll_stop is called. */ -int v1000_poll_start(struct v1000_poll *poll, struct file *file) -{ - unsigned long mask; - int ret = 0; - - if (poll->wqh) - return 0; - - mask = file->f_op->poll(file, &poll->table); - if (mask) - v1000_poll_wakeup(&poll->wait, 0, 0, (void *)mask); - if (mask & POLLERR) { - if (poll->wqh) - remove_wait_queue(poll->wqh, &poll->wait); - ret = -EINVAL; - } - printk("%p.poll_start()\n", poll); - return ret; -} - -/* Stop polling a file. After this function returns, it becomes safe to drop the - * file reference. You must also flush afterwards. */ -void v1000_poll_stop(struct v1000_poll *poll) -{ - if (poll->wqh) { - remove_wait_queue(poll->wqh, &poll->wait); - poll->wqh = NULL; - } - printk("%p.poll_stop()\n", poll); -} - -static bool v1000_work_seq_done(struct v1000_dev *dev, struct v1000_work *work, - unsigned seq) -{ - int left; - - spin_lock_irq(&dev->work_lock); - left = seq - work->done_seq; - spin_unlock_irq(&dev->work_lock); - return left <= 0; -} - -static void v1000_work_flush(struct v1000_dev *dev, struct v1000_work *work) -{ - unsigned seq; - int flushing; - - spin_lock_irq(&dev->work_lock); - seq = work->queue_seq; - work->flushing++; - spin_unlock_irq(&dev->work_lock); - wait_event(work->done, v1000_work_seq_done(dev, work, seq)); - spin_lock_irq(&dev->work_lock); - flushing = --work->flushing; - spin_unlock_irq(&dev->work_lock); - BUG_ON(flushing < 0); -} - -/* Flush any work that has been scheduled. When calling this, don't hold any - * locks that are also used by the callback. */ -void v1000_poll_flush(struct v1000_poll *poll) -{ - v1000_work_flush(poll->dev, &poll->work); -} - -void v1000_work_queue(struct v1000_dev *dev, struct v1000_work *work) -{ - unsigned long flags; - - spin_lock_irqsave(&dev->work_lock, flags); - if (list_empty(&work->node)) { - list_add_tail(&work->node, &dev->work_list); - work->queue_seq++; - wake_up_process(dev->worker); - } - spin_unlock_irqrestore(&dev->work_lock, flags); -} - -void v1000_poll_queue(struct v1000_poll *poll) -{ - v1000_work_queue(poll->dev, &poll->work); -} - -static void v1000_vr_reset(struct v1000_dev *dev, - struct v1000_ring *vr) -{ - vr->private_data = NULL; - vr->kick = NULL; - vr->call_ctx = NULL; - vr->call = NULL; -} - -static int v1000_worker(void *data) -{ - struct v1000_dev *dev = data; - struct v1000_work *work = NULL; - unsigned uninitialized_var(seq); - mm_segment_t oldfs = get_fs(); - - set_fs(USER_DS); - use_mm(dev->mm); - - for (;;) { - /* mb paired w/ kthread_stop */ - set_current_state(TASK_INTERRUPTIBLE); - - spin_lock_irq(&dev->work_lock); - if (work) { - work->done_seq = seq; - if (work->flushing) - wake_up_all(&work->done); - } - - if (kthread_should_stop()) { - spin_unlock_irq(&dev->work_lock); - __set_current_state(TASK_RUNNING); - break; - } - if (!list_empty(&dev->work_list)) { - work = list_first_entry(&dev->work_list, - struct v1000_work, node); - list_del_init(&work->node); - seq = work->queue_seq; - } else - work = NULL; - spin_unlock_irq(&dev->work_lock); - - if (work) { - __set_current_state(TASK_RUNNING); - work->fn(work); - if (need_resched()) - schedule(); - } else - schedule(); - - } - unuse_mm(dev->mm); - set_fs(oldfs); - return 0; -} - -long v1000_dev_init(struct v1000_dev * dev, struct v1000_ring * tx_ring, - struct v1000_ring * rx_ring) -{ - int i; - - dev->rings[0] = dev->tx_ring = tx_ring; - dev->rings[1] = dev->rx_ring = rx_ring; - - mutex_init(&dev->mutex); - dev->memory = NULL; - dev->mm = NULL; - spin_lock_init(&dev->work_lock); - INIT_LIST_HEAD(&dev->work_list); - dev->worker = NULL; - - for (i=0; i<2; i++) { - dev->rings[i]->dev = dev; - mutex_init(&dev->rings[i]->mutex); - v1000_vr_reset(dev, dev->rings[i]); - if (dev->rings[i]->handle_kick) - v1000_poll_init(&dev->rings[i]->poll, - dev->rings[i]->handle_kick, POLLIN, dev); - } - - return 0; -} - -/* Caller should have device mutex */ -long v1000_dev_check_owner(struct v1000_dev *dev) -{ - /* Are you the owner? If not, I don't think you mean to do that */ - return dev->mm == current->mm ? 0 : -EPERM; -} - -/* Caller should have device mutex */ -long v1000_dev_set_owner(struct v1000_dev *dev) -{ - struct task_struct *worker; - int err; - - /* Is there an owner already? */ - if (dev->mm) { - err = -EBUSY; - goto err_mm; - } - - /* No owner, become one */ - dev->mm = get_task_mm(current); - worker = kthread_create(v1000_worker, dev, "v1000-%d", current->pid); - if (IS_ERR(worker)) { - err = PTR_ERR(worker); - goto err_worker; - } - - dev->worker = worker; - wake_up_process(worker); /* avoid contributing to loadavg */ - - - return 0; -err_worker: - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; -err_mm: - return err; -} - -void v1000_dev_stop(struct v1000_dev *dev) -{ - int i; - - for (i = 0; i<2; i++) { - if (dev->rings[i]->kick && dev->rings[i]->handle_kick) { - v1000_poll_stop(&dev->rings[i]->poll); - v1000_poll_flush(&dev->rings[i]->poll); - } - } -} - -/* Caller should have device mutex if and only if locked is set */ -void v1000_dev_cleanup(struct v1000_dev *dev) -{ - int i; - - for (i = 0; i<2; i++) { - if (dev->rings[i]->kick) - fput(dev->rings[i]->kick); - if (dev->rings[i]->call_ctx) - eventfd_ctx_put(dev->rings[i]->call_ctx); - if (dev->rings[i]->call) - fput(dev->rings[i]->call); - v1000_vr_reset(dev, dev->rings[i]); - } - /* No one will access memory at this point */ - kfree(rcu_dereference_protected(dev->memory, - false == - lockdep_is_held(&dev->mutex))); - RCU_INIT_POINTER(dev->memory, NULL); - WARN_ON(!list_empty(&dev->work_list)); - if (dev->worker) { - kthread_stop(dev->worker); - dev->worker = NULL; - } - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; -} - -/* This actually signals the guest, using eventfd. */ -void vhost_signal(struct v1000_dev *dev, struct v1000_ring *vr) -{ - /* Signal the Guest tell them we used something up. */ - if (vr->call_ctx) - eventfd_signal(vr->call_ctx, 1); -} - diff --git a/private/LINUX/vhost-port/v1000.h b/private/LINUX/vhost-port/v1000.h deleted file mode 100644 index 185ea14b6..000000000 --- a/private/LINUX/vhost-port/v1000.h +++ /dev/null @@ -1,104 +0,0 @@ -#ifndef _VHOST_H -#define _VHOST_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -struct v1000_work; -typedef void (*v1000_work_fn_t)(struct v1000_work *work); - -struct v1000_work { - struct list_head node; - v1000_work_fn_t fn; - wait_queue_head_t done; - int flushing; - unsigned queue_seq; - unsigned done_seq; -}; - -/* Poll a file (eventfd or socket) */ -/* Note: there's nothing vhost specific about this structure. */ -struct v1000_poll { - poll_table table; - wait_queue_head_t *wqh; - wait_queue_t wait; - struct v1000_work work; - unsigned long mask; - struct v1000_dev *dev; -}; - -void v1000_work_init(struct v1000_work *work, v1000_work_fn_t fn); -void v1000_work_queue(struct v1000_dev *dev, struct v1000_work *work); - -void v1000_poll_init(struct v1000_poll *poll, v1000_work_fn_t fn, - unsigned long mask, struct v1000_dev *dev); -int v1000_poll_start(struct v1000_poll *poll, struct file *file); -void v1000_poll_stop(struct v1000_poll *poll); -void v1000_poll_flush(struct v1000_poll *poll); -void v1000_poll_queue(struct v1000_poll *poll); - -struct writeback_info { - uint8_t * addr; - uint8_t value; -}; - -struct v1000_ring; - -/* The v1000_ring structure describes a queue attached to a device. */ -struct v1000_ring { - struct v1000_dev *dev; - - struct mutex mutex; - struct file *kick; - struct file *call; - struct eventfd_ctx *call_ctx; - struct file *resample; - struct eventfd_ctx *resample_ctx; - - struct v1000_poll poll; - - /* The routine to call when the Guest pings us, or timeout. */ - v1000_work_fn_t handle_kick; - - struct iovec iov[UIO_MAXIOV]; - - struct writeback_info wb[UIO_MAXIOV]; - - /* Protected by virtual ring mutex. */ - void *private_data; -}; - -struct v1000_dev { - /* Readers use RCU to access memory table pointer - * log base pointer and features. - * Writers use mutex below.*/ - struct V1000Translation __rcu *memory; - struct mm_struct *mm; - struct mutex mutex; - struct v1000_ring * tx_ring; - struct v1000_ring * rx_ring; - struct v1000_ring * rings[2]; - spinlock_t work_lock; - struct list_head work_list; - struct task_struct *worker; -}; - -long v1000_dev_init(struct v1000_dev *, struct v1000_ring *, struct v1000_ring *); -void v1000_dev_cleanup(struct v1000_dev *); -void v1000_dev_stop(struct v1000_dev *); -int v1000_vr_access_ok(struct v1000_ring *vr); -long v1000_dev_set_owner(struct v1000_dev *dev); - - -#include "v1000_user.h" - -#endif diff --git a/private/LINUX/vhost-port/v1000_user.h b/private/LINUX/vhost-port/v1000_user.h deleted file mode 100644 index 0407f4e5d..000000000 --- a/private/LINUX/vhost-port/v1000_user.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef __V1000__USER__HH -#define __V1000__USER__HH - - -struct V1000TranslationElem { - uint64_t phy; - uint64_t length; - void * virt; -}; - -struct V1000Translation { -#define MAX_TRANSLATION_ELEMENTS 64 - struct V1000TranslationElem table[MAX_TRANSLATION_ELEMENTS]; - unsigned num; -}; - -struct V1000RingConfig { - uint64_t phy; - union { - uint64_t phy; /* For the RX ring. */ - void * virt; /* For the TX ring. */ - } hdr; - uint32_t num; - uint32_t ioeventfd; - uint32_t irqfd; - uint32_t resamplefd; -}; - -struct V1000Config { - struct V1000RingConfig tx_ring; - struct V1000RingConfig rx_ring; - uint32_t rxbuf_size; /* RX buffer size. */ - uint64_t csb_phy; /* CSB physical address. */ - uint32_t tapfd; /* Backend file descriptor. */ - struct V1000Translation tr; -}; - - -#include "e1000_regs.h" - - -#endif diff --git a/private/LINUX/vhost-port/vhost-code-tracking.txt b/private/LINUX/vhost-port/vhost-code-tracking.txt deleted file mode 100644 index 726edd4d9..000000000 --- a/private/LINUX/vhost-port/vhost-code-tracking.txt +++ /dev/null @@ -1,7 +0,0 @@ -# These are linux commit identifiers -# - 'root' is the commit from which I did the original porting. -# - 'last-included' is the last commit that the current v1000 -# is synchronized with. - -root:46aa92d1ba162b4b3d6b7102440e459d4e4ee255 -last-included:f7c6be404d8fa52c54ff931390aab01e5c7654d6 diff --git a/private/LINUX/wip-patches/diff--mellanox--30300--30800 b/private/LINUX/wip-patches/diff--mellanox--30300--30800 deleted file mode 100644 index 9cb61aa5d..000000000 --- a/private/LINUX/wip-patches/diff--mellanox--30300--30800 +++ /dev/null @@ -1,145 +0,0 @@ -diff -urp --exclude '*.o' --exclude '*.cmd' --exclude '*mod.c' drivers/net/ethernet/mellanox/mlx4/en_netdev.c ./mellanox/mlx4/en_netdev.c ---- drivers/net/ethernet/mellanox/mlx4/en_netdev.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mellanox/mlx4/en_netdev.c 2012-09-27 00:05:22.703523430 -0700 -@@ -48,6 +48,39 @@ - #include "mlx4_en.h" - #include "en_port.h" - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * This driver is split in multiple small files. -+ * The main device descriptor has type struct mlx4_en_priv *priv; -+ * and we attach to the device in mlx4_en_init_netdev() -+ * (do port numbers start from 1 ?) -+ * -+ * The reconfig routine is in mlx4_en_start_port() (also here) -+ * which is called on a mlx4_en_restart() (watchdog), open and set-mtu. -+ * -+ * priv->num_frags ?? -+ * DS_SIZE ?? -+ * apparently each rx desc is followed by frag.descriptors -+ * and the rx desc is rounded up to a power of 2. -+ * -+ * Receive code is in en_rx.c -+ * priv->rx_ring_num number of rx rings -+ * rxr = prov->rx_ring[ring_ind] rx ring descriptor -+ * rxr->size number of slots -+ * rxr->prod producer -+ * probably written into a mmio reg at *rxr->wqres.db.db -+ * trimmed to 16 bits. -+ * -+ * Rx init routine: -+ * mlx4_en_activate_rx_rings() -+ * mlx4_en_init_rx_desc() -+ * Transmit code is in en_tx.c -+ */ -+ -+#define NETMAP_MLX4_MAIN -+#include /* extern stuff */ -+#endif /* CONFIG_NETMAP */ -+ - int mlx4_en_setup_tc(struct net_device *dev, u8 up) - { - if (up != MLX4_EN_NUM_UP) -@@ -1042,6 +1075,9 @@ int mlx4_en_start_port(struct net_device - /* Set initial ownership of all Tx TXBBs to SW (1) */ - for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) - *((u32 *) (tx_ring->buf + j)) = 0xffffffff; -+#ifdef DEV_NETMAP -+ mlx4_netmap_tx_config(priv, i); -+#endif /* DEV_NETMAP */ - ++tx_index; - } - -@@ -1639,6 +1675,9 @@ int mlx4_en_init_netdev(struct mlx4_en_d - en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); - - queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); -+#ifdef DEV_NETMAP -+ mlx4_netmap_attach(priv); -+#endif /* DEV_NETMAP */ - return 0; - - out: ---- drivers/net/ethernet/mellanox/mlx4/en_rx.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mellanox/mlx4/en_rx.c 2012-09-27 00:13:16.099550954 -0700 -@@ -41,6 +41,9 @@ - - #include "mlx4_en.h" - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif /* !DEV_NETMAP */ - - static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, -@@ -365,9 +368,16 @@ int mlx4_en_activate_rx_rings(struct mlx - ring = &priv->rx_ring[ring_ind]; - - ring->size_mask = ring->actual_size - 1; -+#ifdef DEV_NETMAP -+ if (nm_native_on(NA(priv->dev))) { -+ int saved_cons = ring->cons; -+ mlx4_en_free_rx_buf(priv, ring); -+ ring->cons = saved_cons; -+ mlx4_netmap_rx_config(priv, ring_ind); -+ } -+#endif /* DEV_NETMAP */ - mlx4_en_update_rx_prod_db(ring); - } -- - return 0; - - err_buffers: -@@ -402,6 +412,11 @@ void mlx4_en_destroy_rx_ring(struct mlx4 - void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) - { -+#ifdef DEV_NETMAP -+ if (nm_native_on(NA(priv->dev))) -+ ND("netmap mode, rx buf already freed"); -+ else -+#endif /* DEV_NETMAP */ - mlx4_en_free_rx_buf(priv, ring); - if (ring->stride <= TXBB_SIZE) - ring->buf -= TXBB_SIZE; -@@ -718,6 +739,11 @@ int mlx4_en_poll_rx_cq(struct napi_struc - struct mlx4_en_priv *priv = netdev_priv(dev); - int done; - -+#ifdef DEV_NETMAP -+ if (netmap_rx_irq(cq->dev, cq->ring, &done)) { -+ ND("rx_irq %d for netmap, budget %d done %d", cq->ring, budget, done); -+ } else -+#endif /* DEV_NETMAP */ - done = mlx4_en_process_rx_cq(dev, cq, budget); - - /* If we used up all the quota - we're probably not done yet... */ ---- drivers/net/ethernet/mellanox/mlx4/en_tx.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mellanox/mlx4/en_tx.c 2012-09-27 00:05:22.713523348 -0700 -@@ -55,6 +55,10 @@ MODULE_PARM_DESC(inline_thold, "threshol - - static u32 hashrnd __read_mostly; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include /* extern stuff */ -+#endif /* CONFIG_NETMAP */ -+ - int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, u32 size, - u16 stride) -@@ -396,6 +400,13 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq) - - if (!spin_trylock(&ring->comp_lock)) - return; -+#ifdef DEV_NETMAP -+ /* XXX should be integrated with appropriate lock_wrapper manner? */ -+ if (netmap_tx_irq(cq->dev, cq->ring)) { -+ ND(5, "wakeup queue %d", cq->ring); -+ spin_unlock(&ring->comp_lock); -+ return; -+ } -+#endif /* DEV_NETMAP */ - mlx4_en_process_tx_cq(cq->dev, cq); - mod_timer(&cq->timer, jiffies + 1); - spin_unlock(&ring->comp_lock); diff --git a/private/LINUX/wip-patches/diff--mlx4--20630--30200 b/private/LINUX/wip-patches/diff--mlx4--20630--30200 deleted file mode 100644 index 86383bb8f..000000000 --- a/private/LINUX/wip-patches/diff--mlx4--20630--30200 +++ /dev/null @@ -1,163 +0,0 @@ -diff -urp --exclude '*.o' --exclude '*.cmd' --exclude '*mod.c' drivers/net/ethernet/mellanox/mlx4/en_netdev.c ./mellanox/mlx4/en_netdev.c ---- drivers/net/ethernet/mellanox/mlx4/en_netdev.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mlx4/en_netdev.c 2012-09-27 00:05:22.703523430 -0700 -@@ -48,6 +48,39 @@ - #include "mlx4_en.h" - #include "en_port.h" - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+/* -+ * This driver is split in multiple small files. -+ * The main device descriptor has type struct mlx4_en_priv *priv; -+ * and we attach to the device in mlx4_en_init_netdev() -+ * (do port numbers start from 1 ?) -+ * -+ * The reconfig routine is in mlx4_en_start_port() (also here) -+ * which is called on a mlx4_en_restart() (watchdog), open and set-mtu. -+ * -+ * priv->num_frags ?? -+ * DS_SIZE ?? -+ * apparently each rx desc is followed by frag.descriptors -+ * and the rx desc is rounded up to a power of 2. -+ * -+ * Receive code is in en_rx.c -+ * priv->rx_ring_num number of rx rings -+ * rxr = prov->rx_ring[ring_ind] rx ring descriptor -+ * rxr->size number of slots -+ * rxr->prod producer -+ * probably written into a mmio reg at *rxr->wqres.db.db -+ * trimmed to 16 bits. -+ * -+ * Rx init routine: -+ * mlx4_en_activate_rx_rings() -+ * mlx4_en_init_rx_desc() -+ * Transmit code is in en_tx.c -+ */ -+ -+#define NETMAP_MLX4_MAIN -+#include /* extern stuff */ -+#endif /* CONFIG_NETMAP */ -+ - int mlx4_en_setup_tc(struct net_device *dev, u8 up) - { - if (up != MLX4_EN_NUM_UP) -@@ -1042,6 +1075,9 @@ int mlx4_en_start_port(struct net_device - /* Set initial ownership of all Tx TXBBs to SW (1) */ - for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) - *((u32 *) (tx_ring->buf + j)) = 0xffffffff; -+#ifdef DEV_NETMAP -+ mlx4_netmap_tx_config(priv, i); -+#endif /* DEV_NETMAP */ - ++tx_index; - } - -@@ -1639,6 +1675,9 @@ int mlx4_en_init_netdev(struct mlx4_en_d - en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); - - queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); -+#ifdef DEV_NETMAP -+ mlx4_netmap_attach(priv); -+#endif /* DEV_NETMAP */ - return 0; - - out: ---- drivers/net/ethernet/mellanox/mlx4/en_rx.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mlx4/en_rx.c 2012-09-27 00:13:16.099550954 -0700 -@@ -41,6 +41,9 @@ - - #include "mlx4_en.h" - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include -+#endif /* !DEV_NETMAP */ - - static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, -@@ -365,9 +368,16 @@ int mlx4_en_activate_rx_rings(struct mlx - ring = &priv->rx_ring[ring_ind]; - - ring->size_mask = ring->actual_size - 1; -+#ifdef DEV_NETMAP -+ if (nm_native_on(NA(priv->dev))) { -+ int saved_cons = ring->cons; -+ mlx4_en_free_rx_buf(priv, ring); -+ ring->cons = saved_cons; -+ mlx4_netmap_rx_config(priv, ring_ind); -+ } -+#endif /* DEV_NETMAP */ - mlx4_en_update_rx_prod_db(ring); - } -- - return 0; - - err_buffers: -@@ -402,6 +412,11 @@ void mlx4_en_destroy_rx_ring(struct mlx4 - void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) - { -+#ifdef DEV_NETMAP -+ if (nm_native_on(NA(priv->dev))) -+ ND("netmap mode, rx buf already freed"); -+ else -+#endif /* DEV_NETMAP */ - mlx4_en_free_rx_buf(priv, ring); - if (ring->stride <= TXBB_SIZE) - ring->buf -= TXBB_SIZE; -@@ -692,6 +707,12 @@ out: - wmb(); /* ensure HW sees CQ consumer before we post new buffers */ - ring->cons = mcq->cons_index; - ring->prod += polled; /* Polled descriptors were realocated in place */ -+ -+ ND(5, "set_ci %d 0x%p val %d prod_db 0x%p val %d", -+ cq->ring, -+ mcq->set_ci_db, mcq->cons_index & 0xffffff, -+ ring->wqres.db.db, ring->prod & 0xffff); -+ - mlx4_en_update_rx_prod_db(ring); - ring->csum_ok += csum_ok; - ring->csum_none += csum_none; -@@ -718,6 +739,13 @@ int mlx4_en_poll_rx_cq(struct napi_struc - struct mlx4_en_priv *priv = netdev_priv(dev); - int done; - -+#ifdef DEV_NETMAP -+ static int cnt = 0; -+ ND(5,"XXXXXX-------XXXXXXXXXXX-------- poll-rx-cq %d count %d", (int)cq->ring, cnt++); -+ if (netmap_rx_irq(cq->dev, cq->ring, &done)) { -+ ND("rx_irq %d for netmap, budget %d done %d", cq->ring, budget, done); -+ } else -+#endif /* DEV_NETMAP */ - done = mlx4_en_process_rx_cq(dev, cq, budget); - - /* If we used up all the quota - we're probably not done yet... */ ---- drivers/net/ethernet/mellanox/mlx4/en_tx.c 2012-09-11 20:50:55.982624673 -0700 -+++ ./mlx4/en_tx.c 2012-09-27 00:05:22.713523348 -0700 -@@ -55,6 +55,10 @@ MODULE_PARM_DESC(inline_thold, "threshol - - static u32 hashrnd __read_mostly; - -+#if defined(CONFIG_NETMAP) || defined(CONFIG_NETMAP_MODULE) -+#include /* extern stuff */ -+#endif /* CONFIG_NETMAP */ -+ - int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, u32 size, - u16 stride) -@@ -396,6 +400,17 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq) - - if (!spin_trylock(&ring->comp_lock)) - return; -+#ifdef DEV_NETMAP // XXX unlock and return should be in the 'if' branch -+ static int cnt = 0; -+ ND(5,"XXXXXX-------XXXXXXXXXXX-------- tx-irq %d count %d", (int)cq->ring, cnt++); -+ if (netmap_tx_irq(cq->dev, cq->ring)) { -+ ND(5, "wakeup queue %d", cq->ring); -+ } else { -+ RD(5, "XXXXXXXXX tx_irq %d unexpected, ignoring", cq->ring); -+ } -+ spin_unlock(&ring->comp_lock); -+ return; -+#endif /* DEV_NETMAP */ - mlx4_en_process_tx_cq(cq->dev, cq); - mod_timer(&cq->timer, jiffies + 1); - spin_unlock(&ring->comp_lock); diff --git a/private/NOTES b/private/NOTES deleted file mode 100644 index 318a302f7..000000000 --- a/private/NOTES +++ /dev/null @@ -1,1179 +0,0 @@ -NOTE - THIS FILE IS PROBABLY OUTDATED - -20140128 -- generic on FreeBSD panics on lo0 when doing output - -20121227 -- netmap-epair.diff - attempt to build a driver for epair - -20121227 -- netmap-nfe.diff - partial diff to support nfe - -20121026 broadcom on 2.6.xx -- missing cnic_if.h (used by bnx2x) - ---- status and other development notes --- -20120824 devfs_get_cdevpriv() - curthread struct thread sys/proc.h - ->td_fpop struct file - ->f_cdevpriv struct cdev_privdata sys/fs/devfs/devfs_int.h - ->cdpd_data void * - - in mmap: - dev struct cdev, sys/conf.h -20120802 http://www.asciiflow.com/ ascii art - -20120724 barelli svn+ssh://27148317-unipi@onelab3.iet.unipi.it/usr/home/PPM10/27148317/thesis - -20120528 NM_BRIDGE - error on initialization order for locks. - Not a problem on linux, but it is on FreeBSD with witnesses. - -20120524 virtual bridging - -get_ifp lookup the fake bridge interface - returns the object with a reference - -netmap_if_new() allocates the software rings - -na->nm_register() puts the interface in netmap mode - in our case, attach to an (existing) bridge - - -20120503 interrupt dispatching - apic_vector.S::call lapic_handle_intr - `-> intr_execute_handlers() - `-> kern_intr.c::intr_event_handle() - `-> if ih_filter --> call filter - or if filter returns FILTER_SCHEDULE_THREAD - `-> schedule thread to execute handler - - in the lem driver, the filter is lem_irq_fast() which - calls taskqueue_enqueue to run lem_handle_rxtx. - In qemu it takes between 70 and 210k cycles (20..60us) - to schedule the task. - On the test machine, at least 10k ticks corresponding - to about 3us before the task runs (rarely we have 3k ticks). - -20120505 -- immediate operation for output and input - OUT: we get the mbuf, need to copy into the nm_buf and - kick the output queue. If the queue is idle we should - operate immediately, otherwise schedule a deferred - interrupt (txintr ?) and act on it. - -20120504 -- latency between filter and task - on the emulator (3.4GHz machine) up to 130k cycles, min 70k - on the i7-870 @2.93G the min is 3k cycles, more often 13k - and several 30k spikes. - - -20120503 -- prefetch and the like -+ added userspace flags to pkt-gen to enable various - prefetch and copies. - pkt-gen -o 1 prefetch send source - pkt-gen -o 2 access (not implemented) - pkt-gen -o 4 pkt_copy - takes from a static buffer and writes to buffers - spread in memory. The write buffer should absorb - the operation - pkt-gen -o 8 memcpy() -+ added dev.netmap.copy to test with in-kernel copies - dev.netmap.copy=1 bcopy - dev.netmap.copy=2 bcopy (later) - dev.netmap.copy=3 memcpy (later) - dev.netmap.copy=4 access (maybe ignored ?) - dev.netmap.copy=5 only prefetch - -+ test with different packet lengths (intr=3000) - 2048 - 2048 - 64 - 2048 - 128 - - - ---- buf_size: 2048-128, intr=3k, 4 cores at 900 mhz ---- - -- options=0 -- -- options=1 -- -- options=4 -- - nm.copy len=60 len=64 len=60 len=64 len=60 len=64 - 0 14.78 14.20 13.78 13.80 9.46 9.47 - 1 4.35 8.17* 4.29 7.88 3.75 6.25 - 2 4.71 7.75* 4.62 8.14* 4.01 6.42 - 3 2.85 2.85 2.81 2.75 2.57 2.57 - 4 11.70 11.70 12.35 12.35 8.78 8.78 - 5 13.98 14.00 13.04 13.04 9.11 9.11 - - ---- buf_size: 2048-64, intr=3k, 4 cores at 900 mhz ---- - -- options=0 -- -- options=1 -- -- options=4 -- - nm.copy len=60 len=64 len=60 len=64 len=60 len=64 - 0 14.78 14.20 13.72 13.70 9.45 9.44 - 1 4.35 7.68* 4.22 7.74 3.92 6.33 - 2 4.70 7.79 4.68 8.04* 4.11 6.52 - 3 2.85 2.74 2.80 2.75 2.56 2.55 - 4 12.31* 11.83* 12.31 12.28 8.78 8.78 - 5 13.94 13.93 12.96 12.98 9.03 9.09 - - ---- buf_size: 2048, intr=3k, 4 cores at 900 mhz ---- - -- options=0 -- -- options=1 -- -- options=4 -- - nm.copy len=60 len=64 len=60 len=64 len=60 len=64 - 0 14.20 8.81 8.84 - 1 6.90 3.78 6.08 - 2 7.82 3.98 6.16 - 3 2.73 2.44 2.58 - 4 12.40 12.36 8.33 8.32 - 5 14.20 8.87 8.67 - -------------- -20120419 netsend statistics (with various breakpoints) -sysctl dev.ix.0.enable_aim=0 -sysctl dev.ix.0.queue0.interrupt_rate=5000 -sysctl dev.ix.0.fc=0 -sysctl net.inet.drop ... - -call tree -send() -sendto() ----- within the kernel ------ R/W = lock, T= tail call, C = normall call, -kern/uipc_syscalls.c :: sys_sendto() -kern/uipc_syscalls.c :: sendit() -kern/uipc_syscalls.c :: kern_sendit() -kern/uipc_socket.c :: sosend() - so->so_proto->pr_usrreqs->pru_sosend = sosend_dgram -kern/uipc_socket.c :: sosend_dgram() - so->so_proto->pr_usrreqs->pru_send = udp_send -netinet/udp_usrreq.c :: udp_send() -netinet/udp_usrreq.c :: udp_output() -netinet/ip_output.c :: ip_output() - ifp->if_output = ether_output -net/if_ethersubr.c :: ether_output() - memcpy() or arpresolve() - 3 memcpy for MAC header - pf_find_mtag() and csum_flags - -net/if_ethersubr.c :: ether_output_frame() - check ether_ipfw - call ifp->if_transmit - ifp->if_transmit = ixgbe_mq_start - -dev/ixgbe/ixgbe.c :: ixgbe_mq_start() - IXGBE_TX_TRYLOCK() - ixgbe_mq_start_locked() - IXGBE_TX_UNLOCK() - -dev/ixgbe/ixgbe.c :: ixgbe_mq_start_locked() - drbr_needs_enqueue() 30ns aka buf_ring_empty - for (;;) { - ixgbe_xmit() - drbr_dequeue() - } - -dev/ixgbe/ixgbe.c :: ixgbe_xmit() - huge stack (32 descriptors) - bus_dmamap_load_mbuf_sg() - ixgbe_tso_setup() or ixgbe_tx_ctx_setup() - loop on descriptors - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i); - -... drbr_dequeue() uses sys/sys/buf_ring.h - - - BREAK CLOCK SIZE TC NSEC KPPS - 0 2934 18 HPET 1289 775 - p5556 2934 18 HPET 8 118M - 20 2934 18 HPET 107 103 107 103 103 103 103 107 107 - 20x4 2934 18 HPET 107 - 21x4 2934 18 HPET 111 - 22x4 2934 18 HPET 111 same as 21 - 23x4 2934 18 HPET 112-115 - 24x4 2934 18 HPET 117-121 - 25x4 2934 18 HPET 135-141 - 40x4 2934 18 HPET 144-150 - 41x4 2934 18 HPET 157-167 insensitive to length - allocation m_uiotombuf() -> uma_zalloc(zone_mbuf) - uma_zalloc(): - critical_enter() - cache->uc_allocs++; - 42x4 2934 18 HPET 266-270 - 42x4 2934 180 HPET 297 - 42x4 2934 1080 HPET 312 - 51x4 2934 1080 HPET 412 - 52x4 2934 18 HPET 1282 (!XXX slower than 1080) - 52 2934 1080 HPET 515 - 56x4 2934 1080 HPET 1290 - 52x4 2934 1080 HPET 1325 (1256 in other tests) - - -20120417 netsend statistics (with various breakpoints) - tests on i7-870 at 2934 - - BREAK CLOCK SIZE TC NSEC KPPS - 20 2934 18 TSC-low 9786 - 20 2934 1418 TSC-low 9786 - 20 2933 18 TSC-low 8970 8651 alternate - 20 2933 1418 TSC-low 8970 - 20 900 18 TSC-low 2704 - 20 900 18 HPET 2685 2590 alternate - - nosend 2934 18 HPET 119000 i7-870 no send (drop port) - nosendx4 2934 18 HPET 118200 i7-870 no send (drop port) - 20 2934 18 HPET 9783 sys_sendto() - 20 2934 18 HPET 9634 send() - 20x4 2934 18 HPET 9606 send() four threads - 20 2934 18 HPET 104 9602 9267 alternate - 21 2934 18 HPET 111 8982 - 22 2934 18 HPET 9006 alt 8710 - 23 2934 18 HPET 8906 - 24 2934 18 HPET 117 8502 alt 8238 - 25 2934 18 HPET 136 7347 - - 40 2934 18 HPET 144 6903 to 10.0.0.1 - 40x4 2934 18 HPET 144 6720 to 10.0.0.1 - 41 2934 18 HPET 156 6404 to 10.0.0.1 down to 6017 - 41x4 2934 18 HPET 156 6423 to 10.0.0.1 down to 6017 - --- 41-42 is m_uiotombuf() - uiomove() - sys/kern/subr_uio.c::uiomove_faultflag(cp, n, uio, 0) - involves copyin in - /home/luigi/FreeBSD/head/sys/amd64/amd64/support.S: - 42 2934 18 HPET 279 3577 to 10.0.0.1 down to 3544 - 42x4 2934 18 HPET 279 3725 to 10.0.0.1 down to 3544 - 43 2934 18 HPET 283 3522 to 10.0.0.1 down to 3544 - - 30 2934 18 HPET 292 3418 to 10.0.0.1 - 31 2934 18 HPET 340 2936 - - 50 2934 18 HPET 361 2765 to 10.0.0.1 - 50x4 2934 18 HPET 361 2802 to 10.0.0.1 - 51 2934 18 HPET 2635 to 10.0.0.1 - 51x4 2934 18 HPET 2689 to 10.0.0.1 - 52 2934 18 HPET 477 2093 to 10.0.0.1 - 52x4 2934 18 HPET 775 to 10.0.0.1 - --- 52-53 is the pfil call - 53 2934 18 HPET 613 to 10.0.0.1 (not here ?) - 54 2934 18 HPET 2090 to 10.0.0.1 also 2031 2046 ... - 54x4 2934 18 HPET 783 to 10.0.0.1 also 2031 2046 ... - 55 2934 18 HPET 534 1871 to 10.0.0.1 - 55x2 2934 18 HPET 1555 to 10.0.0.1 two threads - 55x4 2934 18 HPET 770 to 10.0.0.1 four threads - - 0 2934 18 HPET 968 1032 to 127.0.0.1 - 0 2934 18 HPET 622 to 10.0.0.1 (via ix0) - 0x2 2934 18 HPET 450 to 10.0.0.1 (via ix0) - 0x4 2934 18 HPET 351 to 10.0.0.1 (via ix0) - 0 2934 18 HPET 322 to 10.0.0.1 (and netmap-bridge) - -20120416 XXX BUG - em lock issue in em_netmap_init(), try to remove the - callback to shut down handlers - -20120407 sendto and other functions - -Measure netsend (and sendto() ) on various machines and dropping -the packet at different places in the stack. -sysctl kern.ipc.drop_send=N picks the place where packets are dropped -Tests run using tools/tools/netrate/netsend - -CONFIG QEMU LE-2300 i7-3400 - i7-3400 -l=18, no send 18.9 86.4 211 -l=18, 1 thread, pipe drop .0436 1.400 -l=1400, 1 thread, pipe drop .0393 1.300 -l=18, 2 thread, pipe drop .943 -l=1400, 2 thread, pipe drop .857 -l=18, 5 thread, pipe drop .51 -l=1400, 5 thread, pipe drop .50 - - -20120407 syscall path -lib/libc/net/Symbol.map - defines a few symbols that the linker is supposed to export - -lib/libc/net/send.c - send() calls _sendto() - -./libc/include/namespace.h:#define sendto _sendto - -lib/libc/sys/Symbol.map - FBSDprivate_1.0 _sendto, __sys_sendto - -the threading library defines them -./libthr/thread/thr_syscalls.c - ssize_t - __sendto(int s, const void *m, size_t l, int f, const struct sockaddr *t, - socklen_t tl) - { - struct pthread *curthread = _get_curthread(); - ssize_t ret; - - _thr_cancel_enter(curthread); - ret = __sys_sendto(s, m, l, f, t, tl); - _thr_cancel_leave(curthread, ret <= 0); - return (ret); - } - - -head/sys/kern/syscall.master -; Processed to created init_sysent.c, syscalls.c and syscall.h. - -lib/libc/i386/SYS.h -#define SYSCALL(x) 2: PIC_PROLOGUE; jmp PIC_PLT(HIDENAME(cerror)); \ - ENTRY(__CONCAT(__sys_,x)); \ - .weak CNAME(x); \ - .set CNAME(x),CNAME(__CONCAT(__sys_,x)); \ - .weak CNAME(__CONCAT(_,x)); \ - .set CNAME(__CONCAT(_,x)),CNAME(__CONCAT(__sys_,x)); \ - mov __CONCAT($SYS_,x),%eax; KERNCALL; jb 2b - -#define RSYSCALL(x) SYSCALL(x); ret; END(__CONCAT(__sys_,x)) - -#define PSEUDO(x) 2: PIC_PROLOGUE; jmp PIC_PLT(HIDENAME(cerror)); \ - ENTRY(__CONCAT(__sys_,x)); \ - .weak CNAME(__CONCAT(_,x)); \ - .set CNAME(__CONCAT(_,x)),CNAME(__CONCAT(__sys_,x)); \ - mov __CONCAT($SYS_,x),%eax; KERNCALL; jb 2b; ret; \ - END(__CONCAT(__sys_,x)) - -/* gas messes up offset -- although we don't currently need it, do for BCS */ -#define LCALL(x,y) .byte 0x9a ; .long y; .word x - -#define KERNCALL int $0x80 - -The main syscall in lib/libc/i386/sys/syscall.S - -ENTRY(syscall) - pop %ecx /* rta */ - pop %eax /* syscall number */ - push %ecx - KERNCALL - push %ecx /* need to push a word to keep stack frame intact - upon return; the word must be the return address. */ - jb 1f - ret -1: - PIC_PROLOGUE - jmp PIC_PLT(HIDENAME(cerror)) - - - --------------- -20120313 unetstack and linux netqueue - http://www.ioremap.net/archive/unetstack/ - http://www.dnull.com/Alpine/ - userspace stack - -20120306 receive speed - - ixgbe seems to have some rx losses when used with receive - interrupt mitigation. Symptoms are the card is missing ~0.2% - of the incoming traffic. - As a workaround, set by setting dev.netmap.netmap_no_pendintr=1 - makes the receiver not lose packets. - -20120306 behaviour with one tx queue - report: - 0 5.678 - 1 11.13 - 3 11.53 - 7 11.85 - 15 12.10 - 255 12.40 - Increasing the interrupt rate marginally improves the - behaviour but never better than 12.48Mpps. - Increasing the report frequency does not seem to help - (actually, it harms) - -Setting TXDCTL.PTHRESH and HTHRESH improves the rate. - - Using dd for the status reporting does not seem to work - it - requires the use of RS on every descriptor, which slows down - the card. - TXDCTL.WTHRESH - - Options: a) TDH is updated late, check the bits - b) the descriptor read is delayed. - -# $Id$ - -20120128 select/usleep comparison FreeBSD Linux OSX - -select | Iterations/second -timeout | FBSD | Linux | OSX -usec | 9.0 | Vbox | 10.6 ---------+-------+-------+---------- - 1 500 15.0k 150k - 10 500 12.9k 65k - 50 500 8.6k 15k - 100 500 6.0k 7.5k - 500 500 1744 1620 - 1000 500 922 880 - 1500 331 629 613 - 2000 331 477 470 - - -20120112 RELENG_8 and RELENG_9 - Original code committed in HEAD r227614 - svn diff -r 227613:227614 svn+ssh://svn.freebsd.org/base/head - New files: - share/man/man4/netmap.4 - sys/dev/netmap/ - sys/net/netmap.h - sys/net/netmap_user.h - tools/tools/netmap/ - Patches: (sys/conf done in 227845) see netmap-conf.diff - share/man/man4/Makefile - sys/conf/NOTES - sys/conf/files - sys/conf/options - - Driver changes (done later) - sys/dev/e1000/if_igb.c - sys/dev/e1000/if_lem.c - sys/dev/e1000/if_em.c - sys/dev/re/if_re.c - sys/dev/ixgbe/ixgbe.c - -20111207 lr performance tests - - ixgbe, RELENG_8 picobsd, no IPFW, no INVARIANTS, no i586 - software LRO even on 82599 - - default latency is 16us, l=0 means no interrupt mitigation. - lro is the software implementation of lro, - hwlro is the hardware one (on 82599) - - Summary: - - - hardware checksum seems to help a lot on the tx side - but practically useless on the receive side. - - - with default interrupt mitigation, setting - HWCSUM and TSO on the sender is really disruptive; - (while it seems to help a bit with l=0) - - - lro helps a lot on the receive side. - - - the software lro on the transmit side is detrimental, - not sure why (acks collapsed too much ?) - Disabling it on pure acks - The sw version is actually pretty good., but on the tx side - the software version kills performance. - it really starves - - Peak - Tput transmitter receiver - ======= ======================= ======================== - 4975 -csum,-tso,-lro -csum,-tso,-lro - 5050 -csum,-tso,-lro, w 100 -csum,-tso,-lro - - 5350 -csum,-tso,-lro +csum,-tso,+lro - 5500 -csum,-tso,-lro, w 100 +csum,-tso,+lro - 6000 -csum,-tso,-lro, w 150 +csum,-tso,+lro - 6000 -csum,-tso,-lro, w 200 +csum,-tso,+lro - - 8000 csum,-tso,-lro +csum,-tso,+lro - 3950 csum,-tso,+lro +csum,-tso,+lro - - 3144 -csum,tso,-lro +csum,-tso,+lro - 1600 csum,tso,-lro +csum,-tso,+lro - 2600 csum,tso,-lro -csum,-tso,-lro - 5200 -csum,tso,-lro -csum,-tso,-lro - - 9400 csum,tso,-lro,l=0 +csum,-tso,hwlro - 8400 -csum,tso,-lro,l=0 +csum,-tso,hwlro - 7700 csum,tso,-lro,l=0 +csum,-tso,-lro (6.3 to 7.7) - - 8000 csum,-tso,lro, w 100 +csum,-tso,hwlro - 7500 csum,-tso,lro, w 1000 +csum,-tso,hwlro cache effect ? - -20111107 lr - forked version for the release - -20111021 - cache and memory latencies for various architectures - http://arstechnica.com/gadgets/news/2011/10/can-amd-survive-bulldozers-disappointing-debut.ars - - Similarly, the cache and main memory latencies are longer than - they are for K10 (four cycles compared to three for level 1 - cache; 21 cycles compared to 14 or 15 for level 2; 65 compared - to 55 or 59 for level 3; and 195 versus 182 or 157 cycles for - main memory). K10's latencies were already worse overall than - Sandy Bridge's (which boasts 4, 11, 25, and 148 cycle latencies, - from level 1 through to main memory), and Bulldozer makes them - worse still. - - -20111003 - New measures: RX throughput vs burst size - Take new RX throughput measures, varying the number of hardware queues - and the value of the flag enabling the fast path for the poll handler. - - During these experiments we measured the throughput based on the - reception of 64+4 bytes packets - - 4 que 4 que 1 que - 1 thr 1 thr 1 thr - 1 cor 1 cor 1 cor - burst fast slow fast - ===== ===== ===== ===== - 1 2.11 0.30 2.37 - 2 4.09 0.60 4.56 - 4 7.70 1.20 8.32 - 8 13.55 2.34 14.20 - 16 14.20 4.60 14.20 - 32 14.20 8.01 14.20 - 64 14.20 14.20 14.20 - 1024 14.20 14.20 14.20 - - It seems the throughput obtained with the poll fast path is 7x the one - obtained with the slow one; that is confirmed by the fact that we need - a burst equal to 8 to obtain a throughput comparable with the fast - poll and unitary burst. - - -20111003 - New measures: TX throughput vs burst size - Take new TX throughput measures varying the number of hardware queues - and the size of burst; like in the previous update, both adaptive - interrupt moderation and maximum interrupt rate have been left to its - default value. - XXX much better values checking TDH only when avail == 0 - - 4 que 2 que 1 que - 1 thr 1 thr 1 thr - burst 1 cor 1 cor 1 cor - ===== ===== ===== ===== - 1 1.05 0.89 0.77 - 2 1.81 1.71 1.53 - 4 3.51 3.41 2.92 - 8 6.58 6.38 5.90 - 16 11.60 11.22 10.41 - 32 14.88 14.88 12.49 - 1024 12.49 - - -20111003 - New measures: throughput vs clock speed - Take new TX throughput measures varying the number of threads/hardware - queues; adaptive interrupt moderation was kept active and maximum - interrupt rate was left to its default value. - - All the following measures are relative to the Intel 10 Gbe adapter. - - 4 que 4 que 2 que 2 que 1 que - 4 thr 1 thr 2 thr 1 thr 1 thr - freq 4 cor 1 cor 2 cor 1 cor 1 cor - ==== ===== ===== ===== ===== ===== - 150 5.31 2.17 2.88 1.81 1.70 - 300 10.13 4.57 5.88 3.97 3.49 - 450 14.88 7.46 8.04* 6.35 5.13 - 600 10.87 11.54 8.65 6.92 - 750 13.64 14.55 10.67 8.98 - 900 14.88 14.88 12.51 11.67 - 1050 14.00 12.60 - 1200 14.88 12.60 - 2934 12.60 - - -20110926 - New throughput measures. - Given the introduction of a couple of debugging features which caused - some slowdown in terms of TX and RX throughput, we decided to opt-out - such features with pre-processor defines, and take again throughput - measurements (here we report TX results only): - - cpu=150MHz queues=4 cores=1 threads=1 throughput= 2.17 Mpps - cpu=300MHz queues=4 cores=1 threads=1 throughput= 4.57 Mpps - cpu=450MHz queues=4 cores=1 threads=1 throughput= 7.53 Mpps - cpu=600MHz queues=4 cores=1 threads=1 throughput= 10.88 Mpps - cpu=750MHz queues=4 cores=1 threads=1 throughput= 13.62 Mpps - cpu=900MHz queues=4 cores=1 threads=1 throughput= 14.84 Mpps - - cpu=150MHz queues=4 cores=4 threads=4 throughput= 5.35 Mpps - cpu=300MHz queues=4 cores=4 threads=4 throughput= 9.75 Mpps - cpu=450MHz queues=4 cores=4 threads=4 throughput= 14.88 Mpps - - We did not measured the throughput with 2 cores and 2 queues, because - at the moment we are interested in not having introduced slow - operations; a more complete set of data will be taken next week. - - There is always the super-linear trend which needs to be explained. - - -20110921 - More latency experiments with different hosts and tx rages - - Here is the summary of the results collected while measuring the RTT - between two hosts: - - 1 experiment - - hosts: BSD - BSD - - CPU freq: 2800 MHz - - packet size: 98 bytes - - transmit rates: 100 Hz, 1 KHz, 10 KHz - 100) 0.024/0.031/0.042/0.002 ms - 1000) 0.024/0.030/0.042/0.001 ms - 10000) 0.013/0.015/0.043/0.001 ms - - 2 experiment - - hosts: BSD - Linux - - CPU freq: 2800 MHz - - packet size: 98 bytes - - transmit rates: 100 Hz, 1 KHz, 10 KHz - 100) 0.036/0.078/0.091/0.003 ms - 1000) 0.057/0.078/0.090/0.002 ms - 10000) 0.013/0.022/0.092/0.005 ms - - 3 experiment - - hosts: BSD - BSD w/ netmap bridge - - CPU freq: 2800 MHz - - packet size: 98 bytes - - transmit rates: 100 Hz, 1 KHz, 10 KHz - 100) 0.055/0.065/0.073/0.003 ms - 1000) 0.041/0.060/0.076/0.002 ms - 10000) 0.026/0.033/0.133/0.007 ms - - Considerations: - - above 1 Khz of transmit frequency all measured times are way below - data collected at lower freqs; we don't know why, probably the - application is entering in *flood* mode and timestamps are no more - reliable. More investigation is needed. - - using the netmap bridge with high TX rates, we get high values of - RTT for the first packets; afterwards the average decreases. Again, - more investigation is needed. - - Additional information can be found: - - stats/ping-bsd-bsd-100-2800 - - stats/ping-bsd-bsd-1k-2800 - - stats/ping-bsd-bsd-10k-2800 - - stats/ping-bsd-linux-100-2800 - - stats/ping-bsd-linux-1k-2800 - - stats/ping-bsd-linux-10k-2800 - - stats/ping-bsd-netmap-100-2800 - - stats/ping-bsd-netmap-1k-2800 - - stats/ping-bsd-netmap-10k-2800 - -20110920 - RTT break up measures - Using a patched version of `ping' we have been able to break up - measured RTT into small chunks, each one associated to a specific - network operation. - - The following is the summary of the measures taken on a machine with - two interfaces in loopback, and interrupt latency reduced as much as - possible, and an high transmit rate: - - ~ 6 us to move a message between userspace and kernel: we expected - a smaller time, but maybe we are taking the userspace timestamp to - early - - ~ 11.7 us measures the time between A notifying the NIC of the packet - to send, and B schedule the interrupt routine - - ~ 0.4 us between interrupt and rxeof: we expected this value, given - that the function call is one of the first operation executed - inside the interrupt service routine - - ~ 5 us the latency introduced by the receiver processing the ICMP - message - - ~ 11.9 us measures the time between B notifying the NIC of the packet - to send, and A schedule the interrupt routine; as we expected, this - value is kind of equal to delta #2. - - ~ 0.4 us between interrupt and rxeof - - ~ 20 us to traverse the network stack and reach userspace - - Hence: - - from userspace to userspace: ~50 us - - from userspace to kernelspace: ~30 us which is what we get from - standard ping. - - For the whole data set collected, have a look: - - stats/patchedping-c1000-i0005-s68-2800-bulklat - - stats/patchedping-c1000-i0005-s68-2800-lowlat - - stats/patchedping-c1000-i0005-s68-2800-zerolat - - -20110916 - More tests on ping latency (Linux) with a varying transmit rate. - We took the measures again varying the transmit rate to investigate the - origin of this high RTT. An higher transmit size does not produces - better results (they are kind of stable); on the other hand, increasing - the transmit rate: - - - hosts: Linux->Linux - - command: ping -c 100000 -i 0.0001 -s [56, 200, 400, 800, 1200, 1450] - 56) min/avg/max: 0.022/0.033/0.123 - 200) min/avg/max: 0.027/0.035/0.146 - 400) min/avg/max: 0.020/0.035/0.145 - 800) min/avg/max: 0.029/0.034/0.136 - 1200) min/avg/max: 0.032/0.042/0.148 - 1450) min/avg/max: 0.032/0.052/0.143 - - This time, collected data seem to be more reasonable than before: - moreover this suggests that interrupt mitigation (still active on - Linux) is more effective under *heavy* work load. - - -20110915 - Test ping latency with different hosts and packet sizes - 1 experiment - - hosts: BSD->BSD - - command: ping -c 1000 -i 0.005 -s [56, 200, 400, 800, 1200, 1450] - 56) min/avg/max: 0.049/0.050/0.055 - 200) min/avg/max: 0.049/0.050/0.055 - 400) min/avg/max: 0.049/0.050/0.055 - 800) min/avg/max: 0.051/0.052/0.056 - 1200) min/avg/max: 0.051/0.052/0.057 - 1450) min/avg/max: 0.052/0.053/0.057 - - 2 experiment - - hosts: BSD->Linux - - command: ping -c 1000 -i 0.005 -s [56, 200, 400, 800, 1200, 1450] - 56) min/avg/max: 0.073/0.085/0.091 - 200) min/avg/max: 0.057/0.086/0.091 - 400) min/avg/max: 0.059/0.087/0.093 - 800) min/avg/max: 0.063/0.089/0.095 - 1200) min/avg/max: 0.082/0.092/0.097 - 1450) min/avg/max: 0.078/0.093/0.101 - - 3 experiment - - hosts: Linux->BSD - - command: ping -c 1000 -i 0.006 -s [56, 200, 400, 800, 1200, 1450] - * transmit rate has been increased to prevent icmp mitigation on - the RX side: maybe there is a syctl to disable it * - 56) min/avg/max: 0.070/0.074/0.083 - 200) min/avg/max: 0.070/0.074/0.087 - 400) min/avg/max: 0.070/0.074/0.082 - 800) min/avg/max: 0.071/0.075/0.084 - 1200) min/avg/max: 0.071/0.075/0.088 - 1450) min/avg/max: 0.071/0.075/0.086 - - 4 experiment - - hosts: Linux->Linux - - command: ping -c 1000 -i 0.005 -s [56, 200, 400, 800, 1200, 1450] - 56) min/avg/max: 0.060/0.102/0.123 - 200) min/avg/max: 0.065/0.106/0.140 - 400) min/avg/max: 0.062/0.107/0.140 - 800) min/avg/max: 0.068/0.113/0.150 - 1200) min/avg/max: 0.075/0.113/0.157 - 1450) min/avg/max: 0.083/0.117/0.140 - - Collected values are still to high. - - -20110915 - Interrupt mitigation study - Disabling `ixgbe' adaptive interrupt moderation, and varying - max_interrupt_rate, we used `vmstat - i' to measure the number of - received interrupts to find out whether such setting gets correctly - honored by device driver or not. - - All the experiment have been taken on the receiver side (on the - transmit one, we had `pkt-gen' sending wire-limit traffic) - - 1 legacy driver, low interrupt latency - - max_intr_rate: 62500 - - duration: 6.73 s - - interrupts before: ~106289 - - interrupts after: ~212437 - - interrupts per second: ~15772 - - 2 pkt-gen (RX), low interrupt latency - - max_intr_rate: 62500 - - duration: 6.73 s - - interrupts before: ~212515 - - interrupts after: ~315407 - - interrupts per second: ~15288 - - 3 legacy driver, high interrupt latency - - max_intr_rate: 6666 - - duration: 6.73s - - interrupts before: ~172 - - interrupts after: ~108285 - - interrupts per second: ~16064 - how is this possible? should this value be under 6666? Does this - count the number of received interrupts, or the number of served - ones? - - 4 pkt-gen (RX), high interrupt latency - - max_intr_rate: 6666 - - duration: 6.73s - - interrupts before: ~68450 - - interrupts after: ~136538 - - interrupts per second: ~10115 - why we are still counting more interrupts than configured limit? - why we are getting less interrupts than before? - - These are measures that need to be investigated more. - - -20110914 - Latency tests (part 2) - In these tests we wanted to measure the RTT deltas with different - packet sizes and different OSes. In particular we sent packets of 64, - 98 and 132 bytes (`ping -s 56/90/124') and we tested all the possible - configurations of sender / receiver hosting different OSes (FreeBSD and - Linux) - - Size 10 Gbe 1 Gbe - ====== ====== ===== - - 64 B 33 us 57 us - FreeBSD-FreeBSD 98 B 37 us 58 us - 132 B 38 us 60 us - - 64 B 88 us 132 us - FreeBSD-Linux 98 B 90 us 140 us - 132 B 92 us 150 us - - 64 B 110 us 153 us - Linux-FreeBSD 98 B 112 us 169 us - 132 B 113 us 178 us - - 64 B 169 us 182 us - Linux-Linux 98 B 170 us 185 us - 132 B 176 us 188 us - - Collected measures are likely to be wrong: why a ping on a Linux - machine takes so long? We are better off taking again the measures and - fix some ping variables like for example the message interval. - - -20110913 - Latency tests (part 1) - In these tests we measured the latency measured by the `ping' - application; we repeated the tests using the standard driver, then - enabling NIC off-loading features, and finally disabling interrupt - mitigation. In the end, we tried to use the brige application (built on - top of netmap) which links hardware with stack queues. - - OS: FreeBSD 9.0 beta - Kernel: head (r225462) - - NIC: Intel 10Gbe (82599) - Setup: ping - - standard rxcsum no intr netmap - txcsum mitigation bridge - ======== ======= ========== ======= - 35 usec 35 usec 35 usec 65 usec - - - NIC: Intel 1Gbe (PCH_D_HV_DM) - Setup: ping - - standard rxcsum no intr netmap - txcsum mitigation bridge - ======== ======= ========== ======= - 57 usec 57 usec / 183 usec - - -20110909 - Transmission performance - OS: FreeBSD 9.0 beta - Kernel: head (r225380) - - NIC: Intel 10Gbe (82599) - Setup: netsend throughput varying cpu freq and number of instances - - freq 1 instance 2 instances 4 instances - ==== ========== =========== =========== - 2934 725 Kpps 1.21 Mpps 1.64 Mpps - 1467 372 Kpps 640 Kpps 808 Kpps - 750 202 Kpps 344 Kpps 432 Kpps - 150 36.7 Kpps 63.2 Kpps 80.9 Kpps - - NIC: Intel 1Gbe (82572EI_COPPER) - Setup: pkt-gen - - freq(MHz) throughput (Mpps) - ======== ================= - 150 1.13 - 2934 1.13 - - NIC: Intel 1Gbe (82574L) - Setup: pkt-gen - - freq(MHz) throughput (Mpps) - ======== ================= - 150 1.13 - 2934 1.13 - - -20110908 - Transmission performance - - OS: FreeBSD 9.0 beta - Kernel: head (r225380) - - NIC: Intel 10Gbe (82599) - Setup: pkt-gen, 4 queues, 1 thread, 1 core, 64 bytes - - freq (MHz) throughput (Mpps) - ========= ================= - 150 2.18 - 450 7.10 - 750 13.93 - 900 14.88 - - NIC: Intel 10Gbe (82599) - Setup: pkt-gen, 4 queues, 4 threads, 4 cores, 64 bytes - - freq (MHz) throughput (Mpps) - ========= ================= - 150 5.32 - 300 9.42 - 450 14.20 - 600 14.88 - - NIC: Intel 1Gbe (PCH_D_HV_DM) - Setup: pkt-gen - - freq(MHz) throughput (Mpps) - ======== ================= - 150 1.39 - 2934 1.39 - - NIC: Intel 1Gbe (ICH10_D_BM_LM) - Setup: pkt-gen - - freq(MHz) throughput (Mpps) - ======== ================= - 150 1.13 - - -20110902 - kevent / kqueue userspace example - - Monitor changes to the /tmp/foo file and print messages whenever - it is deleted, modified or their attributes change. The program - finishes when the file being monitoring is deleted. - - 1 Call kqueue(2) to create a new kernel event queue. The - descriptor it returns will be later used by kevent(2). - - 2 Open the file to monitor and keep its descriptor around. We'll - need this to attach an event monitor to it. - - 3 Initialize a vector of struct kevent elements that describes - the changes to monitor. Since we are only monitoring a single - file, we need a one-element vector. This vector is filled up - with calls to the EV_SET macro. This macro takes: the - descriptor of the kqueue, the descriptor of the file to - monitor (ident), the filter to apply to it, several flags and - optional arguments to the filter. - - 4 Call the kevent(2) function. This system call takes the list - of changes to monitor we constructed before and does not - return until at least one event is received (or when an - associated timeout is exhausted). The function returns the - number of changes received and stores information about them - in another vector of struct kevent elements (we'll only get - notifications of one event at a time, hence we don't use - a vector, but a simple variable). - - 5 Interpret the results. If kevent(2) returned a number greater - than 0, we have to inspect the output vector and see which - events were received. Each filter has its semantics about the - results. For example, we are using the EVFILT_VNODE filter, - which takes a list of conditions to monitor in the fflags - field and modifies it to include only the conditions that - triggered the filter. - - - And now the code: - -#include -#include -#include -#include -#include - -int -main(void) -{ - int f, kq, nev; - struct kevent change; - struct kevent event; - - kq = kqueue(); - if (kq == -1) - perror("kqueue"); - - f = open("/tmp/foo", O_RDONLY); - if (f == -1) - perror("open"); - - EV_SET(&change, f, EVFILT_VNODE, - EV_ADD | EV_ENABLE | EV_ONESHOT, - NOTE_DELETE | NOTE_EXTEND | NOTE_WRITE | NOTE_ATTRIB, - 0, 0); - - for (;;) { - nev = kevent(kq, &change, 1, &event, 1, NULL); - if (nev == -1) - perror("kevent"); - else if (nev > 0) { - if (event.fflags & NOTE_DELETE) { - printf("File deleted\n"); - break; - } - if (event.fflags & NOTE_EXTEND || - event.fflags & NOTE_WRITE) - printf("File modified\n"); - if (event.fflags & NOTE_ATTRIB) - printf("File attributes modified\n"); - } - } - - close(kq); - close(f); - return EXIT_SUCCESS; -} - - - Source: http://blog.julipedia.org/2004/10/example-of-kqueue.html - Documentation: http://people.freebsd.org/~jlemon/papers/kqueue.pdf - - -20110826 - kqueue support - - kevent() introduction - - A client program of the kevent system should: - - use kqueue() to creates a new kernel event queue; - - use kevent() to register, change or check events. - - One of the main difference with the poll() implementation - in kernel space is that kevent() require the kernel to - store some information state. This work is done by the - kqueue() function, that allocates a new kqueue object - into the kernel. - - Each event is identified by the tuple and - can be selected by a filter. A filter is declared - by a filterops structure, and should define at least three - hooks: attach, detach and filter. - - The user application calls the kevent() function with a - list of events. For each event the kernel calls the - kqueue_register() function that lookup its queues and if - there is no match - i) calls the "attach" hook and - ii) add the event on its kqueue. - - The "filter" hook is called each time a data structure - is modified. This means that the .f_filter function should - be placed where new data are read/write. The filter execution - check the filter conditions and possibly add the event to the - kernel active kqueue. - - Example of devices using kevents are: - net/bpf.c net/if_tap.c net/if_tun.c - - The function executed for each event is the netmap_kqfilter() - function, that is declared to the netmap_cdevsw structure. - See the netmap_kqfilter() comments into the netmap.c file - for mode details on its implementation. - -20110811 - kqueue support - The method to implement is defined in sys/sys/conf.h - d_kqfilter_t *d_kqfilter; - one that implements it is sys/kern/kern_tty.c - -2011.06.28 WORKING RELEASE (8943) - -2011.06.27 luigi - -Notes on reset, reinit etc. - - Ring reset can be asynchronous wrt userland. Right now - - RX RING - if there were no pending buffers passed up (keep a - copy of cur and avail in the kring ?) then the op is not - critical - - Otherwise we should try to preserve the range from - cur to cur+avail -- not sure how. - - TX RING: - If the reset does not change hwcur then there is no problem. - Otherwise we should preserve the old hwcur and move the - range of buffers that we get on the next write. - - If the reset does not change hwcur, there is no need - to change cur either. Otherwise we need to set the flag - and throw away part of the content. But this decision - must be taken in the driver which knows the correct - values for cur and avail. - - So, perhaps the callback should be in two steps, one - that returns slot, another one that fixes the flags - at the end. If the reset is harmless, no problem. - If it changes things, then set the flag, throw away stuff - at the next syscall and clear the flag. - -2011.06.21 luigi - - version 0.3 - - + the diffs for RELENG_8 are out of date. - - + revising the netmap_poll implementation, the new method - is generally a lot more efficient than the old one - especially for small bursts, as it avoids some useless function - calls. There is still some issue on the handling of NR_REINIT - which needs to be investigated; - - + the 're' driver seems to have problems receiving. - The machine stalls. - - + started an initial implementation of a bridge in testpcap. - More or less works but seems to lose control when the link goes - down. - - + Bridging performance (Mpps, l=64 until we remove CRCSTRIP) - - burst old new new testpcap (XXX NO) - poll no_ts do_ts (with ts) - - 1 0.21 9.59 8.57 0.75 - 2 0.41 10.05 8.96 1.37 - 16 2.55 4.86 - 1024 10.61 10.66 9.42 7.50 - - - + RX performance, em on PCI bus: 740Kpps (?) - ---------------------------------------------------------- - -2011.06.22 marta - - svn rev. 8912 - - + Bridging and pcap performance (Mpps, l=64) - freq 2934 - bridge no_timestamp=0 - - burst bridge pcap note - 1 9.5 - - 2 9.9 - - 5 10.02 - - 10 10.1 - - 20 10.1 - oscilla 10.3 - 50 10.1 - oscilla 10.3 - 70 10.4 - oscilla - - freq 2934 - bridge no_timestamp=1 - burst bridge pcap note - 1 9.9 777 - 2 10.6 1.3 - 5 11.07 2.6 - 10 11.1 3.7 - 20 11.2 4.8 - 50 11.2 5.7 - - freq 1200 - bridge no_timestamp=0 - - burst bridge pcap note - 1 3.6 - - 2 3.9 - - 5 4.04 - - 10 4.08 - oscilla 4.1 - 20 4.01 - oscilla - 50 4.01 - - - freq 1200 - bridge no_timestamp=1 - burst bridge pcap note - 1 3.8 317 - 2 4.04 592 - 5 4.1 1.2 - 10 4.25 1.95 - 20 4.25 2.7 - 50 4.30 3.5 diff --git a/private/OSX/README b/private/OSX/README deleted file mode 100644 index b16dd11c7..000000000 --- a/private/OSX/README +++ /dev/null @@ -1,7 +0,0 @@ -# $Id$ -# -# 20120614 - -Attempt to build an OSX module using the instructions at -http://unixjunkie.blogspot.com/2006/12/kernel-extension-by-hand.html - diff --git a/private/OSX/netmap.kext/Contents/Info.plist b/private/OSX/netmap.kext/Contents/Info.plist deleted file mode 100644 index d9e6ae4c7..000000000 --- a/private/OSX/netmap.kext/Contents/Info.plist +++ /dev/null @@ -1,35 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - netmap_osx - CFBundleIdentifier - it.unipi.iet.netmap_osx - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - netmap_osx - CFBundlePackageType - KEXT - CFBundleShortVersionString - 1.0.0 - CFBundleSignature - ???? - CFBundleVersion - 1.0.0 - OSBundleLibraries - - com.apple.kpi.bsd - 9.0.0 - com.apple.kpi.libkern - 9.0.0 - com.apple.kpi.mach - 9.0.0 - com.apple.kpi.unsupported - 9.0.0 - - - diff --git a/private/OSX/netmap.kext/Contents/MacOS/Makefile b/private/OSX/netmap.kext/Contents/MacOS/Makefile deleted file mode 100644 index 2c7990df1..000000000 --- a/private/OSX/netmap.kext/Contents/MacOS/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -SRC= netmap_osx.c netmap.c -NM_BASE = ../../../../sys -VPATH = .:../../../../sys/dev/netmap -CFLAGS = -static -fno-builtin -nostdlib -lkmod -r -mlong-branch -CFLAGS += -I/System/Library/Frameworks/Kernel.framework/Headers -CFLAGS += -I$(NM_BASE) -I. -I$(NM_BASE)/dev/netmap -CFLAGS += -include osx_glue.h -CFLAGS += -Wall -netmap_osx: $(SRC) diff --git a/private/OSX/netmap.kext/Contents/MacOS/netmap_osx.c b/private/OSX/netmap.kext/Contents/MacOS/netmap_osx.c deleted file mode 100644 index 0866aea6c..000000000 --- a/private/OSX/netmap.kext/Contents/MacOS/netmap_osx.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * OSX wrapper for netmap module - */ -#include -#include - -kern_return_t netmap_kext_Start(kmod_info_t *ki, void *d) { - printf("Hello, World!\n"); - return KERN_SUCCESS; -} - -kern_return_t netmap_kext_Stop(kmod_info_t *ki, void *d) { - printf("Goodbye, World!\n"); - return KERN_SUCCESS; -} - -extern kern_return_t _start(kmod_info_t *ki, void *data); -extern kern_return_t _stop(kmod_info_t *ki, void *data); - -KMOD_EXPLICIT_DECL(it.unipi.iet.netmap_osx, "1.0.0", _start, _stop) -__private_extern__ kmod_start_func_t *_realmain = netmap_kext_Start; -__private_extern__ kmod_stop_func_t *_antimain = netmap_kext_Stop; -__private_extern__ int _kext_apple_cc = __APPLE_CC__; diff --git a/private/OSX/netmap.kext/Contents/MacOS/osx_glue.h b/private/OSX/netmap.kext/Contents/MacOS/osx_glue.h deleted file mode 100644 index 2c013f2cd..000000000 --- a/private/OSX/netmap.kext/Contents/MacOS/osx_glue.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * glue to compile netmap under FreeBSD - * - * Headers are in - * /System/Library/Frameworks/Kernel.framework/Headers/ - */ -#ifndef OSX_GLUE_H -#define OSX_GLUE_H -#define __FBSDID(x) -#include -#include -#include -#include -#define TUNABLE_INT(name, ptr) - -#include // lock -#include // IOlock -#include // struct selinfo -struct selinfo { // private in the kernel - char dummy[128]; -}; -#include -#include - -/* XXX some types i don't find in OSX */ -typedef void * vm_paddr_t; -struct mbuf; // XXX -struct ifnet; - - -// #include -#include -#include -#include /* BIOCIMMEDIATE */ -//#include -#include -#include -// #include /* bus_dmamap_* */ - - -#endif /* OSX_GLUE_H */ diff --git a/private/README b/private/README deleted file mode 100644 index fe6ab9133..000000000 --- a/private/README +++ /dev/null @@ -1,3 +0,0 @@ -This directory contains files (often stale and incorrect) -not meant for distribution. They are not needed for using -netmap and should not be used. diff --git a/private/extra/20130220-bsd-em-head.diff b/private/extra/20130220-bsd-em-head.diff deleted file mode 100644 index db850df0f..000000000 --- a/private/extra/20130220-bsd-em-head.diff +++ /dev/null @@ -1,825 +0,0 @@ -Index: sys/dev/e1000/if_em.c -=================================================================== ---- sys/dev/e1000/if_em.c (revision 246924) -+++ sys/dev/e1000/if_em.c (working copy) -@@ -32,6 +32,9 @@ - ******************************************************************************/ - /*$FreeBSD$*/ - -+#define MITIGATION -+#define PARAVIRT /* enable virtio-like synchronization */ -+ - #ifdef HAVE_KERNEL_OPTION_HEADERS - #include "opt_device_polling.h" - #include "opt_inet.h" -@@ -336,6 +339,9 @@ - - static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); - -+#define MAX_INTS_PER_SEC 8000 -+#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) -+ - static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); - static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); - TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt); -@@ -356,8 +362,8 @@ - &em_rx_abs_int_delay_dflt, 0, - "Default receive interrupt delay limit in usecs"); - --static int em_rxd = EM_DEFAULT_RXD; --static int em_txd = EM_DEFAULT_TXD; -+static int em_rxd = 8*EM_DEFAULT_RXD; -+static int em_txd = 8*EM_DEFAULT_TXD; - TUNABLE_INT("hw.em.rxd", &em_rxd); - TUNABLE_INT("hw.em.txd", &em_txd); - SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, -@@ -510,6 +516,47 @@ - goto err_pci; - } - -+#ifdef PARAVIRT -+ if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) { -+ uint64_t bus_addr; -+ int tsize; -+ -+ device_printf(dev, "paravirt support on dev %p\n", adapter); -+ tsize = 4096; // XXX one page for the csb -+ if (em_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) { -+ device_printf(dev, "Unable to allocate csb memory\n"); -+ error = ENOMEM; -+ goto err_pci; -+ } -+ /* Setup the Base of the CSB */ -+ adapter->csb = (struct e1000_csb *)adapter->csb_mem.dma_vaddr; -+ /* force the first kick */ -+ adapter->csb->host_need_txkick = 1; /* txring empty */ -+ adapter->csb->guest_need_rxkick = 1; /* no rx packets */ -+ bus_addr = adapter->csb_mem.dma_paddr; -+ em_set_sysctl_value(adapter, "csb_on", -+ "enable paravirt.", &adapter->csb->guest_csb_on, 0); -+ em_set_sysctl_value(adapter, "txc_lim", -+ "txc_lim", &adapter->csb->host_txcycles_lim, 1); -+ /* some stats */ -+#define PA_SC(name, var, val) \ -+ em_set_sysctl_value(adapter, name, name, var, val) -+ PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1); -+ PA_SC("host_need_rxkick",&adapter->csb->host_need_rxkick, 1); -+ PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0); -+ PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1); -+ PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0); -+ PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0); -+ PA_SC("tdt_int_count",&adapter->tdt_int_count, 0); -+ PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0); -+ /* tell the host where the block is */ -+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAH, -+ (u32)(bus_addr >> 32)); -+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAL, -+ (u32)bus_addr); -+ } -+#endif /* PARAVIRT */ -+ - /* - ** For ICH8 and family we need to - ** map the flash memory, and this -@@ -563,12 +610,25 @@ - &adapter->tx_abs_int_delay, - E1000_REGISTER(hw, E1000_TADV), - em_tx_abs_int_delay_dflt); -+ em_add_int_delay_sysctl(adapter, "itr", -+ "interrupt delay limit in usecs/4", -+ &adapter->tx_itr, -+ E1000_REGISTER(&adapter->hw, E1000_ITR), -+ DEFAULT_ITR); - - /* Sysctl for limiting the amount of work done in the taskqueue */ - em_set_sysctl_value(adapter, "rx_processing_limit", - "max number of rx packets to process", &adapter->rx_process_limit, - em_rx_process_limit); - -+#ifdef MITIGATION -+ /* Sysctls to control mitigation */ -+ em_set_sysctl_value(adapter, "mit_enable", -+ "driver TDT mitigation", &adapter->mit_enable, 0); -+ em_set_sysctl_value(adapter, "rx_retries", -+ "driver rx retries", &adapter->rx_retries, 0); -+#endif /* MITIGATION */ -+ - /* - * Validate number of transmit and receive descriptors. It - * must not exceed hardware maximum, and must be multiple -@@ -663,7 +723,7 @@ - device_printf(dev, - "The EEPROM Checksum Is Not Valid\n"); - error = EIO; -- goto err_late; -+ // XXX goto err_late; - } - } - -@@ -741,6 +801,10 @@ - if (adapter->ifp != NULL) - if_free(adapter->ifp); - err_pci: -+#ifdef PARAVIRT -+ if (adapter->csb) -+ em_dma_free(adapter, &adapter->csb_mem); -+#endif /* PARAVIRT */ - em_free_pci_resources(adapter); - free(adapter->mta, M_DEVBUF); - EM_CORE_LOCK_DESTROY(adapter); -@@ -788,6 +852,12 @@ - - e1000_phy_hw_reset(&adapter->hw); - -+#ifdef PARAVIRT -+ if (adapter->csb) { -+ em_dma_free(adapter, &adapter->csb_mem); -+ adapter->csb = NULL; -+ } -+#endif /* PARAVIRT */ - em_release_manageability(adapter); - em_release_hw_control(adapter); - -@@ -942,6 +1012,16 @@ - em_txeof(txr); - if (txr->tx_avail < EM_MAX_SCATTER) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; -+#ifdef PARAVIRT -+ if (ifp->if_drv_flags & IFF_DRV_OACTIVE && adapter->csb && -+ adapter->csb->guest_csb_on && !adapter->csb->guest_need_txkick) { -+ adapter->csb->guest_need_txkick = 1; -+ adapter->guest_need_kick_count++; -+ // XXX memory barrier -+ em_txeof(txr); // XXX possibly clear IFF_DRV_OACTIVE -+ } -+#endif /* PARAVIRT */ -+ - return (err); - } - -@@ -2098,6 +2178,35 @@ - */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); -+ -+#ifdef PARAVIRT -+ if (adapter->csb) { -+ adapter->csb->guest_tdt = i; -+ /* XXX memory barrier ? */ -+ if (adapter->csb->guest_csb_on && -+ !adapter->csb->host_need_txkick) { -+ if (txr->tx_avail <= 64) {// XXX -+ em_txeof(txr); -+ } -+ adapter->tdt_csb_count++; -+ return (0); -+ } -+ } -+#endif /* PARAVIRT */ -+ -+#ifdef MITIGATION -+ if (adapter->mit_enable) { -+ if (adapter->shadow_tdt & MIT_PENDING_INT) { -+ /* signal intr and data pending */ -+ adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff); -+ return (0); -+ } else { -+ adapter->shadow_tdt = MIT_PENDING_INT; -+ } -+ } -+ adapter->tdt_reg_count++; -+#endif /* MITIGATION */ -+ - E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); - - return (0); -@@ -2255,6 +2364,17 @@ - taskqueue_enqueue(txr->tq, &txr->tx_task); - } - -+#if 0 // def PARAVIRT -+ /* recover space if needed */ -+ if (adapter->csb && adapter->csb->guest_csb_on && -+ (adapter->watchdog_check == TRUE) && -+ (ticks - adapter->watchdog_time > EM_WATCHDOG) && -+ (txr->tx_avail != adapter->num_tx_desc) ) { -+ em_txeof(txr); -+ /* XXX should also recover from stalls ? */ -+ } -+#endif /* PARAVIRT */ -+ - adapter->pause_frames = 0; - callout_reset(&adapter->timer, hz, em_local_timer, adapter); - #ifndef DEVICE_POLLING -@@ -3877,6 +3997,17 @@ - - txr->next_to_clean = first; - -+#ifdef MITIGATION -+ if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) { -+ /* a tdt write is pending, do it */ -+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), -+ 0xffff & adapter->shadow_tdt); -+ adapter->shadow_tdt = MIT_PENDING_INT; -+ } else { -+ adapter->shadow_tdt = 0; // disable -+ } -+#endif /* MITIGATION */ -+ - /* - ** Watchdog calculation, we know there's - ** work outstanding or the first return -@@ -3975,6 +4106,14 @@ - ** Update the tail pointer only if, - ** and as far as we have refreshed. - */ -+#ifdef PARAVIRT // XXX fix for multiqueue -+ if (cleaned) { -+ adapter->csb->guest_rdt = rxr->next_to_refresh; -+ if (adapter->csb->guest_csb_on && -+ !adapter->csb->host_need_rxkick) -+ return; -+ } -+#endif /* PARAVIRT */ - if (cleaned) - E1000_WRITE_REG(&adapter->hw, - E1000_RDT(rxr->me), rxr->next_to_refresh); -@@ -4246,8 +4385,6 @@ - * Enable receive unit. - * - **********************************************************************/ --#define MAX_INTS_PER_SEC 8000 --#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) - - static void - em_initialize_receive_unit(struct adapter *adapter) -@@ -4306,6 +4443,7 @@ - E1000_WRITE_REG(hw, E1000_RDTR, 0x20); - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { -+ int t = adapter->num_rx_desc - 1; - /* Setup the Base and Length of the Rx Descriptor Ring */ - bus_addr = rxr->rxdma.dma_paddr; - E1000_WRITE_REG(hw, E1000_RDLEN(i), -@@ -4324,12 +4462,14 @@ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; -- int t = na->num_rx_desc - 1 - kring->nr_hwavail; -+ t = na->num_rx_desc - 1 - kring->nr_hwavail; -+ } -+#endif /* DEV_NETMAP */ - -- E1000_WRITE_REG(hw, E1000_RDT(i), t); -- } else --#endif /* DEV_NETMAP */ -- E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1); -+#ifdef PARAVIRT -+ adapter->csb->guest_rdt = t; -+#endif /* PARAVIRT */ -+ E1000_WRITE_REG(hw, E1000_RDT(i), t); - } - - /* Set PTHRESH for improved jumbo performance */ -@@ -4402,7 +4542,11 @@ - int i, processed, rxdone = 0; - bool eop; - struct e1000_rx_desc *cur; -+ int retries; - -+#ifdef PARAVIRT -+ adapter->csb->guest_need_rxkick = 0; -+#endif /* PARAVIRT */ - EM_RX_LOCK(rxr); - - #ifdef DEV_NETMAP -@@ -4419,6 +4563,7 @@ - } - #endif /* DEV_NETMAP */ - -+ retries = 0; - for (i = rxr->next_to_check, processed = 0; count != 0;) { - - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) -@@ -4431,8 +4576,21 @@ - status = cur->status; - mp = sendmp = NULL; - -- if ((status & E1000_RXD_STAT_DD) == 0) -+ if ((status & E1000_RXD_STAT_DD) == 0) { -+ if (++retries <= adapter->rx_retries) { -+ continue; -+ } -+#ifdef PARAVIRT -+ if (adapter->csb->guest_need_rxkick == 0) { -+ adapter->csb->guest_need_rxkick = 1; -+ continue; -+ } -+#endif /* PARAVIRT */ - break; -+ } -+#ifdef PARAVIRT -+ adapter->csb->guest_need_rxkick = 0; -+#endif /* PARAVIRT */ - - len = le16toh(cur->length); - eop = (status & E1000_RXD_STAT_EOP) != 0; -@@ -5614,6 +5772,8 @@ - return (EINVAL); - info->value = usecs; - ticks = EM_USECS_TO_TICKS(usecs); -+ if (info->offset == E1000_ITR) /* units are 256ns here */ -+ ticks *= 4; - - adapter = info->adapter; - -Index: sys/dev/e1000/if_em.h -=================================================================== ---- sys/dev/e1000/if_em.h (revision 246924) -+++ sys/dev/e1000/if_em.h (working copy) -@@ -271,6 +271,28 @@ - int value; /* Current value in usecs */ - }; - -+#ifdef PARAVIRT -+#define E1000_PARA_SUBDEV 0x1101 /* special id */ -+#define E1000_CSBAL 0x02830 /* csb physical address */ -+#define E1000_CSBAH 0x02834 -+struct e1000_csb { /* comm. block */ -+ uint32_t guest_tdt; /* signals from guest */ -+ uint32_t guest_need_txkick; /* out of tx bufs */ -+ uint32_t guest_need_rxkick; /* out of rx bufs */ -+ uint32_t guest_csb_on; /* mode enabled on the guest */ -+ uint32_t guest_rdt; /* signals from guest */ -+ uint32_t pad[11]; /* to 64 bytes */ -+ -+ uint32_t host_tdh; /* mirror tdh, unused */ -+ uint32_t host_need_txkick; /* enable mode */ -+ uint32_t host_txcycles_lim; /* cycles before stop bh */ -+ uint32_t host_txcycles; /* current bh cycles */ -+ uint32_t host_rdh; /* mirror rdh, unused */ -+ uint32_t host_need_rxkick; /* ??? */ -+ -+}; -+#endif /* PARAVIRT */ -+ - /* - * The transmit ring, one per tx queue - */ -@@ -429,6 +451,7 @@ - struct em_int_delay_info tx_abs_int_delay; - struct em_int_delay_info rx_int_delay; - struct em_int_delay_info rx_abs_int_delay; -+ struct em_int_delay_info tx_itr; - - /* Misc stats maintained by the driver */ - unsigned long dropped_pkts; -@@ -440,6 +463,24 @@ - unsigned long watchdog_events; - unsigned long link_irq; - -+#ifdef MITIGATION -+ /* 0 = idle; 1xxxx int-pending; 3xxxx int + d pending + tdt */ -+#define MIT_PENDING_INT 0x10000 /* pending interrupt */ -+#define MIT_PENDING_TDT 0x30000 /* both intr and tdt write are pending */ -+ uint32_t shadow_tdt; -+ uint32_t mit_enable; -+ uint32_t rx_retries; /* optimize rx loop */ -+#endif /* MITIGATION */ -+ -+#ifdef PARAVIRT -+ struct em_dma_alloc csb_mem; /* phys address */ -+ struct e1000_csb *csb; /* virtual addr */ -+ uint32_t tdt_csb_count;// XXX stat -+ uint32_t tdt_reg_count;// XXX stat -+ uint32_t tdt_int_count;// XXX stat -+ uint32_t guest_need_kick_count;// XXX stat -+#endif /* PARAVIRT */ -+ - struct e1000_hw_stats stats; - }; - -Index: sys/dev/e1000/if_lem.c -=================================================================== ---- sys/dev/e1000/if_lem.c (revision 246924) -+++ sys/dev/e1000/if_lem.c (working copy) -@@ -32,6 +32,9 @@ - ******************************************************************************/ - /*$FreeBSD$*/ - -+#define MITIGATION -+#define PARAVIRT /* enable virtio-like synchronization */ -+ - #ifdef HAVE_KERNEL_OPTION_HEADERS - #include "opt_device_polling.h" - #include "opt_inet.h" -@@ -281,12 +284,15 @@ - #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) - #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) - -+#define MAX_INTS_PER_SEC 8000 -+#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) -+ - static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); - static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); - static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); - static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); --static int lem_rxd = EM_DEFAULT_RXD; --static int lem_txd = EM_DEFAULT_TXD; -+static int lem_rxd = 8*EM_DEFAULT_RXD; -+static int lem_txd = 8*EM_DEFAULT_TXD; - static int lem_smart_pwr_down = FALSE; - - /* Controls whether promiscuous also shows bad packets */ -@@ -442,6 +448,11 @@ - &adapter->tx_abs_int_delay, - E1000_REGISTER(&adapter->hw, E1000_TADV), - lem_tx_abs_int_delay_dflt); -+ lem_add_int_delay_sysctl(adapter, "itr", -+ "interrupt delay limit in usecs/4", -+ &adapter->tx_itr, -+ E1000_REGISTER(&adapter->hw, E1000_ITR), -+ DEFAULT_ITR); - } - - /* Sysctls for limiting the amount of work done in the taskqueue */ -@@ -449,6 +460,14 @@ - "max number of rx packets to process", &adapter->rx_process_limit, - lem_rx_process_limit); - -+#ifdef MITIGATION -+ /* Sysctls to control mitigation */ -+ lem_add_rx_process_limit(adapter, "mit_enable", -+ "driver TDT mitigation", &adapter->mit_enable, 0); -+ lem_add_rx_process_limit(adapter, "rx_retries", -+ "driver rx retries", &adapter->rx_retries, 0); -+#endif /* MITIGATION */ -+ - /* Sysctl for setting the interface flow control */ - lem_set_flow_cntrl(adapter, "flow_control", - "flow control setting", -@@ -506,6 +525,46 @@ - */ - adapter->hw.mac.report_tx_early = 1; - -+#ifdef PARAVIRT -+ if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) { -+ uint64_t bus_addr; -+ -+ device_printf(dev, "paravirt support on dev %p\n", adapter); -+ tsize = 4096; // XXX one page for the csb -+ if (lem_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) { -+ device_printf(dev, "Unable to allocate csb memory\n"); -+ error = ENOMEM; -+ goto err_csb; -+ } -+ /* Setup the Base of the CSB */ -+ adapter->csb = (struct e1000_csb *)adapter->csb_mem.dma_vaddr; -+ /* force the first kick */ -+ adapter->csb->host_need_txkick = 1; /* txring empty */ -+ adapter->csb->guest_need_rxkick = 1; /* no rx packets */ -+ bus_addr = adapter->csb_mem.dma_paddr; -+ lem_add_rx_process_limit(adapter, "csb_on", -+ "enable paravirt.", &adapter->csb->guest_csb_on, 0); -+ lem_add_rx_process_limit(adapter, "txc_lim", -+ "txc_lim", &adapter->csb->host_txcycles_lim, 1); -+ /* some stats */ -+#define PA_SC(name, var, val) \ -+ lem_add_rx_process_limit(adapter, name, name, var, val) -+ PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1); -+ PA_SC("host_need_rxkick",&adapter->csb->host_need_rxkick, 1); -+ PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0); -+ PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1); -+ PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0); -+ PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0); -+ PA_SC("tdt_int_count",&adapter->tdt_int_count, 0); -+ PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0); -+ /* tell the host where the block is */ -+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAH, -+ (u32)(bus_addr >> 32)); -+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAL, -+ (u32)bus_addr); -+ } -+#endif /* PARAVIRT */ -+ - tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), - EM_DBA_ALIGN); - -@@ -664,6 +723,11 @@ - err_rx_desc: - lem_dma_free(adapter, &adapter->txdma); - err_tx_desc: -+#ifdef PARAVIRT -+ lem_dma_free(adapter, &adapter->csb_mem); -+err_csb: -+#endif /* PARAVIRT */ -+ - err_pci: - if (adapter->ifp != NULL) - if_free(adapter->ifp); -@@ -751,6 +815,12 @@ - adapter->rx_desc_base = NULL; - } - -+#ifdef PARAVIRT -+ if (adapter->csb) { -+ lem_dma_free(adapter, &adapter->csb_mem); -+ adapter->csb = NULL; -+ } -+#endif /* PARAVIRT */ - lem_release_hw_control(adapter); - free(adapter->mta, M_DEVBUF); - EM_TX_LOCK_DESTROY(adapter); -@@ -860,6 +930,15 @@ - } - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; -+#ifdef PARAVIRT -+ if (ifp->if_drv_flags & IFF_DRV_OACTIVE && adapter->csb && -+ adapter->csb->guest_csb_on && !adapter->csb->guest_need_txkick) { -+ adapter->csb->guest_need_txkick = 1; -+ adapter->guest_need_kick_count++; -+ // XXX memory barrier -+ lem_txeof(adapter); // XXX possibly clear IFF_DRV_OACTIVE -+ } -+#endif /* PARAVIRT */ - - return; - } -@@ -1300,6 +1379,7 @@ - lem_rxeof(adapter, -1, NULL); - - EM_TX_LOCK(adapter); -+ adapter->tdt_int_count++; - lem_txeof(adapter); - if (ifp->if_drv_flags & IFF_DRV_RUNNING && - !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) -@@ -1337,12 +1417,17 @@ - - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { -- lem_rxeof(adapter, adapter->rx_process_limit, NULL); -+ bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL); - EM_TX_LOCK(adapter); -+ adapter->tdt_int_count++; - lem_txeof(adapter); - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - lem_start_locked(ifp); - EM_TX_UNLOCK(adapter); -+ if (more) { -+ taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); -+ return; -+ } - } - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) -@@ -1702,6 +1787,35 @@ - */ - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); -+ -+#ifdef PARAVIRT -+ if (adapter->csb) { -+ adapter->csb->guest_tdt = i; -+ /* XXX memory barrier ? */ -+ if (adapter->csb->guest_csb_on && -+ !adapter->csb->host_need_txkick) { -+ if (adapter->num_tx_desc_avail <= 64) {// XXX -+ lem_txeof(adapter); -+ } -+ adapter->tdt_csb_count++; -+ return (0); -+ } -+ } -+#endif /* PARAVIRT */ -+ -+#ifdef MITIGATION -+ if (adapter->mit_enable) { -+ if (adapter->shadow_tdt & MIT_PENDING_INT) { -+ /* signal intr and data pending */ -+ adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff); -+ return (0); -+ } else { -+ adapter->shadow_tdt = MIT_PENDING_INT; -+ } -+ } -+ adapter->tdt_reg_count++; -+#endif /* MITIGATION */ -+ - if (adapter->hw.mac.type == e1000_82547 && - adapter->link_duplex == HALF_DUPLEX) - lem_82547_move_tail(adapter); -@@ -1957,6 +2071,16 @@ - - lem_smartspeed(adapter); - -+#ifdef PARAVIRT -+ /* recover space if needed */ -+ if (adapter->csb && adapter->csb->guest_csb_on && -+ (adapter->watchdog_check == TRUE) && -+ (ticks - adapter->watchdog_time > EM_WATCHDOG) && -+ (adapter->num_tx_desc_avail != adapter->num_tx_desc) ) { -+ lem_txeof(adapter); -+ /* XXX should also recover from stalls ? */ -+ } -+#endif /* PARAVIRT */ - /* - * We check the watchdog: the time since - * the last TX descriptor was cleaned. -@@ -3027,6 +3151,16 @@ - adapter->next_tx_to_clean = first; - adapter->num_tx_desc_avail = num_avail; - -+#ifdef MITIGATION -+ if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) { -+ /* a tdt write is pending, do it */ -+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), -+ 0xffff & adapter->shadow_tdt); -+ adapter->shadow_tdt = MIT_PENDING_INT; -+ } else { -+ adapter->shadow_tdt = 0; // disable -+ } -+#endif /* MITIGATION */ - /* - * If we have enough room, clear IFF_DRV_OACTIVE to - * tell the stack that it is OK to send packets. -@@ -3034,6 +3168,12 @@ - */ - if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) { - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; -+#ifdef PARAVIRT -+ if (adapter->csb) { -+ adapter->csb->guest_need_txkick = 0; -+ // XXX memory barrier -+ } -+#endif /* PARAVIRT */ - if (adapter->num_tx_desc_avail == adapter->num_tx_desc) { - adapter->watchdog_check = FALSE; - return; -@@ -3246,8 +3386,6 @@ - * Enable receive unit. - * - **********************************************************************/ --#define MAX_INTS_PER_SEC 8000 --#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) - - static void - lem_initialize_receive_unit(struct adapter *adapter) -@@ -3347,9 +3485,16 @@ - - if (t >= na->num_rx_desc) - t -= na->num_rx_desc; -+#ifdef PARAVIRT -+ adapter->csb->guest_rdt = t; -+#endif /* PARAVIRT */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), t); -- } else -+ return; -+ } - #endif /* DEV_NETMAP */ -+#ifdef PARAVIRT -+ adapter->csb->guest_rdt = adapter->num_rx_desc - 1; -+#endif /* PARAVIRT */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), adapter->num_rx_desc - 1); - - return; -@@ -3426,7 +3571,12 @@ - u16 len, desc_len, prev_len_adj; - int i, rx_sent = 0; - struct e1000_rx_desc *current_desc; -+ int retries; - -+#ifdef PARAVIRT -+ ND("clear guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ adapter->csb->guest_need_rxkick = 0; -+#endif /* PARAVIRT */ - EM_RX_LOCK(adapter); - i = adapter->next_rx_desc_to_check; - current_desc = &adapter->rx_desc_base[i]; -@@ -3443,19 +3593,39 @@ - } - #endif /* DEV_NETMAP */ - -+#if 0 // XXX optimization ? - if (!((current_desc->status) & E1000_RXD_STAT_DD)) { - if (done != NULL) - *done = rx_sent; - EM_RX_UNLOCK(adapter); - return (FALSE); - } -+#endif /* 0 */ - -+ retries = 0; - while (count != 0 && ifp->if_drv_flags & IFF_DRV_RUNNING) { - struct mbuf *m = NULL; - - status = current_desc->status; -- if ((status & E1000_RXD_STAT_DD) == 0) -+ if ((status & E1000_RXD_STAT_DD) == 0) { -+ if (++retries <= adapter->rx_retries) { -+ continue; -+ } -+#ifdef PARAVIRT -+ if (adapter->csb->guest_need_rxkick == 0) { -+ ND("set guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ adapter->csb->guest_need_rxkick = 1; -+ continue; -+ } -+#endif /* PARAVIRT */ - break; -+ } -+#ifdef PARAVIRT -+ if (adapter->csb->guest_need_rxkick) -+ ND("clear again guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ adapter->csb->guest_need_rxkick = 0; -+#endif /* PARAVIRT */ -+ retries = 0; - - mp = adapter->rx_buffer_area[i].m_head; - /* -@@ -3599,6 +3769,10 @@ - /* Advance the E1000's Receive Queue #0 "Tail Pointer". */ - if (--i < 0) - i = adapter->num_rx_desc - 1; -+#ifdef PARAVIRT -+ adapter->csb->guest_rdt = i; -+ if (!adapter->csb->guest_csb_on || adapter->csb->host_need_rxkick) -+#endif /* PARAVIRT */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); - if (done != NULL) - *done = rx_sent; -@@ -4588,6 +4762,8 @@ - return (EINVAL); - info->value = usecs; - ticks = EM_USECS_TO_TICKS(usecs); -+ if (info->offset == E1000_ITR) /* units are 256ns here */ -+ ticks *= 4; - - adapter = info->adapter; - -Index: sys/dev/e1000/if_lem.h -=================================================================== ---- sys/dev/e1000/if_lem.h (revision 246924) -+++ sys/dev/e1000/if_lem.h (working copy) -@@ -265,6 +265,28 @@ - #define PICOSECS_PER_TICK 20833 - #define TSYNC_PORT 319 /* UDP port for the protocol */ - -+#ifdef PARAVIRT -+#define E1000_PARA_SUBDEV 0x1101 /* special id */ -+#define E1000_CSBAL 0x02830 /* csb physical address */ -+#define E1000_CSBAH 0x02834 -+struct e1000_csb { /* comm. block */ -+ uint32_t guest_tdt; /* signals from guest */ -+ uint32_t guest_need_txkick; /* out of tx bufs */ -+ uint32_t guest_need_rxkick; /* out of rx bufs */ -+ uint32_t guest_csb_on; /* mode enabled on the guest */ -+ uint32_t guest_rdt; /* signals from guest */ -+ uint32_t pad[11]; /* to 64 bytes */ -+ -+ uint32_t host_tdh; /* mirror tdh, unused */ -+ uint32_t host_need_txkick; /* enable mode */ -+ uint32_t host_txcycles_lim; /* cycles before stop bh */ -+ uint32_t host_txcycles; /* current bh cycles */ -+ uint32_t host_rdh; /* mirror rdh, unused */ -+ uint32_t host_need_rxkick; /* ??? */ -+ -+}; -+#endif /* PARAVIRT */ -+ - /* - * Bus dma allocation structure used by - * e1000_dma_malloc and e1000_dma_free. -@@ -363,6 +385,7 @@ - struct em_int_delay_info tx_abs_int_delay; - struct em_int_delay_info rx_int_delay; - struct em_int_delay_info rx_abs_int_delay; -+ struct em_int_delay_info tx_itr; - - /* - * Transmit definitions -@@ -436,7 +459,24 @@ - boolean_t pcix_82544; - boolean_t in_detach; - -+#ifdef MITIGATION -+ /* 0 = idle; 1xxxx int-pending; 3xxxx int + d pending + tdt */ -+#define MIT_PENDING_INT 0x10000 /* pending interrupt */ -+#define MIT_PENDING_TDT 0x30000 /* both intr and tdt write are pending */ -+ uint32_t shadow_tdt; -+ uint32_t mit_enable; -+ uint32_t rx_retries; /* optimize rx loop */ -+#endif /* MITIGATION */ - -+#ifdef PARAVIRT -+ struct em_dma_alloc csb_mem; /* phys address */ -+ struct e1000_csb *csb; /* virtual addr */ -+ uint32_t tdt_csb_count;// XXX stat -+ uint32_t tdt_reg_count;// XXX stat -+ uint32_t tdt_int_count;// XXX stat -+ uint32_t guest_need_kick_count;// XXX stat -+#endif /* PARAVIRT */ -+ - struct e1000_hw_stats stats; - }; - diff --git a/private/extra/20130222-bsd-em-head.diff b/private/extra/20130222-bsd-em-head.diff deleted file mode 100644 index b8eb40bc5..000000000 --- a/private/extra/20130222-bsd-em-head.diff +++ /dev/null @@ -1,546 +0,0 @@ -Index: head/release/picobsd/floppy.tree/etc/ttys -=================================================================== ---- head/release/picobsd/floppy.tree/etc/ttys (revision 247068) -+++ head/release/picobsd/floppy.tree/etc/ttys (working copy) -@@ -8,6 +8,7 @@ - # This entry needed for asking password when init goes to single-user mode - # If you want to be asked for password, change "secure" to "insecure" here - #console none unknown off secure -+console "/usr/libexec/getty std.9600" vt100 on secure - vga none xterm off secure - # - ttyv0 "/usr/libexec/getty Pc" xterm on secure -Index: head/sys/dev/e1000/if_em.h -=================================================================== ---- head/sys/dev/e1000/if_em.h (revision 247068) -+++ head/sys/dev/e1000/if_em.h (working copy) -@@ -271,6 +271,28 @@ - int value; /* Current value in usecs */ - }; - -+#ifdef PARAVIRT -+#define E1000_PARA_SUBDEV 0x1101 /* special id */ -+#define E1000_CSBAL 0x02830 /* csb physical address */ -+#define E1000_CSBAH 0x02834 -+struct e1000_csb { /* comm. block */ -+ uint32_t guest_tdt; /* signals from guest */ -+ uint32_t guest_need_txkick; /* out of tx bufs */ -+ uint32_t guest_need_rxkick; /* out of rx bufs */ -+ uint32_t guest_csb_on; /* mode enabled on the guest */ -+ uint32_t guest_rdt; /* signals from guest */ -+ uint32_t pad[11]; /* to 64 bytes */ -+ -+ uint32_t host_tdh; /* mirror tdh, unused */ -+ uint32_t host_need_txkick; /* enable mode */ -+ uint32_t host_txcycles_lim; /* cycles before stop bh */ -+ uint32_t host_txcycles; /* current bh cycles */ -+ uint32_t host_rdh; /* mirror rdh, unused */ -+ uint32_t host_need_rxkick; /* ??? */ -+ -+}; -+#endif /* PARAVIRT */ -+ - /* - * The transmit ring, one per tx queue - */ -@@ -429,6 +451,7 @@ - struct em_int_delay_info tx_abs_int_delay; - struct em_int_delay_info rx_int_delay; - struct em_int_delay_info rx_abs_int_delay; -+ struct em_int_delay_info tx_itr; - - /* Misc stats maintained by the driver */ - unsigned long dropped_pkts; -@@ -440,6 +463,24 @@ - unsigned long watchdog_events; - unsigned long link_irq; - -+#ifdef MITIGATION -+ /* 0 = idle; 1xxxx int-pending; 3xxxx int + d pending + tdt */ -+#define MIT_PENDING_INT 0x10000 /* pending interrupt */ -+#define MIT_PENDING_TDT 0x30000 /* both intr and tdt write are pending */ -+ uint32_t shadow_tdt; -+ uint32_t mit_enable; -+ uint32_t rx_retries; /* optimize rx loop */ -+#endif /* MITIGATION */ -+ -+#ifdef PARAVIRT -+ struct em_dma_alloc csb_mem; /* phys address */ -+ struct e1000_csb *csb; /* virtual addr */ -+ uint32_t tdt_csb_count;// XXX stat -+ uint32_t tdt_reg_count;// XXX stat -+ uint32_t tdt_int_count;// XXX stat -+ uint32_t guest_need_kick_count;// XXX stat -+#endif /* PARAVIRT */ -+ - struct e1000_hw_stats stats; - }; - -Index: head/sys/dev/e1000/if_lem.c -=================================================================== ---- head/sys/dev/e1000/if_lem.c (revision 247068) -+++ head/sys/dev/e1000/if_lem.c (working copy) -@@ -32,6 +32,9 @@ - ******************************************************************************/ - /*$FreeBSD$*/ - -+#define LEM_SEND_COMBINING -+#define LEM_PARAVIRT /* enable virtio-like synchronization */ -+ - #ifdef HAVE_KERNEL_OPTION_HEADERS - #include "opt_device_polling.h" - #include "opt_inet.h" -@@ -281,12 +284,15 @@ - #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) - #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) - -+#define MAX_INTS_PER_SEC 8000 -+#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) -+ - static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); - static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); - static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); - static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); --static int lem_rxd = EM_DEFAULT_RXD; --static int lem_txd = EM_DEFAULT_TXD; -+static int lem_rxd = 8*EM_DEFAULT_RXD; -+static int lem_txd = 8*EM_DEFAULT_TXD; - static int lem_smart_pwr_down = FALSE; - - /* Controls whether promiscuous also shows bad packets */ -@@ -442,6 +448,11 @@ - &adapter->tx_abs_int_delay, - E1000_REGISTER(&adapter->hw, E1000_TADV), - lem_tx_abs_int_delay_dflt); -+ lem_add_int_delay_sysctl(adapter, "itr", -+ "interrupt delay limit in usecs/4", -+ &adapter->tx_itr, -+ E1000_REGISTER(&adapter->hw, E1000_ITR), -+ DEFAULT_ITR); - } - - /* Sysctls for limiting the amount of work done in the taskqueue */ -@@ -449,6 +460,14 @@ - "max number of rx packets to process", &adapter->rx_process_limit, - lem_rx_process_limit); - -+#ifdef LEM_SEND_COMBINING -+ /* Sysctls to control mitigation */ -+ lem_add_rx_process_limit(adapter, "tx_sc", -+ "tx send combining", &adapter->tx_sc_on, 0); -+ lem_add_rx_process_limit(adapter, "rx_retries", -+ "driver rx retries", &adapter->rx_retries, 0); -+#endif /* LEM_SEND_COMBINING */ -+ - /* Sysctl for setting the interface flow control */ - lem_set_flow_cntrl(adapter, "flow_control", - "flow control setting", -@@ -506,6 +525,46 @@ - */ - adapter->hw.mac.report_tx_early = 1; - -+#ifdef LEM_PARAVIRT -+ if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) { -+ uint64_t bus_addr; -+ -+ device_printf(dev, "paravirt support on dev %p\n", adapter); -+ tsize = 4096; // XXX one page for the csb -+ if (lem_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) { -+ device_printf(dev, "Unable to allocate csb memory\n"); -+ error = ENOMEM; -+ goto err_csb; -+ } -+ /* Setup the Base of the CSB */ -+ adapter->csb = (struct e1000_csb *)adapter->csb_mem.dma_vaddr; -+ /* force the first kick */ -+ adapter->csb->host_need_txkick = 1; /* txring empty */ -+ adapter->csb->guest_need_rxkick = 1; /* no rx packets */ -+ bus_addr = adapter->csb_mem.dma_paddr; -+ lem_add_rx_process_limit(adapter, "csb_on", -+ "enable paravirt.", &adapter->csb->guest_csb_on, 0); -+ lem_add_rx_process_limit(adapter, "txc_lim", -+ "txc_lim", &adapter->csb->host_txcycles_lim, 1); -+ /* some stats */ -+#define PA_SC(name, var, val) \ -+ lem_add_rx_process_limit(adapter, name, name, var, val) -+ PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1); -+ PA_SC("host_need_rxkick",&adapter->csb->host_need_rxkick, 1); -+ PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0); -+ PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1); -+ PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0); -+ PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0); -+ PA_SC("tdt_int_count",&adapter->tdt_int_count, 0); -+ PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0); -+ /* tell the host where the block is */ -+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAH, -+ (u32)(bus_addr >> 32)); -+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAL, -+ (u32)bus_addr); -+ } -+#endif /* LEM_PARAVIRT */ -+ - tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), - EM_DBA_ALIGN); - -@@ -664,6 +723,11 @@ - err_rx_desc: - lem_dma_free(adapter, &adapter->txdma); - err_tx_desc: -+#ifdef LEM_PARAVIRT -+ lem_dma_free(adapter, &adapter->csb_mem); -+err_csb: -+#endif /* LEM_PARAVIRT */ -+ - err_pci: - if (adapter->ifp != NULL) - if_free(adapter->ifp); -@@ -751,6 +815,12 @@ - adapter->rx_desc_base = NULL; - } - -+#ifdef LEM_PARAVIRT -+ if (adapter->csb) { -+ lem_dma_free(adapter, &adapter->csb_mem); -+ adapter->csb = NULL; -+ } -+#endif /* LEM_PARAVIRT */ - lem_release_hw_control(adapter); - free(adapter->mta, M_DEVBUF); - EM_TX_LOCK_DESTROY(adapter); -@@ -860,6 +930,15 @@ - } - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; -+#ifdef LEM_PARAVIRT -+ if (ifp->if_drv_flags & IFF_DRV_OACTIVE && adapter->csb && -+ adapter->csb->guest_csb_on && !adapter->csb->guest_need_txkick) { -+ adapter->csb->guest_need_txkick = 1; -+ adapter->guest_need_kick_count++; -+ // XXX memory barrier -+ lem_txeof(adapter); // XXX possibly clear IFF_DRV_OACTIVE -+ } -+#endif /* LEM_PARAVIRT */ - - return; - } -@@ -1300,6 +1379,7 @@ - lem_rxeof(adapter, -1, NULL); - - EM_TX_LOCK(adapter); -+ adapter->tdt_int_count++; - lem_txeof(adapter); - if (ifp->if_drv_flags & IFF_DRV_RUNNING && - !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) -@@ -1337,12 +1417,17 @@ - - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { -- lem_rxeof(adapter, adapter->rx_process_limit, NULL); -+ bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL); - EM_TX_LOCK(adapter); -+ adapter->tdt_int_count++; - lem_txeof(adapter); - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - lem_start_locked(ifp); - EM_TX_UNLOCK(adapter); -+ if (more) { -+ taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); -+ return; -+ } - } - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) -@@ -1702,6 +1787,35 @@ - */ - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); -+ -+#ifdef LEM_PARAVIRT -+ if (adapter->csb) { -+ adapter->csb->guest_tdt = i; -+ /* XXX memory barrier ? */ -+ if (adapter->csb->guest_csb_on && -+ !adapter->csb->host_need_txkick) { -+ if (adapter->num_tx_desc_avail <= 64) {// XXX -+ lem_txeof(adapter); -+ } -+ adapter->tdt_csb_count++; -+ return (0); -+ } -+ } -+#endif /* LEM_PARAVIRT */ -+ -+#ifdef LEM_SEND_COMBINING -+ if (adapter->tx_sc_on) { -+ if (adapter->shadow_tdt & MIT_PENDING_INT) { -+ /* signal intr and data pending */ -+ adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff); -+ return (0); -+ } else { -+ adapter->shadow_tdt = MIT_PENDING_INT; -+ } -+ } -+ adapter->tdt_reg_count++; -+#endif /* LEM_SEND_COMBINING */ -+ - if (adapter->hw.mac.type == e1000_82547 && - adapter->link_duplex == HALF_DUPLEX) - lem_82547_move_tail(adapter); -@@ -1957,6 +2071,16 @@ - - lem_smartspeed(adapter); - -+#ifdef LEM_PARAVIRT -+ /* recover space if needed */ -+ if (adapter->csb && adapter->csb->guest_csb_on && -+ (adapter->watchdog_check == TRUE) && -+ (ticks - adapter->watchdog_time > EM_WATCHDOG) && -+ (adapter->num_tx_desc_avail != adapter->num_tx_desc) ) { -+ lem_txeof(adapter); -+ /* XXX should also recover from stalls ? */ -+ } -+#endif /* LEM_PARAVIRT */ - /* - * We check the watchdog: the time since - * the last TX descriptor was cleaned. -@@ -3027,6 +3151,16 @@ - adapter->next_tx_to_clean = first; - adapter->num_tx_desc_avail = num_avail; - -+#ifdef LEM_SEND_COMBINING -+ if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) { -+ /* a tdt write is pending, do it */ -+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), -+ 0xffff & adapter->shadow_tdt); -+ adapter->shadow_tdt = MIT_PENDING_INT; -+ } else { -+ adapter->shadow_tdt = 0; // disable -+ } -+#endif /* LEM_SEND_COMBINING */ - /* - * If we have enough room, clear IFF_DRV_OACTIVE to - * tell the stack that it is OK to send packets. -@@ -3034,6 +3168,12 @@ - */ - if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) { - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; -+#ifdef LEM_LEM_PARAVIRT -+ if (adapter->csb) { -+ adapter->csb->guest_need_txkick = 0; -+ // XXX memory barrier -+ } -+#endif /* LEM_LEM_PARAVIRT */ - if (adapter->num_tx_desc_avail == adapter->num_tx_desc) { - adapter->watchdog_check = FALSE; - return; -@@ -3246,8 +3386,6 @@ - * Enable receive unit. - * - **********************************************************************/ --#define MAX_INTS_PER_SEC 8000 --#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) - - static void - lem_initialize_receive_unit(struct adapter *adapter) -@@ -3347,9 +3485,16 @@ - - if (t >= na->num_rx_desc) - t -= na->num_rx_desc; -+#ifdef LEM_PARAVIRT -+ adapter->csb->guest_rdt = t; -+#endif /* LEM_PARAVIRT */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), t); -- } else -+ return; -+ } - #endif /* DEV_NETMAP */ -+#ifdef LEM_PARAVIRT -+ adapter->csb->guest_rdt = adapter->num_rx_desc - 1; -+#endif /* LEM_PARAVIRT */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), adapter->num_rx_desc - 1); - - return; -@@ -3426,7 +3571,12 @@ - u16 len, desc_len, prev_len_adj; - int i, rx_sent = 0; - struct e1000_rx_desc *current_desc; -+ int retries; - -+#ifdef LEM_PARAVIRT -+ ND("clear guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ adapter->csb->guest_need_rxkick = 0; -+#endif /* LEM_PARAVIRT */ - EM_RX_LOCK(adapter); - i = adapter->next_rx_desc_to_check; - current_desc = &adapter->rx_desc_base[i]; -@@ -3443,19 +3593,39 @@ - } - #endif /* DEV_NETMAP */ - -+#if 0 // XXX optimization ? - if (!((current_desc->status) & E1000_RXD_STAT_DD)) { - if (done != NULL) - *done = rx_sent; - EM_RX_UNLOCK(adapter); - return (FALSE); - } -+#endif /* 0 */ - -+ retries = 0; - while (count != 0 && ifp->if_drv_flags & IFF_DRV_RUNNING) { - struct mbuf *m = NULL; - - status = current_desc->status; -- if ((status & E1000_RXD_STAT_DD) == 0) -+ if ((status & E1000_RXD_STAT_DD) == 0) { -+ if (++retries <= adapter->rx_retries) { -+ continue; -+ } -+#ifdef LEM_PARAVIRT -+ if (adapter->csb->guest_need_rxkick == 0) { -+ ND("set guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ adapter->csb->guest_need_rxkick = 1; -+ continue; -+ } -+#endif /* LEM_PARAVIRT */ - break; -+ } -+#ifdef LEM_PARAVIRT -+ if (adapter->csb->guest_need_rxkick) -+ ND("clear again guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ adapter->csb->guest_need_rxkick = 0; -+#endif /* LEM_PARAVIRT */ -+ retries = 0; - - mp = adapter->rx_buffer_area[i].m_head; - /* -@@ -3599,6 +3769,10 @@ - /* Advance the E1000's Receive Queue #0 "Tail Pointer". */ - if (--i < 0) - i = adapter->num_rx_desc - 1; -+#ifdef LEM_PARAVIRT -+ adapter->csb->guest_rdt = i; -+ if (!adapter->csb->guest_csb_on || adapter->csb->host_need_rxkick) -+#endif /* LEM_PARAVIRT */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); - if (done != NULL) - *done = rx_sent; -@@ -4584,6 +4758,8 @@ - return (EINVAL); - info->value = usecs; - ticks = EM_USECS_TO_TICKS(usecs); -+ if (info->offset == E1000_ITR) /* units are 256ns here */ -+ ticks *= 4; - - adapter = info->adapter; - -Index: head/sys/dev/e1000/if_lem.h -=================================================================== ---- head/sys/dev/e1000/if_lem.h (revision 247068) -+++ head/sys/dev/e1000/if_lem.h (working copy) -@@ -265,6 +265,28 @@ - #define PICOSECS_PER_TICK 20833 - #define TSYNC_PORT 319 /* UDP port for the protocol */ - -+#ifdef PARAVIRT -+#define E1000_PARA_SUBDEV 0x1101 /* special id */ -+#define E1000_CSBAL 0x02830 /* csb physical address */ -+#define E1000_CSBAH 0x02834 -+struct e1000_csb { /* comm. block */ -+ uint32_t guest_tdt; /* signals from guest */ -+ uint32_t guest_need_txkick; /* out of tx bufs */ -+ uint32_t guest_need_rxkick; /* out of rx bufs */ -+ uint32_t guest_csb_on; /* mode enabled on the guest */ -+ uint32_t guest_rdt; /* signals from guest */ -+ uint32_t pad[11]; /* to 64 bytes */ -+ -+ uint32_t host_tdh; /* mirror tdh, unused */ -+ uint32_t host_need_txkick; /* enable mode */ -+ uint32_t host_txcycles_lim; /* cycles before stop bh */ -+ uint32_t host_txcycles; /* current bh cycles */ -+ uint32_t host_rdh; /* mirror rdh, unused */ -+ uint32_t host_need_rxkick; /* ??? */ -+ -+}; -+#endif /* PARAVIRT */ -+ - /* - * Bus dma allocation structure used by - * e1000_dma_malloc and e1000_dma_free. -@@ -363,6 +385,7 @@ - struct em_int_delay_info tx_abs_int_delay; - struct em_int_delay_info rx_int_delay; - struct em_int_delay_info rx_abs_int_delay; -+ struct em_int_delay_info tx_itr; - - /* - * Transmit definitions -@@ -436,7 +459,24 @@ - boolean_t pcix_82544; - boolean_t in_detach; - -+#ifdef LEM_SEND_COMBINING -+ /* 0 = idle; 1xxxx int-pending; 3xxxx int + d pending + tdt */ -+#define MIT_PENDING_INT 0x10000 /* pending interrupt */ -+#define MIT_PENDING_TDT 0x30000 /* both intr and tdt write are pending */ -+ uint32_t shadow_tdt; -+ uint32_t tx_sc_on; -+ uint32_t rx_retries; /* optimize rx loop */ -+#endif /* LEM_SEND_COMBINING */ - -+#ifdef LEM_PARAVIRT -+ struct em_dma_alloc csb_mem; /* phys address */ -+ struct e1000_csb *csb; /* virtual addr */ -+ uint32_t tdt_csb_count;// XXX stat -+ uint32_t tdt_reg_count;// XXX stat -+ uint32_t tdt_int_count;// XXX stat -+ uint32_t guest_need_kick_count;// XXX stat -+#endif /* LEM_PARAVIRT */ -+ - struct e1000_hw_stats stats; - }; - -Index: head/tools/tools/netrate/netsend/netsend.c -=================================================================== ---- head/tools/tools/netrate/netsend/netsend.c (revision 247068) -+++ head/tools/tools/netrate/netsend/netsend.c (working copy) -@@ -49,6 +49,7 @@ - int ipv6; - struct timespec interval; - int port, port_max; -+ int burst; - long duration; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; -@@ -164,8 +165,8 @@ - * calls, but also make sure there is at least one every - * some 100 packets. - */ -- if ((long)ns < minres_ns/100) -- gettimeofday_cycles = 100; -+ if ((long)ns < minres_ns/a->burst) -+ gettimeofday_cycles = a->burst; - else - gettimeofday_cycles = minres_ns/ns; - fprintf(stderr, -@@ -288,7 +289,7 @@ - - bzero(&a, sizeof(a)); - -- if (argc != 6) -+ if (argc < 6) - usage(); - - memset(&hints, 0, sizeof(hints)); -@@ -360,6 +361,11 @@ - if (a.duration < 0 || *dummy != '\0') - usage(); - -+ if (argc > 6) -+ a.burst = strtoul(argv[6], NULL, 0); -+ if (a.burst < 1 || a.burst > 10000) -+ a.burst = 100; -+ - a.packet = malloc(payloadsize); - if (a.packet == NULL) { - perror("malloc"); diff --git a/private/extra/20130224-qemu-head.diff b/private/extra/20130224-qemu-head.diff deleted file mode 100644 index ddb6841b0..000000000 --- a/private/extra/20130224-qemu-head.diff +++ /dev/null @@ -1,1499 +0,0 @@ -diff --git a/configure b/configure -index dcaa67c..55ef412 100755 ---- a/configure -+++ b/configure -@@ -146,6 +146,7 @@ curl="" - curses="" - docs="" - fdt="" -+netmap="" - nptl="" - pixman="" - sdl="" -@@ -740,6 +741,10 @@ for opt do - ;; - --enable-vde) vde="yes" - ;; -+ --disable-netmap) netmap="no" -+ ;; -+ --enable-netmap) netmap="yes" -+ ;; - --disable-xen) xen="no" - ;; - --enable-xen) xen="yes" -@@ -1117,6 +1122,8 @@ echo " --disable-uuid disable uuid support" - echo " --enable-uuid enable uuid support" - echo " --disable-vde disable support for vde network" - echo " --enable-vde enable support for vde network" -+echo " --disable-netmap disable support for netmap network" -+echo " --enable-netmap enable support for netmap network" - echo " --disable-linux-aio disable Linux AIO support" - echo " --enable-linux-aio enable Linux AIO support" - echo " --disable-cap-ng disable libcap-ng support" -@@ -1939,6 +1946,26 @@ EOF - fi - - ########################################## -+# netmap headers probe -+if test "$netmap" != "no" ; then -+ cat > $TMPC << EOF -+#include -+#include -+#include -+#include -+int main(void) { return 0; } -+EOF -+ if compile_prog "" "" ; then -+ netmap=yes -+ else -+ if test "$netmap" = "yes" ; then -+ feature_not_found "netmap" -+ fi -+ netmap=no -+ fi -+fi -+ -+########################################## - # libcap-ng library probe - if test "$cap_ng" != "no" ; then - cap_libs="-lcap-ng" -@@ -3364,6 +3391,7 @@ echo "NPTL support $nptl" - echo "GUEST_BASE $guest_base" - echo "PIE $pie" - echo "vde support $vde" -+echo "netmap support $netmap" - echo "Linux AIO support $linux_aio" - echo "ATTR/XATTR support $attr" - echo "Install blobs $blobs" -@@ -3489,6 +3517,9 @@ fi - if test "$vde" = "yes" ; then - echo "CONFIG_VDE=y" >> $config_host_mak - fi -+if test "$netmap" = "yes" ; then -+ echo "CONFIG_NETMAP=y" >> $config_host_mak -+fi - if test "$cap_ng" = "yes" ; then - echo "CONFIG_LIBCAP=y" >> $config_host_mak - fi -diff --git a/exec.c b/exec.c -index a41bcb8..e6ef820 100644 ---- a/exec.c -+++ b/exec.c -@@ -2059,6 +2059,35 @@ static void cpu_notify_map_clients(void) - } - } - -+/* Helper function returning the contiguous segment containing -+ * a guest physical address (gpaddr). -+ * Return 0 if not existing, otherwise the segment covers the -+ * guest physical region *gpa_low .. *gpa_high - 1, and the -+ * guest-physical to host-virtual mapping is obtained as -+ * host_virtual_addr = gp_addr + *g2h_ofs -+ */ -+int address_space_mappable(AddressSpace *as, hwaddr gp_addr, -+ uint64_t *gpa_lo, uint64_t *gpa_hi, uint64_t *g2h_ofs) -+{ -+ AddressSpaceDispatch *d = as->dispatch; -+ MemoryRegionSection *section; -+ RAMBlock *block; -+ -+ section = phys_page_find(d, gp_addr >> TARGET_PAGE_BITS); -+ if (memory_region_is_ram(section->mr) && !section->readonly) { -+ QTAILQ_FOREACH(block, &ram_list.blocks, next) { -+ if (gp_addr - block->offset < block->length) { -+ *gpa_lo = block->offset; -+ *gpa_hi = block->offset + block->length; -+ *g2h_ofs = (uint64_t)block->host - block->offset; -+ return 1; -+ } -+ } -+ } -+ *gpa_lo = *gpa_hi = *g2h_ofs = 0; -+ return 0; /* cannot map */ -+} -+ - /* Map a physical memory region into a host virtual address. - * May map a subset of the requested range, given by and returned in *plen. - * May return NULL if resources needed to perform the mapping are exhausted. -diff --git a/hw/e1000.c b/hw/e1000.c -index d6fe815..16ae5de 100644 ---- a/hw/e1000.c -+++ b/hw/e1000.c -@@ -35,6 +35,59 @@ - - #include "e1000_hw.h" - -+#define MAP_RING /* map the buffers instead of pci_dma_rw() */ -+#define PARAVIRT /* use paravirtualized driver */ -+ -+#ifdef PARAVIRT -+/* -+ Support for virtio-like communication. -+ 1. the VMM advertises virtio-like synchronization setting -+ the subvendor id set to 0x1101 (E1000_PARA_SUBDEV) -+ -+ 2. the guest allocates the shared command status block (csb) and -+ write its physical address at CSBAL and CSBAH (offsets -+ 0x2830 and 0x2834, data is little endian). -+ csb->csb_on enables the mode. If disabled, the device is a -+ regular e1000. -+ -+ 3. notifications for tx and rx are exchanged without vm exits -+ if possible. In particular (only mentioning csb mode below): -+ -+ TX: host sets host_need_txkick=1 when the I/O thread bh is idle. -+ Guest updates guest_tdt and returns if host_need_txkick == 0, -+ otherwise dues a regular write to the TDT. -+ If the txring runs dry, guest sets guest_need_txkick and retries -+ to recover buffers. -+ Host reacts to writes to the TDT by clearing host_need_txkick -+ and scheduling a thread to do the reads. -+ The thread is kept active until there are packets (with a -+ configurable number of retries). Eventually it sets -+ host_need_txkick=1, does a final check for packets and blocks. -+ An interrupt is generated if guest_need_txkick == 1. -+ -+ */ -+#define E1000_PARA_SUBDEV 0x1101 -+#define E1000_CSBAL 0x02830 /* addresses for the csb */ -+#define E1000_CSBAH 0x02834 -+struct e1000_csb { -+ /* these are written by the guest */ -+ uint32_t guest_tdt; /* pkt to transmit */ -+ uint32_t guest_need_txkick; /* ran out of tx bufs, request kick */ -+ uint32_t guest_need_rxkick; /* ran out of rx pkts, request kick ? */ -+ uint32_t guest_csb_on; /* enable paravirtual mode */ -+ uint32_t guest_rdt; /* rx buffers available */ -+ uint32_t pad[11]; -+ -+ /* these are (mostly) written by the host */ -+ uint32_t host_tdh; /* shadow registea, mostly unused */ -+ uint32_t host_need_txkick; /* start the iothread */ -+ uint32_t host_txcycles_lim; /* how much to spin before sleep */ -+ uint32_t host_txcycles; /* counter, but no need to be exported */ -+ uint32_t host_rdh; /* shadow register, mostly unused */ -+ uint32_t host_need_rxkick; /* ??? */ -+}; -+#endif /* PARAVIRT */ -+ - #define E1000_DEBUG - - #ifdef E1000_DEBUG -@@ -72,7 +125,9 @@ static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL); - * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested - * Others never tested - */ --enum { E1000_DEVID = E1000_DEV_ID_82540EM }; -+enum { E1000_DEVID = E1000_DEV_ID_82540EM }; // microwire -+//enum { E1000_DEVID = E1000_DEV_ID_82573L }; // eeprom eerd -+// enum { E1000_DEVID = E1000_DEV_ID_82571EB_COPPER }; // eeprom eerd - - /* - * May need to specify additional MAC-to-PHY entries -- -@@ -84,6 +139,18 @@ enum { - /* default to E1000_DEV_ID_82540EM */ 0xc20 - }; - -+/* -+ * map a guest region into a host region -+ * if the pointer is within the region, ofs gives the displacement. -+ * valid = 0 means we should try to map it. -+ */ -+struct guest_memreg_map { -+ int valid; -+ uint64_t lo; -+ uint64_t hi; -+ uint64_t ofs; -+}; -+ - typedef struct E1000State_st { - PCIDevice dev; - NICState *nic; -@@ -131,6 +198,28 @@ typedef struct E1000State_st { - } eecd_state; - - QEMUTimer *autoneg_timer; -+ QEMUTimer *mit_timer; /* handle for the timer */ -+ uint32_t mit_timer_on; /* mitigation timer active */ -+ uint32_t mit_cause; /* pending interrupt cause */ -+ uint32_t mit_on; /* mitigation enable */ -+ -+ /* when the rxq becomes full, disable input until half empty */ -+ uint32_t rxbufs, txbufs, rxq_full; -+#ifdef MAP_RING -+ /* used for map ring */ -+ uint64_t txring_phi, rxring_phi; /* phisical address */ -+ struct e1000_tx_desc *txring; -+ struct e1000_rx_desc *rxring; -+ struct guest_memreg_map mbufs; -+#endif /* MAP_RING */ -+ -+#ifdef PARAVIRT -+ /* used for the communication block */ -+ struct e1000_csb *csb; -+ QEMUBH *tx_bh; -+ uint32_t tx_count; /* written in last round */ -+ QEMUBH *rx_bh; -+#endif /* PARAVIRT */ - } E1000State; - - #define defreg(x) x = (E1000_##x>>2) -@@ -146,8 +235,50 @@ enum { - defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), - defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA), - defreg(VET), -+ defreg(RDTR), defreg(RADV), defreg(TADV), defreg(ITR), -+#ifdef PARAVIRT -+ defreg(CSBAL), defreg(CSBAH), -+#endif /* PARAVIRT */ - }; - -+#ifdef MAP_RING -+/* -+ * try to extract an mbuf region -+ */ -+static const uint8_t *map_mbufs(E1000State *s, hwaddr addr) -+{ -+ struct guest_memreg_map *mb = &s->mbufs; -+ uint64_t a = addr; -+ DMAContext *dma; -+ -+ for (;;) { -+ if (mb->valid && a >= mb->lo && a < mb->hi) { -+ return (const uint8_t *)(a + mb->ofs); -+ } -+ dma = pci_dma_context(&s->dev); -+ mb->valid = 1; -+ -+ D("mapping %p is unset", (void *)addr); -+ if (dma_has_iommu(dma)) { -+ D("iommu range, cannot set"); -+ break; -+ } -+ if (!address_space_mappable(dma->as, addr, -+ &mb->lo, &mb->hi, &mb->ofs)) { -+ D("not mappable, cannot set"); -+ break; -+ } -+ D("segment [%p .. %p] delta %p", -+ (void *)mb->lo, (void *)mb->hi, (void *)mb->ofs); -+ -+ D("mapping txring correct %p computed %p", -+ s->txring, (void *)(s->txring_phi + mb->ofs)); -+ } -+ mb->hi = mb->lo = 0; /* empty mapping */ -+ return NULL; -+} -+#endif /* MAP_RING */ -+ - static void - e1000_link_down(E1000State *s) - { -@@ -378,12 +509,12 @@ set_eecd(E1000State *s, int index, uint32_t val) - s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS | - E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ); - if (!(E1000_EECD_CS & val)) // CS inactive; nothing to do -- return; -+ return; - if (E1000_EECD_CS & (val ^ oldval)) { // CS rise edge; reset state -- s->eecd_state.val_in = 0; -- s->eecd_state.bitnum_in = 0; -- s->eecd_state.bitnum_out = 0; -- s->eecd_state.reading = 0; -+ s->eecd_state.val_in = 0; -+ s->eecd_state.bitnum_in = 0; -+ s->eecd_state.bitnum_out = 0; -+ s->eecd_state.reading = 0; - } - if (!(E1000_EECD_SK & (val ^ oldval))) // no clock edge - return; -@@ -543,7 +674,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) - uint32_t txd_lower = le32_to_cpu(dp->lower.data); - uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D); - unsigned int split_size = txd_lower & 0xffff, bytes, sz, op; -- unsigned int msh = 0xfffff, hdr = 0; -+ unsigned int hdr = 0; - uint64_t addr; - struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; - struct e1000_tx *tp = &s->tx; -@@ -575,7 +706,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) - } - tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0; - } else { -- // legacy descriptor -+ /* legacy descriptor, max len 16288 bytes */ - tp->cptse = 0; - } - -@@ -587,11 +718,30 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) - cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2), - le16_to_cpu(dp->upper.fields.special)); - } -- -+ - addr = le64_to_cpu(dp->buffer_addr); -+ -+#ifdef MAP_RING -+ if (!tp->tse && !tp->cptse && tp->size == 0 && -+ !tp->vlan_needed && !tp->sum_needed && -+ (txd_lower & E1000_TXD_CMD_EOP)) { -+ const uint8_t *x = map_mbufs(s, addr); -+ if (x) { -+ /* XXX optimization for netmap */ -+ e1000_send_packet(s, x, split_size); -+ tp->tso_frames = 0; -+ tp->sum_needed = 0; -+ tp->vlan_needed = 0; -+ tp->size = 0; -+ tp->cptse = 0; -+ return ; -+ } -+ } -+#endif /* MAP_RING */ -+ - if (tp->tse && tp->cptse) { - hdr = tp->hdr_len; -- msh = hdr + tp->mss; -+ unsigned int msh = hdr + tp->mss; - do { - bytes = split_size; - if (tp->size + bytes > msh) -@@ -639,12 +789,16 @@ txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp) - txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) & - ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU); - dp->upper.data = cpu_to_le32(txd_upper); -+#ifdef MAP_RING -+ s->txring[s->mac_reg[TDH]].upper = dp->upper; -+#else /* !MAP_RING */ - pci_dma_write(&s->dev, base + ((char *)&dp->upper - (char *)dp), - &dp->upper, sizeof(dp->upper)); -+#endif /* !MAP_RING */ - return E1000_ICR_TXDW; - } - --static uint64_t tx_desc_base(E1000State *s) -+static inline uint64_t tx_desc_base(E1000State *s) - { - uint64_t bah = s->mac_reg[TDBAH]; - uint64_t bal = s->mac_reg[TDBAL] & ~0xf; -@@ -652,6 +806,73 @@ static uint64_t tx_desc_base(E1000State *s) - return (bah << 32) + bal; - } - -+/* helper function, 0 means the value is not set */ -+static inline void -+mit_update_delay(uint32_t *curr, uint32_t value) -+{ -+ if (value && (*curr == 0 || value < *curr)) { -+ *curr = value; -+ } -+} -+ -+/* -+ * If necessary, rearm the timer and post an interrupt. -+ * Called at the end of tx/rx routines (mit_timer_on == 0), -+ * and when the timer fires (mit_timer_on == 1). -+ * We provide a partial implementation of interrupt mitigation, -+ * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for -+ * RADV and TADV, 256ns units for ITR). RDTR is only used to enable RADV; -+ * relative timers based on TIDV and RDTR are not implemented. -+ */ -+static void -+mit_rearm_and_int(void *opaque) -+{ -+ E1000State *s = opaque; -+ uint32_t mit_delay = 0; -+ -+ /* -+ * Clear the flag. It is only set when the callback fires, -+ * and we need to clear it anyways. -+ */ -+ s->mit_timer_on = 0; -+ if (s->mit_cause == 0) { /* no events pending, we are done */ -+ return; -+ } -+ /* -+ * Compute the next mitigation delay according to pending interrupts -+ * and the current values of RADV (provided RDTR!=0), TADV and ITR. -+ * Then rearm the timer. -+ */ -+ if (s->mit_cause & (E1000_ICR_TXQE | E1000_ICR_TXDW)) { -+ mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4); -+ } -+ if (s->mac_reg[RDTR] && (s->mit_cause & E1000_ICS_RXT0)) { -+ mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4); -+ } -+ mit_update_delay(&mit_delay, s->mac_reg[ITR]); -+ -+ if (mit_delay) { -+ s->mit_timer_on = 1; -+ qemu_mod_timer(s->mit_timer, -+ qemu_get_clock_ns(vm_clock) + mit_delay * 256); -+ } -+ -+ set_ics(s, 0, s->mit_cause); -+ s->mit_cause = 0; -+} -+ -+static void -+mit_set_ics(E1000State *s, uint32_t cause) -+{ -+ if (s->mit_on == 0) { -+ set_ics(s, 0, cause); -+ return; -+ } -+ s->mit_cause |= cause; -+ if (!s->mit_timer_on) -+ mit_rearm_and_int(s); -+} -+ - static void - start_xmit(E1000State *s) - { -@@ -664,10 +885,56 @@ start_xmit(E1000State *s) - return; - } - -+#ifdef MAP_RING -+ base = tx_desc_base(s); -+ if (base != s->txring_phi) { -+ hwaddr desclen = s->mac_reg[TDLEN]; -+ s->txring_phi = base; -+ s->txring = address_space_map(pci_dma_context(&s->dev)->as, -+ base, &desclen, 0 /* is_write */); -+ D("region size is %ld", desclen); -+ } -+#endif /* MAP_RING */ -+ -+#ifdef PARAVIRT -+ /* hlim prevents staying here for too long */ -+ uint32_t hlim = s->mac_reg[TDLEN] / sizeof(desc) / 2; -+ uint32_t csb_mode = s->csb && s->csb->guest_csb_on; -+ s->tx_count = 0; -+ for (;;) { -+ if (csb_mode) { -+ if (s->mac_reg[TDH] == s->mac_reg[TDT]) { -+ /* we ran dry, exchange some notifications */ -+ smp_mb(); /* read from guest ? */ -+ s->mac_reg[TDT] = s->csb->guest_tdt; -+ tdh_start = s->csb->host_tdh = s->mac_reg[TDH]; -+ } -+ if (s->tx_count > hlim || s->mac_reg[TDH] == s->mac_reg[TDT]) { -+ /* still dry, we are done */ -+ s->csb->host_tdh = s->mac_reg[TDH]; -+ if (s->tx_count > 50) { -+ ND("sent %d in this iteration", s->tx_count); -+ } -+ smp_mb(); -+ if (s->csb->guest_need_txkick) { -+ mit_set_ics(s, cause); -+ } -+ return; -+ } -+ } else if (s->mac_reg[TDH] == s->mac_reg[TDT]) { -+ break; -+ } -+ s->tx_count++; -+#else /* !PARAVIRT */ - while (s->mac_reg[TDH] != s->mac_reg[TDT]) { -+#endif /* PARAVIRT */ -+#ifdef MAP_RING -+ desc = s->txring[s->mac_reg[TDH]]; -+#else /* !MAP_RING */ - base = tx_desc_base(s) + - sizeof(struct e1000_tx_desc) * s->mac_reg[TDH]; - pci_dma_read(&s->dev, base, &desc, sizeof(desc)); -+#endif /* MAP_RING */ - - DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH], - (void *)(intptr_t)desc.buffer_addr, desc.lower.data, -@@ -689,7 +956,7 @@ start_xmit(E1000State *s) - break; - } - } -- set_ics(s, 0, cause); -+ mit_set_ics(s, cause); - } - - static int -@@ -764,6 +1031,34 @@ e1000_set_link_status(NetClientState *nc) - static bool e1000_has_rxbufs(E1000State *s, size_t total_size) - { - int bufs; -+#ifdef PARAVIRT -+again: -+ if (s->csb && s->csb->guest_csb_on) { -+ smp_mb(); -+ s->mac_reg[RDT] = s->csb->guest_rdt; -+ } -+ bufs = s->mac_reg[RDT] - s->mac_reg[RDH]; -+ -+ if (bufs < 0) { -+ bufs += s->rxbufs; -+ } -+#if 0 -+ if (s->rxq_full && bufs < s->rxbufs / 2) { -+ return false; /* hysteresis */ -+ } -+#endif -+ s->rxq_full = (total_size > bufs * s->rxbuf_size); -+ if (s->csb && s->csb->guest_csb_on) { -+ if (!s->rxq_full) { -+ s->csb->host_need_rxkick = 0; -+ } else if (!s->csb->host_need_rxkick) { -+ s->csb->host_need_rxkick = 1; -+ goto again; -+ } -+ } -+ return !s->rxq_full; -+#else /* !PARAVIRT */ -+ - /* Fast-path short packets */ - if (total_size <= s->rxbuf_size) { - return s->mac_reg[RDH] != s->mac_reg[RDT]; -@@ -777,6 +1072,7 @@ static bool e1000_has_rxbufs(E1000State *s, size_t total_size) - return false; - } - return total_size <= bufs * s->rxbuf_size; -+#endif /* !PARAVIRT */ - } - - static int -@@ -788,7 +1084,7 @@ e1000_can_receive(NetClientState *nc) - (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1); - } - --static uint64_t rx_desc_base(E1000State *s) -+static inline uint64_t rx_desc_base(E1000State *s) - { - uint64_t bah = s->mac_reg[RDBAH]; - uint64_t bal = s->mac_reg[RDBAL] & ~0xf; -@@ -846,6 +1142,13 @@ e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size) - size -= 4; - } - -+#ifdef PARAVIRT -+ if (s->csb && s->csb->guest_csb_on) { -+ smp_mb(); -+ s->mac_reg[RDT] = s->csb->guest_rdt; -+ } -+#endif /* PARAVIRT */ -+ - rdh_start = s->mac_reg[RDH]; - desc_offset = 0; - total_size = size + fcs_len(s); -@@ -853,13 +1156,26 @@ e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size) - set_ics(s, 0, E1000_ICS_RXO); - return -1; - } -+#ifdef MAP_RING -+ base = rx_desc_base(s); -+ if (base != s->rxring_phi) { -+ hwaddr desclen = s->mac_reg[RDLEN]; -+ s->rxring_phi = base; -+ s->rxring = address_space_map(pci_dma_context(&s->dev)->as, -+ base, &desclen, 0 /* is_write */); -+ } -+#endif /* MAP_RING */ - do { - desc_size = total_size - desc_offset; - if (desc_size > s->rxbuf_size) { - desc_size = s->rxbuf_size; - } - base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH]; -+#ifdef MAP_RING -+ desc = s->rxring[s->mac_reg[RDH]]; -+#else /* !MAP_RING */ - pci_dma_read(&s->dev, base, &desc, sizeof(desc)); -+#endif /* !MAP_RING */ - desc.special = vlan_special; - desc.status |= (vlan_status | E1000_RXD_STAT_DD); - if (desc.buffer_addr) { -@@ -883,7 +1199,12 @@ e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size) - } else { // as per intel docs; skip descriptors with null buf addr - DBGOUT(RX, "Null RX descriptor!!\n"); - } -+#ifdef MAP_RING -+ s->rxring[s->mac_reg[RDH]] = desc; -+ /* XXX a barrier ? */ -+#else - pci_dma_write(&s->dev, base, &desc, sizeof(desc)); -+#endif /* !MAP_RING */ - - if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN]) - s->mac_reg[RDH] = 0; -@@ -914,7 +1235,16 @@ e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size) - s->rxbuf_min_shift) - n |= E1000_ICS_RXDMT0; - -- set_ics(s, 0, n); -+#ifdef PARAVIRT -+ // XXX in csb mode, if the guest does not need kick, we are done. -+ if (s->csb && s->csb->guest_csb_on) { -+ if (!s->csb->guest_need_rxkick) { -+ ND("guest_need_rxkick off, not kicking"); -+ return size; -+ } -+ } -+#endif /* PARAVIRT */ -+ mit_set_ics(s, n); - - return size; - } -@@ -960,6 +1290,49 @@ mac_writereg(E1000State *s, int index, uint32_t val) - s->mac_reg[index] = val; - } - -+ -+#ifdef PARAVIRT -+static void -+set_32bit(E1000State *s, int index, uint32_t val) -+{ -+ s->mac_reg[index] = val; -+ if (index == CSBAH || index == CSBAL) { -+ hwaddr desclen = 4096; -+ hwaddr base = ((uint64_t)s->mac_reg[CSBAH] << 32) | s->mac_reg[CSBAL]; -+ s->csb = address_space_map(pci_dma_context(&s->dev)->as, -+ base, &desclen, 0 /* is_write */); -+ } -+} -+ -+static void -+e1000_tx_bh(void *opaque) -+{ -+ E1000State *s = opaque; -+ struct e1000_csb *csb = s->csb; -+ -+ ND("starting tdt %d sent %d in prev.round ", csb->guest_tdt, s->tx_count); -+ s->mac_reg[TDT] = csb->guest_tdt; -+ start_xmit(s); -+ csb->host_txcycles = (s->tx_count > 0) ? 0 : csb->host_txcycles+1; -+ if (csb->host_txcycles >= csb->host_txcycles_lim) { -+ /* prepare to sleep, with race avoidance */ -+ csb->host_txcycles = 0; -+ csb->host_need_txkick = 1; -+ ND("tx bh going to sleep, set txkick"); -+ smp_mb(); -+ /* XXX read tdt */ -+ s->mac_reg[TDT] = csb->guest_tdt; -+ if (s->mac_reg[TDH] != s->mac_reg[TDT]) { -+ ND("tx bh race avoidance, clear txkick"); -+ csb->host_need_txkick = 0; -+ } -+ } -+ if (csb->host_need_txkick == 0) { -+ qemu_bh_schedule(s->tx_bh); -+ } -+} -+#endif /* PARAVIRT */ -+ - static void - set_rdt(E1000State *s, int index, uint32_t val) - { -@@ -979,6 +1352,12 @@ static void - set_dlen(E1000State *s, int index, uint32_t val) - { - s->mac_reg[index] = val & 0xfff80; -+ if (index == RDLEN) { -+ s->rxbufs = s->mac_reg[index] / sizeof(struct e1000_rx_desc); -+ s->rxq_full = 0; -+ } else { -+ s->txbufs = s->mac_reg[index] / sizeof(struct e1000_tx_desc); -+ } - } - - static void -@@ -986,6 +1365,16 @@ set_tctl(E1000State *s, int index, uint32_t val) - { - s->mac_reg[index] = val; - s->mac_reg[TDT] &= 0xffff; -+#ifdef PARAVIRT -+ if (s->csb && s->csb->guest_csb_on) { -+ ND("kick accepted tdt %d guest-tdt %d", -+ s->mac_reg[TDT], s->csb->guest_tdt); -+ s->csb->host_need_txkick = 0; /* XXX could be done by the guest */ -+ smp_mb(); /* XXX do we care ? */ -+ qemu_bh_schedule(s->tx_bh); -+ return; -+ } -+#endif /* PARAVIRT */ - start_xmit(s); - } - -@@ -1019,6 +1408,10 @@ static uint32_t (*macreg_readops[])(E1000State *, int) = { - getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS), - getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL), - getreg(TDLEN), getreg(RDLEN), -+ getreg(RDTR), getreg(RADV), getreg(TADV), getreg(ITR), -+#ifdef PARAVIRT -+ getreg(CSBAL), getreg(CSBAH), -+#endif /* PARAVIRT */ - - [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4, - [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4, -@@ -1035,6 +1428,11 @@ static void (*macreg_writeops[])(E1000State *, int, uint32_t) = { - putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC), - putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH), - putreg(RDBAL), putreg(LEDCTL), putreg(VET), -+#ifdef PARAVIRT -+ [CSBAL] = set_32bit, [CSBAH] = set_32bit, -+#endif /* PARAVIRT */ -+ [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit, -+ [ITR] = set_16bit, - [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl, - [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics, - [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt, -@@ -1332,6 +1730,13 @@ static int pci_e1000_init(PCIDevice *pci_dev) - - d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d); - -+ d->mit_cause = 0; -+ d->mit_timer_on = 0; -+ d->mit_timer = qemu_new_timer_ns(vm_clock, mit_rearm_and_int, d); -+ -+#ifdef PARAVIRT -+ d->tx_bh = qemu_bh_new(e1000_tx_bh, d); -+#endif /* PARAVIRT */ - return 0; - } - -@@ -1343,6 +1748,7 @@ static void qdev_e1000_reset(DeviceState *dev) - - static Property e1000_properties[] = { - DEFINE_NIC_PROPERTIES(E1000State, conf), -+ DEFINE_PROP_UINT32("mit_on", E1000State, mit_on, 5), - DEFINE_PROP_END_OF_LIST(), - }; - -@@ -1356,6 +1762,9 @@ static void e1000_class_init(ObjectClass *klass, void *data) - k->romfile = "pxe-e1000.rom"; - k->vendor_id = PCI_VENDOR_ID_INTEL; - k->device_id = E1000_DEVID; -+#ifdef PARAVIRT -+ k->subsystem_id = E1000_PARA_SUBDEV; -+#endif /* PARAVIRT */ - k->revision = 0x03; - k->class_id = PCI_CLASS_NETWORK_ETHERNET; - dc->desc = "Intel Gigabit Ethernet"; -diff --git a/hw/virtio-net.c b/hw/virtio-net.c -index 573c669..5389088 100644 ---- a/hw/virtio-net.c -+++ b/hw/virtio-net.c -@@ -21,6 +21,8 @@ - #include "virtio-net.h" - #include "vhost_net.h" - -+#define VIRTIO_Q_SLOTS 256 // 256 -+ - #define VIRTIO_NET_VM_VERSION 11 - - #define MAC_TABLE_ENTRIES 64 -@@ -49,6 +51,7 @@ typedef struct VirtIONet - NICState *nic; - uint32_t tx_timeout; - int32_t tx_burst; -+ int32_t tx_retries; // XXX lr - uint32_t has_vnet_hdr; - size_t host_hdr_len; - size_t guest_hdr_len; -@@ -1062,7 +1065,10 @@ static void virtio_net_tx_bh(void *opaque) - - /* If we flush a full burst of packets, assume there are - * more coming and immediately reschedule */ -- if (ret >= n->tx_burst) { -+ if (ret == 0) -+ n->tx_retries++; -+ // if (ret >= n->tx_burst) { -+ if (n->tx_retries < 20) { - qemu_bh_schedule(q->tx_bh); - q->tx_waiting = 1; - return; -@@ -1076,6 +1082,8 @@ static void virtio_net_tx_bh(void *opaque) - virtio_queue_set_notification(q->tx_vq, 0); - qemu_bh_schedule(q->tx_bh); - q->tx_waiting = 1; -+ } else { -+ n->tx_retries = 0; - } - } - -@@ -1091,16 +1099,16 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl) - } - - for (i = 1; i < max; i++) { -- n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx); -+ n->vqs[i].rx_vq = virtio_add_queue(vdev, VIRTIO_Q_SLOTS, virtio_net_handle_rx); - if (n->vqs[i].tx_timer) { - n->vqs[i].tx_vq = -- virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer); -+ virtio_add_queue(vdev, VIRTIO_Q_SLOTS, virtio_net_handle_tx_timer); - n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock, - virtio_net_tx_timer, - &n->vqs[i]); - } else { - n->vqs[i].tx_vq = -- virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh); -+ virtio_add_queue(vdev, VIRTIO_Q_SLOTS, virtio_net_handle_tx_bh); - n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]); - } - -@@ -1326,7 +1334,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, - n->vdev.set_status = virtio_net_set_status; - n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask; - n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending; -- n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx); -+ n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, VIRTIO_Q_SLOTS, virtio_net_handle_rx); - n->max_queues = conf->queues; - n->curr_queues = 1; - n->vqs[0].n = n; -@@ -1340,12 +1348,12 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, - } - - if (net->tx && !strcmp(net->tx, "timer")) { -- n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256, -+ n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, VIRTIO_Q_SLOTS, - virtio_net_handle_tx_timer); - n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer, - &n->vqs[0]); - } else { -- n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256, -+ n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, VIRTIO_Q_SLOTS, - virtio_net_handle_tx_bh); - n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]); - } -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 2322732..c8f68de 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -833,6 +833,23 @@ void address_space_init(AddressSpace *as, MemoryRegion *root); - void address_space_destroy(AddressSpace *as); - - /** -+ * address_space_mappable: return region containing a guest address. -+ * -+ * If the guest physical address is mappable in host virtual memory, -+ * the function returns the containing region for which the -+ * mapping is valid, and the offset to be added to the gpa -+ * to generate a host virtual address. -+ * -+ * @as: #AddressSpace to be accessed -+ * @addr: address within that address space -+ * @lo: pointer to the initial address in the range -+ * @hi: pointer after the final address in the range -+ * @ofs: pointer to the delta between the two addresses -+ */ -+int address_space_mappable(AddressSpace *as, hwaddr addr, uint64_t *lo, -+ uint64_t *hi, uint64_t *ofs); -+ -+/** - * address_space_rw: read from or write to an address space. - * - * @as: #AddressSpace to be accessed -diff --git a/include/net/net.h b/include/net/net.h -index 43a045e..20d3f22 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -11,6 +11,33 @@ - - #define MAX_QUEUE_NUM 1024 - -+#ifndef ND -+#define ND(fd, ...) /* debugging */ -+#define D(format, ...) \ -+ do { \ -+ struct timeval __xxts; \ -+ gettimeofday(&__xxts, NULL); \ -+ printf("%03d.%06d %s [%d] " format "\n", \ -+ (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \ -+ __func__, __LINE__, ##__VA_ARGS__); \ -+ } while (0) -+ -+/* rate limited, lps indicates how many per second */ -+#define RD(lps, format, ...) \ -+ do { \ -+ static int t0, __cnt; \ -+ struct timeval __xxts; \ -+ gettimeofday(&__xxts, NULL); \ -+ if (t0 != __xxts.tv_sec) { \ -+ t0 = __xxts.tv_sec; \ -+ __cnt = 0; \ -+ } \ -+ if (__cnt++ < lps) { \ -+ D(format, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+#endif -+ - struct MACAddr { - uint8_t a[6]; - }; -diff --git a/net/Makefile.objs b/net/Makefile.objs -index a08cd14..8cb9f2f 100644 ---- a/net/Makefile.objs -+++ b/net/Makefile.objs -@@ -10,3 +10,4 @@ common-obj-$(CONFIG_AIX) += tap-aix.o - common-obj-$(CONFIG_HAIKU) += tap-haiku.o - common-obj-$(CONFIG_SLIRP) += slirp.o - common-obj-$(CONFIG_VDE) += vde.o -+common-obj-$(CONFIG_NETMAP) += netmap.o -diff --git a/net/clients.h b/net/clients.h -index 7793294..952d076 100644 ---- a/net/clients.h -+++ b/net/clients.h -@@ -52,4 +52,8 @@ int net_init_vde(const NetClientOptions *opts, const char *name, - NetClientState *peer); - #endif - -+#ifdef CONFIG_NETMAP -+int net_init_netmap(const NetClientOptions *opts, const char *name, -+ NetClientState *peer); -+#endif - #endif /* QEMU_NET_CLIENTS_H */ -diff --git a/net/hub.c b/net/hub.c -index a24c9d1..df32074 100644 ---- a/net/hub.c -+++ b/net/hub.c -@@ -338,3 +338,17 @@ void net_hub_check_clients(void) - } - } - } -+ -+bool net_hub_flush(NetClientState *nc) -+{ -+ NetHubPort *port; -+ NetHubPort *source_port = DO_UPCAST(NetHubPort, nc, nc); -+ int ret = 0; -+ -+ QLIST_FOREACH(port, &source_port->hub->ports, next) { -+ if (port != source_port) { -+ ret += qemu_net_queue_flush(port->nc.send_queue); -+ } -+ } -+ return ret ? true : false; -+} -diff --git a/net/hub.h b/net/hub.h -index 583ada8..a625eff 100644 ---- a/net/hub.h -+++ b/net/hub.h -@@ -21,5 +21,6 @@ NetClientState *net_hub_add_port(int hub_id, const char *name); - NetClientState *net_hub_find_client_by_name(int hub_id, const char *name); - void net_hub_info(Monitor *mon); - void net_hub_check_clients(void); -+bool net_hub_flush(NetClientState *nc); - - #endif /* NET_HUB_H */ -diff --git a/net/net.c b/net/net.c -index be03a8d..3dceb29 100644 ---- a/net/net.c -+++ b/net/net.c -@@ -441,6 +441,12 @@ void qemu_flush_queued_packets(NetClientState *nc) - { - nc->receive_disabled = 0; - -+ if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) { -+ if (net_hub_flush(nc->peer)) { -+ qemu_notify_event(); -+ } -+ return; -+ } - if (qemu_net_queue_flush(nc->send_queue)) { - /* We emptied the queue successfully, signal to the IO thread to repoll - * the file descriptor (for tap, for example). -@@ -480,7 +486,8 @@ ssize_t qemu_send_packet_async(NetClientState *sender, - - void qemu_send_packet(NetClientState *nc, const uint8_t *buf, int size) - { -- qemu_send_packet_async(nc, buf, size, NULL); -+ qemu_send_packet_async_with_flags(nc, QEMU_NET_PACKET_FLAG_NONE, -+ buf, size, NULL); - } - - ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size) -@@ -723,6 +730,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])( - [NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge, - #endif - [NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport, -+#ifdef CONFIG_NETMAP -+ [NET_CLIENT_OPTIONS_KIND_NETMAP] = net_init_netmap, -+#endif - }; - - -diff --git a/net/netmap.c b/net/netmap.c -new file mode 100644 -index 0000000..794a7f4 ---- /dev/null -+++ b/net/netmap.c -@@ -0,0 +1,364 @@ -+/* -+ * netmap access for qemu -+ * -+ * Copyright (c) 2012-2013 Luigi Rizzo -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+#include "config-host.h" -+ -+/* note paths are different for -head and 1.3 */ -+#include "net/net.h" -+#include "clients.h" -+#include "sysemu/sysemu.h" -+#include "qemu-common.h" -+#include "qemu/error-report.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#ifndef ND -+#define ND(fd, ...) /* debugging */ -+#define D(format, ...) \ -+ do { \ -+ struct timeval __xxts; \ -+ gettimeofday(&__xxts, NULL); \ -+ printf("%03d.%06d %s [%d] " format "\n", \ -+ (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \ -+ __func__, __LINE__, ##__VA_ARGS__); \ -+ } while (0) -+ -+/* rate limited, lps indicates how many per second */ -+#define RD(lps, format, ...) \ -+ do { \ -+ static int t0, __cnt; \ -+ struct timeval __xxts; \ -+ gettimeofday(&__xxts, NULL); \ -+ if (t0 != __xxts.tv_sec) { \ -+ t0 = __xxts.tv_sec; \ -+ __cnt = 0; \ -+ } \ -+ if (__cnt++ < lps) { \ -+ D(format, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+#endif -+ -+ -+/* -+ * private netmap device info -+ */ -+struct netmap_state { -+ int fd; -+ int memsize; -+ void *mem; -+ struct netmap_if *nifp; -+ struct netmap_ring *rx; -+ struct netmap_ring *tx; -+ char fdname[128]; /* normally /dev/netmap */ -+ char ifname[128]; /* maybe the nmreq here ? */ -+}; -+ -+struct nm_state { -+ NetClientState nc; -+ struct netmap_state me; -+ unsigned int read_poll; -+ unsigned int write_poll; -+}; -+ -+#ifndef __FreeBSD__ -+#define pkt_copy bcopy -+#else -+/* a fast copy routine only for multiples of 64 bytes, non overlapped. */ -+static inline void -+pkt_copy(const void *_src, void *_dst, int l) -+{ -+ const uint64_t *src = _src; -+ uint64_t *dst = _dst; -+#define likely(x) __builtin_expect(!!(x), 1) -+#define unlikely(x) __builtin_expect(!!(x), 0) -+ if (unlikely(l >= 1024)) { -+ bcopy(src, dst, l); -+ return; -+ } -+ for (; l > 0; l -= 64) { -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ } -+} -+#endif /* __FreeBSD__ */ -+ -+ -+/* -+ * open a netmap device. We assume there is only one queue -+ * (which is the case for the VALE bridge). -+ */ -+static int netmap_open(struct netmap_state *me) -+{ -+ int fd, err; -+ size_t l; -+ struct nmreq req; -+ -+ me->fd = fd = open(me->fdname, O_RDWR); -+ if (fd < 0) { -+ error_report("Unable to open netmap device '%s'", me->fdname); -+ return -1; -+ } -+ bzero(&req, sizeof(req)); -+ pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname); -+ req.nr_ringid = 0; -+ req.nr_version = NETMAP_API; -+ err = ioctl(fd, NIOCGINFO, &req); -+ if (err) { -+ error_report("cannot get info on %s", me->ifname); -+ goto error; -+ } -+ l = me->memsize = req.nr_memsize; -+ err = ioctl(fd, NIOCREGIF, &req); -+ if (err) { -+ error_report("Unable to register %s", me->ifname); -+ goto error; -+ } -+ -+ me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); -+ if (me->mem == MAP_FAILED) { -+ error_report("Unable to mmap"); -+ me->mem = NULL; -+ goto error; -+ } -+ -+ me->nifp = NETMAP_IF(me->mem, req.nr_offset); -+ me->tx = NETMAP_TXRING(me->nifp, 0); -+ me->rx = NETMAP_RXRING(me->nifp, 0); -+ return 0; -+ -+error: -+ close(me->fd); -+ return -1; -+} -+ -+/* XXX do we need the can-send routine ? */ -+static int netmap_can_send(void *opaque) -+{ -+ struct nm_state *s = opaque; -+ -+ return qemu_can_send_packet(&s->nc); -+} -+ -+static void netmap_send(void *opaque); -+static void netmap_writable(void *opaque); -+ -+/* -+ * set the handlers for the device -+ */ -+static void netmap_update_fd_handler(struct nm_state *s) -+{ -+ qemu_set_fd_handler2(s->me.fd, -+ s->read_poll ? netmap_can_send : NULL, -+ s->read_poll ? netmap_send : NULL, -+ s->write_poll ? netmap_writable : NULL, -+ s); -+} -+ -+/* update the read handler */ -+static void netmap_read_poll(struct nm_state *s, bool enable) -+{ -+ if (s->read_poll != enable) { /* do nothing if not changed */ -+ s->read_poll = enable; -+ netmap_update_fd_handler(s); -+ } -+} -+ -+/* update the write handler */ -+static void netmap_write_poll(struct nm_state *s, bool enable) -+{ -+ if (s->write_poll != enable) { -+ s->write_poll = enable; -+ netmap_update_fd_handler(s); -+ } -+} -+ -+static void netmap_poll(NetClientState *nc, bool enable) -+{ -+ struct nm_state *s = DO_UPCAST(struct nm_state, nc, nc); -+ -+ if (s->read_poll != enable || s->write_poll != enable) { -+ s->read_poll = enable; -+ s->read_poll = enable; -+ netmap_update_fd_handler(s); -+ } -+} -+ -+/* -+ * the fd_write() callback, invoked if the fd is marked as -+ * writable after a poll. Reset the handler and flush any -+ * buffered packets. -+ */ -+static void netmap_writable(void *opaque) -+{ -+ struct nm_state *s = opaque; -+ -+ netmap_write_poll(s, false); -+ qemu_flush_queued_packets(&s->nc); -+} -+ -+/* -+ * new data guest --> backend -+ */ -+static ssize_t netmap_receive_raw(NetClientState *nc, -+ const uint8_t *buf, size_t size) -+{ -+ struct nm_state *s = DO_UPCAST(struct nm_state, nc, nc); -+ struct netmap_ring *ring = s->me.tx; -+ -+ if (size > ring->nr_buf_size) { -+ RD(5, "drop packet of size %d > %d", (int)size, ring->nr_buf_size); -+ return size; -+ } -+ -+ if (ring) { -+ /* request an early notification to avoid running dry */ -+ if (ring->avail < ring->num_slots / 2 && s->write_poll == false) { -+ netmap_write_poll(s, true); -+ } -+ if (ring->avail == 0) { /* cannot write */ -+ return 0; -+ } -+ uint32_t i = ring->cur; -+ uint32_t idx = ring->slot[i].buf_idx; -+ uint8_t *dst = (uint8_t *)NETMAP_BUF(ring, idx); -+ -+ ring->slot[i].len = size; -+ pkt_copy(buf, dst, size); -+ ring->cur = NETMAP_RING_NEXT(ring, i); -+ ring->avail--; -+ } -+ return size; -+} -+ -+/* complete a previous send (backend --> guest), enable the fd_read callback */ -+static void netmap_send_completed(NetClientState *nc, ssize_t len) -+{ -+ struct nm_state *s = DO_UPCAST(struct nm_state, nc, nc); -+ -+ netmap_read_poll(s, true); -+} -+ -+/* -+ * netmap_send: backend -> guest -+ * there is traffic available from the network, try to send it up. -+ */ -+static void netmap_send(void *opaque) -+{ -+ struct nm_state *s = opaque; -+ struct netmap_ring *ring = s->me.rx; -+ -+ /* only check ring->avail, let the packet be queued -+ * with qemu_send_packet_async() if needed -+ * XXX until we fix the propagation on the bridge we need to stop early -+ */ -+ while (ring->avail > 0 && qemu_can_send_packet(&s->nc)) { -+ uint32_t i = ring->cur; -+ uint32_t idx = ring->slot[i].buf_idx; -+ uint8_t *src = (u_char *)NETMAP_BUF(ring, idx); -+ int size = ring->slot[i].len; -+ -+ ring->cur = NETMAP_RING_NEXT(ring, i); -+ ring->avail--; -+ size = qemu_send_packet_async(&s->nc, src, size, netmap_send_completed); -+ if (size == 0) { -+ /* the guest does not receive anymore. Packet is queued, stop -+ * reading from the backend until netmap_send_completed() -+ */ -+ netmap_read_poll(s, false); -+ return; -+ } -+ } -+ netmap_read_poll(s, true); /* probably useless. */ -+} -+ -+ -+/* flush and close */ -+static void netmap_cleanup(NetClientState *nc) -+{ -+ struct nm_state *s = DO_UPCAST(struct nm_state, nc, nc); -+ -+ qemu_purge_queued_packets(nc); -+ -+ netmap_poll(nc, false); -+ munmap(s->me.mem, s->me.memsize); -+ close(s->me.fd); -+ -+ s->me.fd = -1; -+} -+ -+ -+ -+/* fd support */ -+ -+static NetClientInfo net_netmap_info = { -+ .type = NET_CLIENT_OPTIONS_KIND_NETMAP, -+ .size = sizeof(struct nm_state), -+ .receive = netmap_receive_raw, -+#if 0 /* not implemented */ -+ .receive_raw = netmap_receive_raw, -+ .receive_iov = netmap_receive_iov, -+#endif -+ .poll = netmap_poll, -+ .cleanup = netmap_cleanup, -+}; -+ -+/* the external calls */ -+ -+/* -+ * ... -net netmap,ifname="..." -+ */ -+int net_init_netmap(const NetClientOptions *opts, -+ const char *name, NetClientState *peer) -+{ -+ const NetdevNetmapOptions *netmap_opts = opts->netmap; -+ NetClientState *nc; -+ struct netmap_state me; -+ struct nm_state *s; -+ -+ pstrcpy(me.fdname, sizeof(me.fdname), name ? name : "/dev/netmap"); -+ /* set default name for the port if not supplied */ -+ pstrcpy(me.ifname, sizeof(me.ifname), -+ netmap_opts->has_ifname ? netmap_opts->ifname : "vale0"); -+ if (netmap_open(&me)) { -+ return -1; -+ } -+ /* create the object -- XXX use name or ifname ? */ -+ nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name); -+ s = DO_UPCAST(struct nm_state, nc, nc); -+ s->me = me; -+ netmap_read_poll(s, true); /* initially only poll for reads. */ -+ -+ return 0; -+} -diff --git a/net/queue.c b/net/queue.c -index 6eaf5b6..859d02a 100644 ---- a/net/queue.c -+++ b/net/queue.c -@@ -50,6 +50,8 @@ struct NetPacket { - - struct NetQueue { - void *opaque; -+ uint32_t nq_maxlen; -+ uint32_t nq_count; - - QTAILQ_HEAD(packets, NetPacket) packets; - -@@ -63,6 +65,8 @@ NetQueue *qemu_new_net_queue(void *opaque) - queue = g_malloc0(sizeof(NetQueue)); - - queue->opaque = opaque; -+ queue->nq_maxlen = 10000; -+ queue->nq_count = 0; - - QTAILQ_INIT(&queue->packets); - -@@ -92,6 +96,9 @@ static void qemu_net_queue_append(NetQueue *queue, - { - NetPacket *packet; - -+ if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { -+ return; /* drop if queue full and no callback */ -+ } - packet = g_malloc(sizeof(NetPacket) + size); - packet->sender = sender; - packet->flags = flags; -@@ -99,6 +106,7 @@ static void qemu_net_queue_append(NetQueue *queue, - packet->sent_cb = sent_cb; - memcpy(packet->data, buf, size); - -+ queue->nq_count++; - QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); - } - -@@ -113,6 +121,9 @@ static void qemu_net_queue_append_iov(NetQueue *queue, - size_t max_len = 0; - int i; - -+ if (queue->nq_count >= queue->nq_maxlen && !sent_cb) { -+ return; /* drop if queue full and no callback */ -+ } - for (i = 0; i < iovcnt; i++) { - max_len += iov[i].iov_len; - } -@@ -130,6 +141,7 @@ static void qemu_net_queue_append_iov(NetQueue *queue, - packet->size += len; - } - -+ queue->nq_count++; - QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); - } - -@@ -220,6 +232,7 @@ void qemu_net_queue_purge(NetQueue *queue, NetClientState *from) - QTAILQ_FOREACH_SAFE(packet, &queue->packets, entry, next) { - if (packet->sender == from) { - QTAILQ_REMOVE(&queue->packets, packet, entry); -+ queue->nq_count--; - g_free(packet); - } - } -@@ -233,6 +246,7 @@ bool qemu_net_queue_flush(NetQueue *queue) - - packet = QTAILQ_FIRST(&queue->packets); - QTAILQ_REMOVE(&queue->packets, packet, entry); -+ queue->nq_count--; - - ret = qemu_net_queue_deliver(queue, - packet->sender, -@@ -240,6 +254,7 @@ bool qemu_net_queue_flush(NetQueue *queue) - packet->data, - packet->size); - if (ret == 0) { -+ queue->nq_count++; - QTAILQ_INSERT_HEAD(&queue->packets, packet, entry); - return false; - } -diff --git a/qapi-schema.json b/qapi-schema.json -index cd7ea25..b6316e1 100644 ---- a/qapi-schema.json -+++ b/qapi-schema.json -@@ -2641,6 +2641,11 @@ - 'data': { - 'hubid': 'int32' } } - -+{ 'type': 'NetdevNetmapOptions', -+ 'data': { -+ '*ifname': 'str' } } -+ -+ - ## - # @NetClientOptions - # -@@ -2658,7 +2663,8 @@ - 'vde': 'NetdevVdeOptions', - 'dump': 'NetdevDumpOptions', - 'bridge': 'NetdevBridgeOptions', -- 'hubport': 'NetdevHubPortOptions' } } -+ 'hubport': 'NetdevHubPortOptions', -+ 'netmap': 'NetdevNetmapOptions' } } - - ## - # @NetLegacy diff --git a/private/extra/20140109-click.diff b/private/extra/20140109-click.diff deleted file mode 100644 index cac7aecbf..000000000 --- a/private/extra/20140109-click.diff +++ /dev/null @@ -1,309 +0,0 @@ -diff --git a/elements/userlevel/fromdevice.cc b/elements/userlevel/fromdevice.cc -index 3da8dab..26f4299 100644 ---- a/elements/userlevel/fromdevice.cc -+++ b/elements/userlevel/fromdevice.cc -@@ -471,15 +471,16 @@ FromDevice::netmap_dispatch() - struct netmap_ring *ring = NETMAP_RXRING(_netmap.nifp, ri); - //click_chatter("netmap dispatch %s %u %u %u %u", _ifname.c_str(), ri, ring->cur, ring->reserved, ring->avail); - -- while (ring->reserved > 0 && NetmapInfo::refill(ring)) -+ while (ring->head != ring->cur && _netmap.refill(ring)) - /* click_chatter("Refilled") */; - -- if (ring->avail == 0) -+ if (nm_ring_empty(ring)) - continue; - -- int nzcopy = (int) (ring->num_slots / 2) - (int) ring->reserved; -+ // we let at most half a ring of zerocopy packets -+ int nzcopy = (int)(ring->num_slots / 2) - NetmapInfo::reserved(ring); - -- while (n != _burst && ring->avail > 0) { -+ while (n != _burst && !nm_ring_empty(ring)) { - unsigned cur = ring->cur; - unsigned buf_idx = ring->slot[cur].buf_idx; - if (buf_idx < 2) -@@ -488,16 +489,15 @@ FromDevice::netmap_dispatch() - - WritablePacket *p; - if (nzcopy > 0) { -- p = Packet::make(buf, ring->slot[cur].len, NetmapInfo::buffer_destructor); -- ++ring->reserved; -+ p = Packet::make(buf, ring->slot[cur].len, NetmapInfo::buffer_destructor, (void *)&_netmap); - --nzcopy; -- } else { -+ } else { // copy and release the buffer at ring->head - p = Packet::make(_headroom, buf, ring->slot[cur].len, 0); -- unsigned res1idx = NETMAP_RING_FIRST_RESERVED(ring); -- ring->slot[res1idx].buf_idx = buf_idx; -+ ring->slot[ring->head].buf_idx = buf_idx; -+ ring->slot[ring->head].flags = NS_BUF_CHANGED; -+ ring->head = nm_ring_next(ring, ring->head); - } -- ring->cur = NETMAP_RING_NEXT(ring, ring->cur); -- --ring->avail; -+ ring->cur = nm_ring_next(ring, cur); - ++n; - - emit_packet(p, 0, ring->ts); -diff --git a/elements/userlevel/netmapinfo.cc b/elements/userlevel/netmapinfo.cc -index ca591f6..2dacebd 100644 ---- a/elements/userlevel/netmapinfo.cc -+++ b/elements/userlevel/netmapinfo.cc -@@ -28,11 +28,6 @@ - CLICK_DECLS - - static Spinlock netmap_memory_lock; --static void *netmap_memory = MAP_FAILED; --static size_t netmap_memory_size; --static uint32_t netmap_memory_users; -- --unsigned char *NetmapInfo::buffers; - - int - NetmapInfo::ring::open(const String &ifname, -@@ -54,32 +49,23 @@ NetmapInfo::ring::open(const String &ifname, - req.nr_version = NETMAP_API; - #endif - int r; -- if ((r = ioctl(fd, NIOCGINFO, &req))) { -- initial_errh->error("netmap %s: %s", ifname.c_str(), strerror(errno)); -+ if ((r = ioctl(fd, NIOCREGIF, &req))) { -+ errh->error("netmap register %s: %s", ifname.c_str(), strerror(errno)); - error: - close(fd); - return -1; - } -- size_t memsize = req.nr_memsize; -- -- if ((r = ioctl(fd, NIOCREGIF, &req))) { -- errh->error("netmap register %s: %s", ifname.c_str(), strerror(errno)); -- goto error; -- } - - netmap_memory_lock.acquire(); -- if (netmap_memory == MAP_FAILED) { -- netmap_memory_size = memsize; -- netmap_memory = mmap(0, netmap_memory_size, PROT_WRITE | PROT_READ, -+ buffers = 0; -+ memsize = req.nr_memsize; -+ mem = (char *)mmap(0, memsize, PROT_WRITE | PROT_READ, - MAP_SHARED, fd, 0); -- if (netmap_memory == MAP_FAILED) { -- errh->error("netmap allocate %s: %s", ifname.c_str(), strerror(errno)); -- netmap_memory_lock.release(); -- goto error; -- } -+ if (mem == MAP_FAILED) { -+ errh->error("netmap allocate %s: %s", ifname.c_str(), strerror(errno)); -+ netmap_memory_lock.release(); -+ goto error; - } -- mem = (char *) netmap_memory; -- ++netmap_memory_users; - netmap_memory_lock.release(); - - nifp = NETMAP_IF(mem, req.nr_offset); -@@ -110,12 +96,11 @@ void - NetmapInfo::ring::close(int fd) - { - netmap_memory_lock.acquire(); -- if (--netmap_memory_users <= 0 && netmap_memory != MAP_FAILED) { -- munmap(netmap_memory, netmap_memory_size); -- netmap_memory = MAP_FAILED; -+ if (mem != MAP_FAILED) { -+ munmap(mem, memsize); -+ mem = (char *)MAP_FAILED; - } - netmap_memory_lock.release(); -- ioctl(fd, NIOCUNREGIF, (struct nmreq *) 0); - ::close(fd); - } - -diff --git a/elements/userlevel/netmapinfo.hh b/elements/userlevel/netmapinfo.hh -index be8acae..d436f51 100644 ---- a/elements/userlevel/netmapinfo.hh -+++ b/elements/userlevel/netmapinfo.hh -@@ -12,36 +12,47 @@ class NetmapInfo { public: - - struct ring { - char *mem; -+ size_t memsize; - unsigned ring_begin; - unsigned ring_end; - struct netmap_if *nifp; -+ unsigned char *buffers; // XXX released bufs, not thread safe - - int open(const String &ifname, - bool always_error, ErrorHandler *errh); - void initialize_rings_rx(int timestamp); - void initialize_rings_tx(); - void close(int fd); -+ // XXX return a buffer to the ring -+ bool refill(struct netmap_ring *ring) { -+ if (buffers) { -+ unsigned char *buf = buffers; -+ buffers = *reinterpret_cast(buffers); -+ unsigned res1idx = ring->head; -+ ring->slot[res1idx].buf_idx = NETMAP_BUF_IDX(ring, (char *) buf); -+ ring->slot[res1idx].flags |= NS_BUF_CHANGED; -+ ring->head = nm_ring_next(ring, res1idx); -+ return true; -+ } else -+ return false; -+ } - }; - -- static unsigned char *buffers; // XXX not thread safe - static bool is_netmap_buffer(Packet *p) { - return p->buffer_destructor() == buffer_destructor; - } -- static void buffer_destructor(unsigned char *buf, size_t) { -- *reinterpret_cast(buf) = buffers; -- buffers = buf; -+ static void buffer_destructor(unsigned char *buf, size_t, void *arg) { -+ struct ring *ring = reinterpret_cast(arg); -+ *reinterpret_cast(buf) = ring->buffers; -+ ring->buffers = buf; - } -- static bool refill(struct netmap_ring *ring) { -- if (buffers) { -- unsigned char *buf = buffers; -- buffers = *reinterpret_cast(buffers); -- unsigned res1idx = NETMAP_RING_FIRST_RESERVED(ring); -- ring->slot[res1idx].buf_idx = NETMAP_BUF_IDX(ring, (char *) buf); -- ring->slot[res1idx].flags |= NS_BUF_CHANGED; -- --ring->reserved; -- return true; -- } else -- return false; -+ -+ // return number of reserved buffers -+ static int reserved(struct netmap_ring *ring) { -+ int ret = ring->cur - ring->head; -+ if (ret < 0) -+ ret += ring->num_slots; -+ return ret; - } - - }; -diff --git a/elements/userlevel/todevice.cc b/elements/userlevel/todevice.cc -index 31fa72b..cdd7da2 100644 ---- a/elements/userlevel/todevice.cc -+++ b/elements/userlevel/todevice.cc -@@ -285,7 +285,7 @@ ToDevice::netmap_send_packet(Packet *p) - { - for (unsigned ri = _netmap.ring_begin; ri != _netmap.ring_end; ++ri) { - struct netmap_ring *ring = NETMAP_TXRING(_netmap.nifp, ri); -- if (ring->avail == 0) -+ if (nm_ring_empty(ring)) - continue; - unsigned cur = ring->cur; - unsigned buf_idx = ring->slot[cur].buf_idx; -@@ -295,17 +295,17 @@ ToDevice::netmap_send_packet(Packet *p) - uint32_t p_length = p->length(); - if (NetmapInfo::is_netmap_buffer(p) - && !p->shared() && p->buffer() == p->data() -+ && (char *)p->buffer() >= _netmap.mem && (char *)p->buffer() < _netmap.mem + _netmap.memsize - && noutputs() == 0) { - ring->slot[cur].buf_idx = NETMAP_BUF_IDX(ring, (char *) p->buffer()); - ring->slot[cur].flags |= NS_BUF_CHANGED; -- NetmapInfo::buffer_destructor(buf, 0); -+ NetmapInfo::buffer_destructor(buf, 0, (void *)&_netmap); - p->reset_buffer(); - } else - memcpy(buf, p->data(), p_length); - ring->slot[cur].len = p_length; - __asm__ volatile("" : : : "memory"); -- ring->cur = NETMAP_RING_NEXT(ring, cur); -- ring->avail--; -+ ring->head = ring->cur = nm_ring_next(ring, cur); - return 0; - } - errno = ENOBUFS; -diff --git a/include/click/packet.hh b/include/click/packet.hh -index 165a6d3..d03a819 100644 ---- a/include/click/packet.hh -+++ b/include/click/packet.hh -@@ -58,9 +58,10 @@ class Packet { public: - static inline Packet *make(struct mbuf *mbuf) CLICK_WARN_UNUSED_RESULT; - #endif - #if CLICK_USERLEVEL -- typedef void (*buffer_destructor_type)(unsigned char *buf, size_t sz); -+ typedef void (*buffer_destructor_type)(unsigned char *buf, size_t sz, void *arg); - static WritablePacket *make(unsigned char *data, uint32_t length, -- buffer_destructor_type buffer_destructor) CLICK_WARN_UNUSED_RESULT; -+ buffer_destructor_type buffer_destructor, -+ void *arg) CLICK_WARN_UNUSED_RESULT; - #endif - - static void static_cleanup(); -@@ -724,6 +725,7 @@ class Packet { public: - unsigned char *_end; /* one beyond end of allocated buffer */ - # if CLICK_USERLEVEL - buffer_destructor_type _destructor; -+ void *_destructor_arg; - # endif - # if CLICK_BSDMODULE - struct mbuf *_m; -diff --git a/lib/fromfile.cc b/lib/fromfile.cc -index 8827455..4b691d7 100644 ---- a/lib/fromfile.cc -+++ b/lib/fromfile.cc -@@ -118,7 +118,7 @@ FromFile::warning(ErrorHandler *errh, const char *format, ...) const - - #ifdef ALLOW_MMAP - static void --munmap_destructor(unsigned char *data, size_t amount) -+munmap_destructor(unsigned char *data, size_t amount, void *arg) - { - if (munmap((caddr_t)data, amount) < 0) - click_chatter("FromFile: munmap: %s", strerror(errno)); -@@ -156,7 +156,7 @@ FromFile::read_buffer_mmap(ErrorHandler *errh) - if (mmap_data == MAP_FAILED) - return error(errh, "mmap: %s", strerror(errno)); - -- _data_packet = Packet::make((unsigned char *)mmap_data, _len, munmap_destructor); -+ _data_packet = Packet::make((unsigned char *)mmap_data, _len, munmap_destructor, 0); - _buffer = _data_packet->data(); - _file_offset = _mmap_off; - _mmap_off += _len; -diff --git a/lib/packet.cc b/lib/packet.cc -index 65c35c0..3e048d7 100644 ---- a/lib/packet.cc -+++ b/lib/packet.cc -@@ -212,7 +212,7 @@ Packet::~Packet() - _data_packet->kill(); - # if CLICK_USERLEVEL - else if (_head && _destructor) -- _destructor(_head, _end - _head); -+ _destructor(_head, _end - _head, _destructor_arg); - else - delete[] _head; - # elif CLICK_BSDMODULE -@@ -552,7 +552,7 @@ Packet::make(uint32_t headroom, const void *data, - * null. */ - WritablePacket * - Packet::make(unsigned char *data, uint32_t length, -- buffer_destructor_type destructor) -+ buffer_destructor_type destructor, void *arg) - { - # if HAVE_CLICK_PACKET_POOL - WritablePacket *p = WritablePacket::pool_allocate(false); -@@ -564,6 +564,7 @@ Packet::make(unsigned char *data, uint32_t length, - p->_head = p->_data = data; - p->_tail = p->_end = data + length; - p->_destructor = destructor; -+ p->_destructor_arg = arg; - } - return p; - } -@@ -735,7 +736,7 @@ Packet::expensive_uniqueify(int32_t extra_headroom, int32_t extra_tailroom, - _data_packet->kill(); - # if CLICK_USERLEVEL - else if (_destructor) -- _destructor(old_head, old_end - old_head); -+ _destructor(old_head, old_end - old_head, _destructor_arg); - else - delete[] old_head; - _destructor = 0; diff --git a/private/extra/README b/private/extra/README deleted file mode 100644 index 3fb551511..000000000 --- a/private/extra/README +++ /dev/null @@ -1,8 +0,0 @@ -Extra files related to netmap and qemu - - -bsd-lem-mitigation.diff - extensions to the interrupt mitigation for qemu - -qemu-1.2.0-e1000-mitigation.diff - emulation of interrupt mitigation registers diff --git a/private/extra/bro-netmap.diff b/private/extra/bro-netmap.diff deleted file mode 100644 index 207e6412c..000000000 --- a/private/extra/bro-netmap.diff +++ /dev/null @@ -1,95 +0,0 @@ -diff --git a/src/PktSrc.cc b/src/PktSrc.cc -index 9d6bce6..e8f59dd 100644 ---- a/src/PktSrc.cc -+++ b/src/PktSrc.cc -@@ -11,6 +11,26 @@ - #include "Net.h" - #include "Sessions.h" - -+#define HAVE_NETMAP -+ -+#ifdef HAVE_NETMAP -+ -+// Compile in netmap support. If the interface name starts with -+// "netmap:" or "vale" we use a netmap fd instead of pcap, and bind -+// one or all rings depending on NETMAP_RING_ID environment variable. -+// -+// For a test run you can use the vale switch, -+// pkt-gen -i vale1:b -f tx -R ..rate_in_pps -+// and launch bro like this -+/* -+ -+BROPATH=`./bro-path-dev` ./src/bro -i vale1:a -b -e 'global l=0; event p(){local s=net_stats(); local c=s$pkts_recvd;print c-l;l=c; schedule 1 sec {p()};} event bro_init(){event p();}' -+ -+ */ -+#define NETMAP_WITH_LIBS -+#include -+ -+#endif /* HAVE_NETMAP */ - - // ### This needs auto-confing. - #ifdef HAVE_PCAP_INT_H -@@ -75,7 +95,14 @@ int PktSrc::ExtractNextPacket() - return 0; - } - -+#ifdef HAVE_NETMAP -+ // in netmap mode call netmap equivalent of pcap_next() -+ if (IS_NETMAP_DESC(pd)) -+ data = last_data = nm_nextpkt((struct nm_desc *)pd, -+ (struct nm_pkthdr *)&hdr); -+ else -+#endif /* HAVE_NETMAP */ - data = last_data = pcap_next(pd, &hdr); - - if ( data && (hdr.len == 0 || hdr.caplen == 0) ) - { -@@ -407,6 +435,11 @@ void PktSrc::Close() - { - if ( pd ) - { -+#ifdef HAVE_NETMAP -+ if (IS_NETMAP_DESC(pd)) -+ nm_close((struct nm_desc *)pd); -+ else -+#endif /* HAVE_NETMAP */ - pcap_close(pd); - pd = 0; - closed = true; -@@ -443,6 +476,14 @@ void PktSrc::Statistics(Stats* s) - else - { - struct pcap_stat pstat; -+#ifdef HAVE_NETMAP -+ if (IS_NETMAP_DESC(pd)) -+ { -+ s->dropped = stats.dropped; -+ s->link = stats.received; -+ } -+ else -+#endif /* HAVE_NETMAP */ - if ( pcap_stats(pd, &pstat) < 0 ) - { - reporter->Error("problem getting packet filter statistics: %s", -@@ -482,6 +523,21 @@ PktInterfaceSrc::PktInterfaceSrc(const char* arg_interface, const char* filter, - - interface = copy_string(arg_interface); - -+#ifdef HAVE_NETMAP -+ pd = (pcap_t *)nm_open(interface, getenv("NETMAP_RING_ID"), 0, 0); -+ // netmap interfaces are named netmap:* or vale* -+ // If pd == 0 && errno == 0 "interface" is not a valid -+ // netmap interface name, so we fall through to pcap -+ if (pd || errno > 0) -+ { -+ if (pd) -+ selectable_fd = NETMAP_FD(pd); -+ else -+ closed = true; -+ return; -+ } -+#endif /* HAVE_NETMAP */ -+ - // Determine network and netmask. - uint32 net; - if ( pcap_lookupnet(interface, &net, &netmask, tmp_errbuf) < 0 ) diff --git a/private/extra/bsd-lem-intr_latency.diff b/private/extra/bsd-lem-intr_latency.diff deleted file mode 100644 index 8440829d8..000000000 --- a/private/extra/bsd-lem-intr_latency.diff +++ /dev/null @@ -1,33 +0,0 @@ -Index: /home/luigi/FreeBSD/head/sys/dev/e1000/if_lem.c -=================================================================== ---- /home/luigi/FreeBSD/head/sys/dev/e1000/if_lem.c (revision 244673) -+++ /home/luigi/FreeBSD/head/sys/dev/e1000/if_lem.c (working copy) -@@ -318,6 +318,10 @@ - - #ifdef DEV_NETMAP /* see ixgbe.c for details */ - #include -+uint64_t tsc_irq_start, tsc_irq_end, tsc_irq_delta; -+SYSCTL_DECL(_dev_netmap); -+SYSCTL_UQUAD(_dev_netmap, OID_AUTO, delta, -+ CTLFLAG_RD, &tsc_irq_delta, 0, ""); - #endif /* DEV_NETMAP */ - - /********************************************************************* -@@ -1335,7 +1339,8 @@ - struct adapter *adapter = context; - struct ifnet *ifp = adapter->ifp; - -- -+ tsc_irq_end = rdtsc(); -+ tsc_irq_delta = tsc_irq_end - tsc_irq_start; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - lem_rxeof(adapter, adapter->rx_process_limit, NULL); - EM_TX_LOCK(adapter); -@@ -1362,6 +1367,7 @@ - u32 reg_icr; - - ifp = adapter->ifp; -+ tsc_irq_start = rdtsc(); - - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - diff --git a/private/extra/e1000-paravirt.diff b/private/extra/e1000-paravirt.diff deleted file mode 100644 index 7bdc1aab4..000000000 --- a/private/extra/e1000-paravirt.diff +++ /dev/null @@ -1,396 +0,0 @@ -Index: sys/dev/e1000/if_lem.c -=================================================================== ---- sys/dev/e1000/if_lem.c (revision 257831) -+++ sys/dev/e1000/if_lem.c (working copy) -@@ -32,6 +32,10 @@ - ******************************************************************************/ - /*$FreeBSD$*/ - -+#define BATCH_DISPATCH -+#define NIC_SEND_COMBINING -+#define NIC_PARAVIRT /* enable virtio-like synchronization */ -+ - #include "opt_inet.h" - #include "opt_inet6.h" - -@@ -290,8 +294,8 @@ - static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); - static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); - static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); --static int lem_rxd = EM_DEFAULT_RXD; --static int lem_txd = EM_DEFAULT_TXD; -+static int lem_rxd = 8* EM_DEFAULT_RXD; -+static int lem_txd = 8* EM_DEFAULT_TXD; - static int lem_smart_pwr_down = FALSE; - - /* Controls whether promiscuous also shows bad packets */ -@@ -459,6 +463,20 @@ - "max number of rx packets to process", &adapter->rx_process_limit, - lem_rx_process_limit); - -+#ifdef NIC_SEND_COMBINING -+ /* Sysctls to control mitigation */ -+ lem_add_rx_process_limit(adapter, "sc_enable", -+ "driver TDT mitigation", &adapter->sc_enable, 0); -+#endif /* NIC_SEND_COMBINING */ -+#ifdef BATCH_DISPATCH -+ lem_add_rx_process_limit(adapter, "batch_enable", -+ "driver rx batch", &adapter->batch_enable, 0); -+#endif /* BATCH_DISPATCH */ -+#ifdef NIC_PARAVIRT -+ lem_add_rx_process_limit(adapter, "rx_retries", -+ "driver rx retries", &adapter->rx_retries, 0); -+#endif /* NIC_PARAVIRT */ -+ - /* Sysctl for setting the interface flow control */ - lem_set_flow_cntrl(adapter, "flow_control", - "flow control setting", -@@ -516,6 +534,49 @@ - */ - adapter->hw.mac.report_tx_early = 1; - -+#ifdef NIC_PARAVIRT -+ device_printf(dev, "driver supports paravirt, subdev 0x%x\n", -+ adapter->hw.subsystem_device_id); -+ if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) { -+ uint64_t bus_addr; -+ -+ device_printf(dev, "paravirt support on dev %p\n", adapter); -+ tsize = 4096; // XXX one page for the csb -+ if (lem_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) { -+ device_printf(dev, "Unable to allocate csb memory\n"); -+ error = ENOMEM; -+ goto err_csb; -+ } -+ /* Setup the Base of the CSB */ -+ adapter->csb = (struct paravirt_csb *)adapter->csb_mem.dma_vaddr; -+ /* force the first kick */ -+ adapter->csb->host_need_txkick = 1; /* txring empty */ -+ adapter->csb->guest_need_rxkick = 1; /* no rx packets */ -+ bus_addr = adapter->csb_mem.dma_paddr; -+ lem_add_rx_process_limit(adapter, "csb_on", -+ "enable paravirt.", &adapter->csb->guest_csb_on, 0); -+ lem_add_rx_process_limit(adapter, "txc_lim", -+ "txc_lim", &adapter->csb->host_txcycles_lim, 1); -+ -+ /* some stats */ -+#define PA_SC(name, var, val) \ -+ lem_add_rx_process_limit(adapter, name, name, var, val) -+ PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1); -+ PA_SC("host_rxkick_at",&adapter->csb->host_rxkick_at, ~0); -+ PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0); -+ PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1); -+ PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0); -+ PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0); -+ PA_SC("tdt_int_count",&adapter->tdt_int_count, 0); -+ PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0); -+ /* tell the host where the block is */ -+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAH, -+ (u32)(bus_addr >> 32)); -+ E1000_WRITE_REG(&adapter->hw, E1000_CSBAL, -+ (u32)bus_addr); -+ } -+#endif /* NIC_PARAVIRT */ -+ - tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), - EM_DBA_ALIGN); - -@@ -674,6 +735,11 @@ - err_rx_desc: - lem_dma_free(adapter, &adapter->txdma); - err_tx_desc: -+#ifdef NIC_PARAVIRT -+ lem_dma_free(adapter, &adapter->csb_mem); -+err_csb: -+#endif /* NIC_PARAVIRT */ -+ - err_pci: - if (adapter->ifp != NULL) - if_free(adapter->ifp); -@@ -761,6 +827,12 @@ - adapter->rx_desc_base = NULL; - } - -+#ifdef NIC_PARAVIRT -+ if (adapter->csb) { -+ lem_dma_free(adapter, &adapter->csb_mem); -+ adapter->csb = NULL; -+ } -+#endif /* NIC_PARAVIRT */ - lem_release_hw_control(adapter); - free(adapter->mta, M_DEVBUF); - EM_TX_LOCK_DESTROY(adapter); -@@ -870,6 +942,15 @@ - } - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; -+#ifdef NIC_PARAVIRT -+ if (ifp->if_drv_flags & IFF_DRV_OACTIVE && adapter->csb && -+ adapter->csb->guest_csb_on && !adapter->csb->guest_need_txkick) { -+ adapter->csb->guest_need_txkick = 1; -+ adapter->guest_need_kick_count++; -+ // XXX memory barrier -+ lem_txeof(adapter); // XXX possibly clear IFF_DRV_OACTIVE -+ } -+#endif /* NIC_PARAVIRT */ - - return; - } -@@ -1310,6 +1391,9 @@ - lem_rxeof(adapter, -1, NULL); - - EM_TX_LOCK(adapter); -+#ifdef NIC_PARAVIRT -+ adapter->tdt_int_count++; -+#endif /* NIC_PARAVIRT */ - lem_txeof(adapter); - if (ifp->if_drv_flags & IFF_DRV_RUNNING && - !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) -@@ -1349,6 +1433,9 @@ - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL); - EM_TX_LOCK(adapter); -+#ifdef NIC_PARAVIRT -+ adapter->tdt_int_count++; -+#endif /* NIC_PARAVIRT */ - lem_txeof(adapter); - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - lem_start_locked(ifp); -@@ -1716,6 +1803,41 @@ - */ - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); -+ -+#ifdef NIC_PARAVIRT -+ if (adapter->csb) { -+ adapter->csb->guest_tdt = i; -+ /* XXX memory barrier ? */ -+ if (adapter->csb->guest_csb_on && -+ !adapter->csb->host_need_txkick) { -+ /* XXX maybe useless -+ * clean the ring. maybe do it before ? -+ * maybe a little bit of histeresys ? -+ */ -+ if (adapter->num_tx_desc_avail <= 64) {// XXX -+ lem_txeof(adapter); -+ } -+ adapter->tdt_csb_count++; -+ return (0); -+ } -+ } -+#endif /* NIC_PARAVIRT */ -+ -+#ifdef NIC_SEND_COMBINING -+ if (adapter->sc_enable) { -+ if (adapter->shadow_tdt & MIT_PENDING_INT) { -+ /* signal intr and data pending */ -+ adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff); -+ return (0); -+ } else { -+ adapter->shadow_tdt = MIT_PENDING_INT; -+ } -+ } -+#endif /* NIC_SEND_COMBINING */ -+#ifdef NIC_PARAVIRT -+ adapter->tdt_reg_count++; -+#endif /* NIC_PARAVIRT */ -+ - if (adapter->hw.mac.type == e1000_82547 && - adapter->link_duplex == HALF_DUPLEX) - lem_82547_move_tail(adapter); -@@ -1996,6 +2118,20 @@ - - lem_smartspeed(adapter); - -+#ifdef NIC_PARAVIRT -+ /* recover space if needed */ -+ if (adapter->csb && adapter->csb->guest_csb_on && -+ (adapter->watchdog_check == TRUE) && -+ (ticks - adapter->watchdog_time > EM_WATCHDOG) && -+ (adapter->num_tx_desc_avail != adapter->num_tx_desc) ) { -+ lem_txeof(adapter); -+ /* -+ * lem_txeof() normally (except when space in the queue -+ * runs low XXX) cleans watchdog_check so that -+ * we do not hung. -+ */ -+ } -+#endif /* NIC_PARAVIRT */ - /* - * We check the watchdog: the time since - * the last TX descriptor was cleaned. -@@ -3056,6 +3192,16 @@ - adapter->next_tx_to_clean = first; - adapter->num_tx_desc_avail = num_avail; - -+#ifdef NIC_SEND_COMBINING -+ if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) { -+ /* a tdt write is pending, do it */ -+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), -+ 0xffff & adapter->shadow_tdt); -+ adapter->shadow_tdt = MIT_PENDING_INT; -+ } else { -+ adapter->shadow_tdt = 0; // disable -+ } -+#endif /* NIC_SEND_COMBINING */ - /* - * If we have enough room, clear IFF_DRV_OACTIVE to - * tell the stack that it is OK to send packets. -@@ -3063,6 +3209,12 @@ - */ - if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) { - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; -+#ifdef NIC_PARAVIRT -+ if (adapter->csb) { // XXX also csb_on ? -+ adapter->csb->guest_need_txkick = 0; -+ // XXX memory barrier -+ } -+#endif /* NIC_PARAVIRT */ - if (adapter->num_tx_desc_avail == adapter->num_tx_desc) { - adapter->watchdog_check = FALSE; - return; -@@ -3369,6 +3521,10 @@ - if (ifp->if_capenable & IFCAP_NETMAP) - rctl -= NA(adapter->ifp)->rx_rings[0].nr_hwavail; - #endif /* DEV_NETMAP */ -+#ifdef NIC_PARAVIRT -+ if (adapter->csb) -+ adapter->csb->guest_rdt = rctl; -+#endif /* NIC_PARAVIRT */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl); - - return; -@@ -3446,7 +3602,21 @@ - int i, rx_sent = 0; - struct e1000_rx_desc *current_desc; - -+#ifdef BATCH_DISPATCH -+ struct mbuf *mh = NULL, *mt = NULL; -+#endif /* BATCH_DISPATCH */ -+#ifdef NIC_PARAVIRT -+ int retries = 0; -+ struct paravirt_csb* csb = adapter->csb; -+ int csb_mode = csb && csb->guest_csb_on; -+ -+ ND("clear guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ if (csb_mode && csb->guest_need_rxkick) -+ csb->guest_need_rxkick = 0; -+#endif /* NIC_PARAVIRT */ - EM_RX_LOCK(adapter); -+ -+batch_again: - i = adapter->next_rx_desc_to_check; - current_desc = &adapter->rx_desc_base[i]; - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, -@@ -3459,19 +3629,45 @@ - } - #endif /* DEV_NETMAP */ - -+#if 0 // XXX optimization ? - if (!((current_desc->status) & E1000_RXD_STAT_DD)) { - if (done != NULL) - *done = rx_sent; - EM_RX_UNLOCK(adapter); - return (FALSE); - } -+#endif /* 0 */ - - while (count != 0 && ifp->if_drv_flags & IFF_DRV_RUNNING) { - struct mbuf *m = NULL; - - status = current_desc->status; -- if ((status & E1000_RXD_STAT_DD) == 0) -+ if ((status & E1000_RXD_STAT_DD) == 0) { -+#ifdef NIC_PARAVIRT -+ if (csb_mode) { -+ /* buffer not ready yet. Retry a few times before giving up */ -+ if (++retries <= adapter->rx_retries) { -+ continue; -+ } -+ if (csb->guest_need_rxkick == 0) { -+ ND("set guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ csb->guest_need_rxkick = 1; -+ // XXX memory barrier, status volatile ? -+ continue; /* double check */ -+ } -+ } -+ /* no buffer ready, give up */ -+#endif /* NIC_PARAVIRT */ - break; -+ } -+#ifdef NIC_PARAVIRT -+ if (csb_mode) { -+ if (csb->guest_need_rxkick) -+ ND("clear again guest_rxkick at %d", adapter->next_rx_desc_to_check); -+ csb->guest_need_rxkick = 0; -+ retries = 0; -+ } -+#endif /* NIC_PARAVIRT */ - - mp = adapter->rx_buffer_area[i].m_head; - /* -@@ -3596,11 +3792,36 @@ - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - -+#ifdef NIC_PARAVIRT -+ if (csb_mode) { -+ /* the buffer at i has been already replaced by lem_get_buf() -+ * so it is safe to set guest_rdt = i and possibly send a kick. -+ * XXX see if we can optimize it later. -+ */ -+ csb->guest_rdt = i; -+ // XXX memory barrier -+ if (i == csb->host_rxkick_at) -+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); -+ } -+#endif /* NIC_PARAVIRT */ - /* Advance our pointers to the next descriptor. */ - if (++i == adapter->num_rx_desc) - i = 0; - /* Call into the stack */ - if (m != NULL) { -+#ifdef BATCH_DISPATCH -+ if (adapter->batch_enable) { -+ if (mh == NULL) -+ mh = mt = m; -+ else -+ mt->m_nextpkt = m; -+ mt = m; -+ m->m_nextpkt = NULL; -+ rx_sent++; -+ current_desc = &adapter->rx_desc_base[i]; -+ continue; -+ } -+#endif /* BATCH_DISPATCH */ - adapter->next_rx_desc_to_check = i; - EM_RX_UNLOCK(adapter); - (*ifp->if_input)(ifp, m); -@@ -3611,10 +3832,27 @@ - current_desc = &adapter->rx_desc_base[i]; - } - adapter->next_rx_desc_to_check = i; -+#ifdef BATCH_DISPATCH -+ if (mh) { -+ EM_RX_UNLOCK(adapter); -+ while ( (mt = mh) != NULL) { -+ mh = mh->m_nextpkt; -+ mt->m_nextpkt = NULL; -+ (*ifp->if_input)(ifp, mt); -+ } -+ EM_RX_LOCK(adapter); -+ i = adapter->next_rx_desc_to_check; /* in case of interrupts */ -+ if (count > 0) -+ goto batch_again; -+ } -+#endif /* BATCH_DISPATCH */ - - /* Advance the E1000's Receive Queue #0 "Tail Pointer". */ - if (--i < 0) - i = adapter->num_rx_desc - 1; -+#ifdef NIC_PARAVIRT -+ if (!csb_mode) /* filter out writes */ -+#endif /* NIC_PARAVIRT */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); - if (done != NULL) - *done = rx_sent; diff --git a/private/extra/libpcap-netmap.diff b/private/extra/libpcap-netmap.diff deleted file mode 100644 index cd6c38973..000000000 --- a/private/extra/libpcap-netmap.diff +++ /dev/null @@ -1,389 +0,0 @@ -diff --git a/Makefile.in b/Makefile.in -index 9995458..c670d66 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -83,7 +83,7 @@ YACC = @V_YACC@ - @rm -f $@ - $(CC) $(FULL_CFLAGS) -c $(srcdir)/$*.c - --PSRC = pcap-@V_PCAP@.c @USB_SRC@ @BT_SRC@ @CAN_SRC@ @NETFILTER_SRC@ @CANUSB_SRC@ @DBUS_SRC@ -+PSRC = pcap-@V_PCAP@.c @USB_SRC@ @BT_SRC@ @CAN_SRC@ @NETFILTER_SRC@ @CANUSB_SRC@ @DBUS_SRC@ @NETMAP_SRC@ - FSRC = fad-@V_FINDALLDEVS@.c - SSRC = @SSRC@ - CSRC = pcap.c inet.c gencode.c optimize.c nametoaddr.c etherent.c \ -@@ -313,6 +313,7 @@ EXTRA_DIST = \ - pcap-namedb.h \ - pcap-netfilter-linux.c \ - pcap-netfilter-linux.h \ -+ pcap-netmap.c \ - pcap-nit.c \ - pcap-null.c \ - pcap-pf.c \ -diff --git a/config.h.in b/config.h.in -index c6bc68e..09c8557 100644 ---- a/config.h.in -+++ b/config.h.in -@@ -268,6 +268,9 @@ - /* target host supports netfilter sniffing */ - #undef PCAP_SUPPORT_NETFILTER - -+/* target host supports netmap */ -+#undef PCAP_SUPPORT_NETMAP -+ - /* target host supports USB sniffing */ - #undef PCAP_SUPPORT_USB - -diff --git a/configure b/configure -index be87668..a8d0cae 100755 ---- a/configure -+++ b/configure -@@ -626,6 +626,8 @@ INSTALL_PROGRAM - DBUS_SRC - PCAP_SUPPORT_DBUS - PKGCONFIG -+NETMAP_SRC -+PCAP_SUPPORT_NETMAP - CAN_SRC - PCAP_SUPPORT_CAN - CANUSB_SRC -@@ -747,6 +749,7 @@ enable_shared - enable_bluetooth - enable_canusb - enable_can -+enable_netmap - enable_dbus - ' - ac_precious_vars='build_alias -@@ -1385,6 +1388,8 @@ Optional Features: - available] - --enable-can enable CAN support [default=yes, if support - available] -+ --enable-netmap enable netmap support [default=yes, if support -+ available] - --enable-dbus enable D-Bus capture support [default=yes, if - support available] - -@@ -8148,6 +8153,39 @@ $as_echo "$as_me: no CAN sniffing support implemented for $host_os" >&6;} - - fi - -+# Check whether --enable-netmap was given. -+if test "${enable_netmap+set}" = set; then : -+ enableval=$enable_netmap; -+else -+ enable_netmap=yes -+fi -+ -+ -+if test "x$enable_netmap" != "xno" ; then -+ case "$host_os" in -+ *) -+ ac_fn_c_check_header_compile "$LINENO" "net/netmap_user.h" "ac_cv_header_net_netmap_user_h" "#include -+ -+" -+if test "x$ac_cv_header_net_netmap_user_h" = xyes; then : -+ -+$as_echo "#define PCAP_SUPPORT_NETMAP 1" >>confdefs.h -+ -+ NETMAP_SRC=pcap-netmap.c -+ { $as_echo "$as_me:${as_lineno-$LINENO}: netmap is supported" >&5 -+$as_echo "$as_me: netmap is supported" >&6;} -+else -+ { $as_echo "$as_me:${as_lineno-$LINENO}: netmap is not supported" >&5 -+$as_echo "$as_me: netmap is not supported" >&6;} -+fi -+ -+ -+ ;; -+ esac -+ -+ -+fi -+ - # Check whether --enable-dbus was given. - if test "${enable_dbus+set}" = set; then : - enableval=$enable_dbus; -diff --git a/configure.in b/configure.in -index f0aa2c5..55464ba 100644 ---- a/configure.in -+++ b/configure.in -@@ -1550,6 +1550,28 @@ if test "x$enable_can" != "xno" ; then - AC_SUBST(CAN_SRC) - fi - -+AC_ARG_ENABLE([netmap], -+[AC_HELP_STRING([--enable-netmap],[enable netmap support @<:@default=yes, if support available@:>@])], -+ [], -+ [enable_netmap=yes]) -+ -+if test "x$enable_netmap" != "xno" ; then -+ dnl check for netmap support -+ case "$host_os" in -+ *) -+ AC_CHECK_HEADER(net/netmap_user.h, -+ [ AC_DEFINE(PCAP_SUPPORT_NETMAP, 1, [target host supports netmap]) -+ NETMAP_SRC=pcap-netmap.c -+ AC_MSG_NOTICE(netmap is supported)], -+ AC_MSG_NOTICE(netmap is not supported), -+ [#include ] -+ ) -+ ;; -+ esac -+ AC_SUBST(PCAP_SUPPORT_NETMAP) -+ AC_SUBST(NETMAP_SRC) -+fi -+ - AC_ARG_ENABLE([dbus], - [AC_HELP_STRING([--enable-dbus],[enable D-Bus capture support @<:@default=yes, if support available@:>@])], - [], -diff --git a/inet.c b/inet.c -index c699658..d132507 100644 ---- a/inet.c -+++ b/inet.c -@@ -883,6 +883,10 @@ pcap_lookupnet(device, netp, maskp, errbuf) - #ifdef PCAP_SUPPORT_USB - || strstr(device, "usbmon") != NULL - #endif -+#ifdef PCAP_SUPPORT_NETMAP -+ || !strncmp(device, "netmap:", 7) -+ || !strncmp(device, "vale", 4) -+#endif - #ifdef HAVE_SNF_API - || strstr(device, "snf") != NULL - #endif -diff --git a/pcap-netmap.c b/pcap-netmap.c -new file mode 100644 -index 0000000..2568c2f ---- /dev/null -+++ b/pcap-netmap.c -@@ -0,0 +1,205 @@ -+/* -+ * Copyright 2014 Universita` di Pisa -+ * -+ * packet filter subroutines for netmap -+ */ -+ -+#ifdef HAVE_CONFIG_H -+#include "config.h" -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define NETMAP_WITH_LIBS -+#include -+ -+#include "pcap-int.h" -+ -+#if defined (linux) -+/* On FreeBSD we use IFF_PPROMISC which is in ifr_flagshigh. -+ * remap to IFF_PROMISC on linux -+ */ -+#define IFF_PPROMISC IFF_PROMISC -+#define ifr_flagshigh ifr_flags -+#endif /* linux */ -+ -+struct pcap_netmap { -+ struct nm_desc *d; /* pointer returned by nm_open() */ -+ pcap_handler cb; /* callback and argument */ -+ u_char *cb_arg; -+ int must_clear_promisc; /* flag */ -+ uint64_t rx_pkts; /* count of packets received before the filter */ -+}; -+ -+static int -+pcap_netmap_stats(pcap_t *p, struct pcap_stat *ps) -+{ -+ struct pcap_netmap *pn = p->priv; -+ -+ ps->ps_recv = pn->rx_pkts; -+ ps->ps_drop = 0; -+ ps->ps_ifdrop = 0; -+ return 0; -+} -+ -+static void -+pcap_netmap_filter(u_char *arg, struct pcap_pkthdr *h, const u_char *buf) -+{ -+ pcap_t *p = (pcap_t *)arg; -+ struct pcap_netmap *pn = p->priv; -+ -+ ++pn->rx_pkts; -+ if (bpf_filter(p->fcode.bf_insns, buf, h->len, h->caplen)) -+ pn->cb(pn->cb_arg, h, buf); -+} -+ -+static int -+pcap_netmap_dispatch(pcap_t *p, int cnt, pcap_handler cb, u_char *user) -+{ -+ int ret; -+ struct pcap_netmap *pn = p->priv; -+ struct nm_desc *d = pn->d; -+ struct pollfd pfd = { .fd = p->fd, .events = POLLIN, .revents = 0 }; -+ -+ pn->cb = cb; -+ pn->cb_arg = user; -+ -+ for (;;) { -+ if (p->break_loop) { -+ p->break_loop = 0; -+ return PCAP_ERROR_BREAK; -+ } -+ /* nm_dispatch won't run forever */ -+ ret = nm_dispatch((void *)d, cnt, (void *)pcap_netmap_filter, (void *)p); -+ if (ret != 0) -+ break; -+ poll(&pfd, 1, p->opt.timeout); -+ } -+ return ret; -+} -+ -+/* XXX need to check the NIOCTXSYNC/poll */ -+static int -+pcap_netmap_inject(pcap_t *p, const void *buf, size_t size) -+{ -+ struct nm_desc *d = ((struct pcap_netmap *)p->priv)->d; -+ -+ return nm_inject(d, buf, size); -+} -+ -+static int -+pcap_netmap_ioctl(pcap_t *p, u_long what, uint32_t *if_flags) -+{ -+ struct pcap_netmap *pn = p->priv; -+ struct nm_desc *d = pn->d; -+ struct ifreq ifr; -+ int error, fd = d->fd; -+ -+#ifdef linux -+ fd = socket(AF_INET, SOCK_DGRAM, 0); -+ if (fd < 0) { -+ fprintf(stderr, "Error: cannot get device control socket.\n"); -+ return -1; -+ } -+#endif /* linux */ -+ bzero(&ifr, sizeof(ifr)); -+ strncpy(ifr.ifr_name, d->req.nr_name, sizeof(ifr.ifr_name)); -+ switch (what) { -+ case SIOCSIFFLAGS: -+ ifr.ifr_flags = *if_flags; -+ ifr.ifr_flagshigh = *if_flags >> 16; -+ break; -+ } -+ error = ioctl(fd, what, &ifr); -+ fprintf(stderr, "%s %s ioctl 0x%lx returns %d\n", __FUNCTION__, -+ d->req.nr_name, what, error); -+ if (error) -+ return -1; -+ switch (what) { -+ case SIOCGIFFLAGS: -+ *if_flags = ifr.ifr_flags | (ifr.ifr_flagshigh << 16); -+ } -+ return 0; -+} -+ -+static void -+pcap_netmap_close(pcap_t *p) -+{ -+ struct pcap_netmap *pn = p->priv; -+ struct nm_desc *d = pn->d; -+ uint32_t if_flags = 0; -+ -+ if (pn->must_clear_promisc) { -+ pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */ -+ if (if_flags & IFF_PPROMISC) { -+ if_flags &= ~IFF_PPROMISC; -+ pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags); -+ } -+ } -+ nm_close(d); -+} -+ -+static int -+pcap_netmap_activate(pcap_t *p) -+{ -+ struct pcap_netmap *pn = p->priv; -+ struct nm_desc *d = nm_open(p->opt.source, NULL, 0, NULL); -+ uint32_t if_flags = 0; -+ -+ if (d == NULL) { -+ snprintf(p->errbuf, PCAP_ERRBUF_SIZE, -+ "netmap open: cannot access %s: %s\n", -+ p->opt.source, pcap_strerror(errno)); -+ goto bad; -+ } -+ fprintf(stderr, "%s device %s priv %p fd %d ports %d..%d\n", -+ __FUNCTION__, p->opt.source, d, d->fd, d->first_rx_ring, d->last_rx_ring); -+ pn->d = d; -+ p->fd = d->fd; -+ if (p->opt.promisc && !(d->req.nr_ringid & NETMAP_SW_RING)) { -+ pcap_netmap_ioctl(p, SIOCGIFFLAGS, &if_flags); /* fetch flags */ -+ if (!(if_flags & IFF_PPROMISC)) { -+ pn->must_clear_promisc = 1; -+ if_flags |= IFF_PPROMISC; -+ pcap_netmap_ioctl(p, SIOCSIFFLAGS, &if_flags); -+ } -+ } -+ p->linktype = DLT_EN10MB; -+ p->selectable_fd = p->fd; -+ p->read_op = pcap_netmap_dispatch; -+ p->inject_op = pcap_netmap_inject, -+ p->setfilter_op = install_bpf_program; -+ p->setdirection_op = NULL; -+ p->set_datalink_op = NULL; -+ p->getnonblock_op = pcap_getnonblock_fd; -+ p->setnonblock_op = pcap_setnonblock_fd; -+ p->stats_op = pcap_netmap_stats; -+ p->cleanup_op = pcap_netmap_close; -+ return (0); -+ -+ bad: -+ pcap_cleanup_live_common(p); -+ return (PCAP_ERROR); -+} -+ -+pcap_t * -+pcap_netmap_create(const char *device, char *ebuf, int *is_ours) -+{ -+ pcap_t *p; -+ -+ *is_ours = (!strncmp(device, "netmap:", 7) || !strncmp(device, "vale", 4)); -+ if (! *is_ours) -+ return NULL; -+ p = pcap_create_common(device, ebuf, sizeof (struct pcap_netmap)); -+ if (p == NULL) -+ return (NULL); -+ p->activate_op = pcap_netmap_activate; -+ return (p); -+} -diff --git a/pcap.c b/pcap.c -index b2b5da6..beda714 100644 ---- a/pcap.c -+++ b/pcap.c -@@ -104,6 +104,10 @@ - #include "pcap-dbus.h" - #endif - -+#ifdef PCAP_SUPPORT_NETMAP -+pcap_t* pcap_netmap_create(const char *device, char *ebuf, int *is_ours); -+#endif -+ - int - pcap_not_initialized(pcap_t *pcap _U_) - { -@@ -307,6 +311,9 @@ struct capture_source_type { - int (*findalldevs_op)(pcap_if_t **, char *); - pcap_t *(*create_op)(const char *, char *, int *); - } capture_source_types[] = { -+#ifdef PCAP_SUPPORT_NETMAP -+ { NULL, pcap_netmap_create }, -+#endif - #ifdef HAVE_DAG_API - { dag_findalldevs, dag_create }, - #endif diff --git a/private/extra/netreceive.c b/private/extra/netreceive.c deleted file mode 100644 index 80be69374..000000000 --- a/private/extra/netreceive.c +++ /dev/null @@ -1,264 +0,0 @@ -/*- - * Copyright (c) 2004 Robert N. M. Watson - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include -#include -#include -#include - -#include -#include /* getaddrinfo */ - -#include - -#include -#include -#include -#include /* close */ - -#define MAXSOCK 20 - -#include -#include -#include /* clock_getres() */ - -static int round_to(int n, int l) -{ - return ((n + l - 1)/l)*l; -} - -/* - * Each socket uses multiple threads so the receiver is - * more efficient. A collector thread runs the stats. - */ -struct td_desc { - pthread_t td_id; - uint64_t count; /* rx counter */ - int fd; - char *buf; - int buflen; -}; - -static void -usage(void) -{ - - fprintf(stderr, "netreceive port [nthreads]\n"); - exit(-1); -} - -static __inline void -timespec_add(struct timespec *tsa, struct timespec *tsb) -{ - - tsa->tv_sec += tsb->tv_sec; - tsa->tv_nsec += tsb->tv_nsec; - if (tsa->tv_nsec >= 1000000000) { - tsa->tv_sec++; - tsa->tv_nsec -= 1000000000; - } -} - -static __inline void -timespec_sub(struct timespec *tsa, struct timespec *tsb) -{ - - tsa->tv_sec -= tsb->tv_sec; - tsa->tv_nsec -= tsb->tv_nsec; - if (tsa->tv_nsec < 0) { - tsa->tv_sec--; - tsa->tv_nsec += 1000000000; - } -} - -static void * -rx_body(void *data) -{ - struct td_desc *t = data; - struct pollfd fds; - int y; - - fds.fd = t->fd; - fds.events = POLLIN; - - for (;;) { - if (poll(&fds, 1, -1) < 0) - perror("poll on thread"); - if (!(fds.revents & POLLIN)) - continue; - for (;;) { - y = recv(t->fd, t->buf, t->buflen, MSG_DONTWAIT); - if (y < 0) - break; - t->count++; - } - } - return NULL; -} - -int -make_threads(struct td_desc **tp, int *s, int nsock, int nthreads) -{ - int i, si, nt = nsock * nthreads; - int lb = round_to(nt * sizeof (struct td_desc *), 64); - int td_len = round_to(sizeof(struct td_desc), 64); // cache align - char *m = calloc(1, lb + td_len * nt); - - printf("td len %d -> %d\n", (int)sizeof(struct td_desc) , td_len); - /* pointers plus the structs */ - if (m == NULL) { - perror("no room for pointers!"); - exit(1); - } - tp = (struct td_desc **)m; - m += lb; /* skip the pointers */ - for (si = i = 0; i < nt; i++, m += td_len) { - tp[i] = (struct td_desc *)m; - tp[i]->fd = s[si]; - if (++si == nsock) - si = 0; - if (pthread_create(&tp[i]->td_id, NULL, rx_body, tp[i])) { - perror("unable to create thread"); - exit(1); - } - } -} - -int -main_thread(struct td_desc **tp, int nsock, int nthreads) -{ - uint64_t c0, c1; - struct timespec now, then, delta; - /* now the parent collects and prints results */ - c0 = c1 = 0; - clock_gettime(CLOCK_REALTIME, &then); - fprintf(stderr, "start at %ld.%09ld\n", then.tv_sec, then.tv_nsec); - while (1) { - int i, nt = nsock * nthreads; - int64_t dn; - uint64_t pps; - - if (poll(NULL, 0, 500) < 0) - perror("poll"); - c0 = 0; - for (i = 0; i < nt; i++) { - c0 += tp[i]->count; - } - dn = c0 - c1; - clock_gettime(CLOCK_REALTIME, &now); - delta = now; - timespec_sub(&delta, &then); - then = now; - pps = dn; - pps = (pps * 1000000000) / (delta.tv_sec*1000000000 + delta.tv_nsec + 1); - fprintf(stderr, "%d pkts in %ld.%09ld ns %ld pps\n", - (int)dn, delta.tv_sec, delta.tv_nsec, (long)pps); - c1 = c0; - } -} - -int -main(int argc, char *argv[]) -{ - struct addrinfo hints, *res, *res0; - char *dummy, *packet; - int port; - int error, v, nthreads = 1; - struct td_desc **tp; - const char *cause = NULL; - int s[MAXSOCK]; - int nsock; - - if (argc < 2) - usage(); - - memset(&hints, 0, sizeof(hints)); - hints.ai_family = PF_UNSPEC; - hints.ai_socktype = SOCK_DGRAM; - hints.ai_flags = AI_PASSIVE; - - port = strtoul(argv[1], &dummy, 10); - if (port < 1 || port > 65535 || *dummy != '\0') - usage(); - if (argc > 2) - nthreads = strtoul(argv[2], &dummy, 10); - if (nthreads < 1 || nthreads > 64) - usage(); - - packet = malloc(65536); - if (packet == NULL) { - perror("malloc"); - return (-1); - } - bzero(packet, 65536); - - error = getaddrinfo(NULL, argv[1], &hints, &res0); - if (error) { - perror(gai_strerror(error)); - return (-1); - /*NOTREACHED*/ - } - - nsock = 0; - for (res = res0; res && nsock < MAXSOCK; res = res->ai_next) { - s[nsock] = socket(res->ai_family, res->ai_socktype, - res->ai_protocol); - if (s[nsock] < 0) { - cause = "socket"; - continue; - } - - v = 128 * 1024; - if (setsockopt(s[nsock], SOL_SOCKET, SO_RCVBUF, &v, sizeof(v)) < 0) { - cause = "SO_RCVBUF"; - close(s[nsock]); - continue; - } - if (bind(s[nsock], res->ai_addr, res->ai_addrlen) < 0) { - cause = "bind"; - close(s[nsock]); - continue; - } - (void) listen(s[nsock], 5); - nsock++; - } - if (nsock == 0) { - perror(cause); - return (-1); - /*NOTREACHED*/ - } - - printf("netreceive %d sockets x %d threads listening on UDP port %d\n", - nsock, nthreads, (u_short)port); - - make_threads(tp, s, nsock, nthreads); - main_thread(tp, nsock, nthreads); - - /*NOTREACHED*/ - freeaddrinfo(res0); -} diff --git a/private/extra/paravirt.h b/private/extra/paravirt.h deleted file mode 100644 index e8c49cb0b..000000000 --- a/private/extra/paravirt.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (C) 2013 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef NET_PARAVIRT_H -#define NET_PARAVIRT_H - -/* - Support for virtio-like communication between host (H) and guest (G) NICs. - - The guest allocates the shared Communication Status Block (csb) and - write its physical address at CSBAL and CSBAH (data is little endian). - csb->csb_on enables the mode. If disabled, the device acts a regular one. - - Notifications for tx and rx are exchanged without vm exits - if possible. In particular (only mentioning csb mode below), - the following actions are performed. In the description below, - "double check" means verifying again the condition that caused - the previous action, and reverting the action if the condition has - changed. The condition typically depends on a variable set by the - other party, and the double check is done to avoid races. E.g. - - // start with A=0 - again: - // do something - if ( cond(C) ) { // C is written by the other side - A = 1; - // barrier - if ( !cond(C) ) { - A = 0; - goto again; - } - } - - TX: start from idle: - H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new - transmissions, G always updates guest_tdt. If host_need_txkick == 1, - G also writes to the TDT, which acts as a kick to H (so pending - writes are always dispatched to H as soon as possible.) - - TX: active state: - On the kick (TDT write) H sets host_need_txkick == 0 (if not - done already by G), and starts an I/O thread trying to consume - packets from TDH to guest_tdt, periodically refreshing host_tdh - and TDH. When host_tdh == guest_tdt, H sets host_need_txkick=1, - and then does the "double check" for race avoidance. - - TX: G runs out of buffers - XXX there are two mechanisms, one boolean (using guest_need_txkick) - and one with a threshold (using guest_txkick_at). They are mutually - exclusive. - BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does - the double check. If H finds guest_need_txkick== 1 on a write - to TDH, it also generates an interrupt. - THRESHOLD: G sets guest_txkick_at to the TDH value for which it - wants to receive an interrupt. When H detects that TDH moves - across guest_txkick_at, it generates an interrupt. - This second mechanism reduces the number of interrupts and - TDT writes on the transmit side when the host is too slow. - - RX: start from idle - G starts with guest_need_rxkick = 1 when the receive ring is empty. - As packets arrive, H updates host_rdh (and RDH) and also generates an - interrupt when guest_need_rxkick == 1 (so incoming packets are - always reported to G as soon as possible, apart from interrupt - moderation delays). It also tracks guest_rdt for new buffers. - - RX: active state - As the interrupt arrives, G sets guest_need_rxkick = 0 and starts - draining packets from the receive ring, while updating guest_rdt - When G runs out of packets it sets guest_need_rxkick=1 and does the - double check. - - RX: H runs out of buffers - XXX there are two mechanisms, one boolean (using host_need_rxkick) - and one with a threshold (using host_xxkick_at). They are mutually - exclusive. - BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the - double check. If G finds host_need_rxkick==1 on updating guest_rdt, - it also writes to RDT causing a kick to H. - THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants - to receive a kick. When G detects that guest_rdt moves across - host_rxkick_at, it writes to RDT thus generates a kick. - This second mechanism reduces the number of kicks and - RDT writes on the receive side when the guest is too slow and - would free only a few buffers at a time. - - */ -struct paravirt_csb { - /* XXX revise the layout to minimize cache bounces. - * Usage is described as follows: - * [GH][RW][+-0] guest/host reads/writes frequently/rarely/almost never - */ - /* these are (mostly) written by the guest */ - uint32_t guest_tdt; /* GW+ HR+ pkt to transmit */ - uint32_t guest_need_txkick; /* GW- HR+ G ran out of tx bufs, request kick */ - uint32_t guest_need_rxkick; /* GW- HR+ G ran out of rx pkts, request kick */ - uint32_t guest_csb_on; /* GW- HR+ enable paravirtual mode */ - uint32_t guest_rdt; /* GW+ HR+ rx buffers available */ - uint32_t guest_txkick_at; /* GW- HR+ tx ring pos. where G expects an intr */ - uint32_t guest_use_msix; /* GW0 HR0 guest uses MSI-X interrupts. */ - uint32_t pad[9]; - - /* these are (mostly) written by the host */ - uint32_t host_tdh; /* GR0 HW- shadow register, mostly unused */ - uint32_t host_need_txkick; /* GR+ HW- start the iothread */ - uint32_t host_txcycles_lim; /* GW- HR- how much to spin before sleep. - * set by the guest */ - uint32_t host_txcycles; /* GR0 HW- counter, but no need to be exported */ - uint32_t host_rdh; /* GR0 HW- shadow register, mostly unused */ - uint32_t host_need_rxkick; /* GR+ HW- flush rx queued packets */ - uint32_t host_isr; /* GR* HW* shadow copy of ISR */ - uint32_t host_rxkick_at; /* GR+ HW- rx ring pos where H expects a kick */ - uint32_t vnet_ring_high; /* Vnet ring physical address high. */ - uint32_t vnet_ring_low; /* Vnet ring physical address low. */ -}; - -#define NET_PARAVIRT_CSB_SIZE 4096 -#define NET_PARAVIRT_NONE (~((uint32_t)0)) - -#ifdef QEMU_PCI_H - -/* - * API functions only available within QEMU - */ - -void paravirt_configure_csb(struct paravirt_csb** csb, uint32_t csbbal, - uint32_t csbbah, QEMUBH* tx_bh, AddressSpace *as); - -#endif /* QEMU_PCI_H */ - -#endif /* NET_PARAVIRT_H */ diff --git a/private/extra/python/Makefile b/private/extra/python/Makefile deleted file mode 100644 index 5bd9092f0..000000000 --- a/private/extra/python/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -PYTHON=python2 - - -build: always - $(PYTHON) setup.py build - -install: build - sudo $(PYTHON) setup.py install - -clean: - rm -rf build - -always: diff --git a/private/extra/python/README b/private/extra/python/README deleted file mode 100644 index c7e871b08..000000000 --- a/private/extra/python/README +++ /dev/null @@ -1,100 +0,0 @@ -========================= PYTHON BINDINGS FOR NETMAP ========================== - -The extra/python directory contains a C extension module that makes -it possible to use netmap from the Python (2.7 versions) programming -language. - - -(1) **************************** How to compile it **************************** - - cd extra/python - make - - -(2) ************ How to (compile and) install it in your system *************** - - cd extra/python - make install - - -(3) How to use it with python - - >>> import netmap - >>> # your code here - >>> ... - - -(4) ************************ Python classes for netmap ************************ - - The netmap extension module exports three Python classes that represent - the netmap memory layout: - (4.1) netmap.NetmapInterface - represents a "netmap_if" struct - (4.2) netmap.NetmapRing - represents a "netmap_ring" struct - (4.3) netmap.NetmapSlot - represents a "netmap_slot" struct - - The struct fields are **directly** read/write accessible (e.g. you are - accessing the real netmap memory) through the class members. - - You can issue - - >>> help(netmap.NetmapRing) - - or - - >>> help(r) # "r" is a reference to a netmap.NetmapRing instance - - to see the documentation (members and methods) of the specified class. - - The other two classes available in the netmap extension module - (netmap.Netmap and netmap.NetmapDesc) are intended to create, manage - and contain the netmap memory layout representation. - Each instance of such classes is intended to manage a network - interface. - - (4.5) netmap.Netmap - Apart from containing the netmap memory - layout (once the NIOCREGIF is done), it is basically a - wrapper for a "nmreq" struct, and can therefore be used - to access the whole netmap API. - Its constructor doesn't take any arguments, and allows the - user to use the ioctl netmap interface (NIOCREGIF, - NIOCGINFO, ...). - - Example: - >>> import netmap - >>> n=netmap.Netmap() - >>> n.open() # open the netmap device - >>> n.if_name = 'eth0' - >>> n.register() # registers all the hw rings of "eth0" - >>> # access n.interface, n.transmit_rings, n.receive_rings - >>> n.close() - - See help(netmap.Netmap) for reference. - - (4.6) netmap.NetmapDesc - This class is even simpler than - netmap.Netmap(), in that you don't have to separately - create the object, open the device and register, but you - can do these three operation with the constructor only. - Apart from this, you can use the nm_open() extended - interface names to specify what kind of registration you - desire (in fact the C backend for this class is nm_open()). - - Example: - >>> import netmap - >>> d=netmap.NetmapDesc('netmap:enp1s0f1*') - >>> # access d.interface, d.transmit_rings, d.receive_rings - - - -(5) ****************************** More examples ****************************** - - You can find some examples in extra/python: - - (5.1) pktgen.py - A minimalistic packet generator using the Python netmap - bindings. - - (5.2) pktman.py - A configurable packet generator/receiver. Run - - $ python pktman.py -h - - to see the available options. - diff --git a/private/extra/python/netmap.c b/private/extra/python/netmap.c deleted file mode 100644 index 693bd6f3f..000000000 --- a/private/extra/python/netmap.c +++ /dev/null @@ -1,345 +0,0 @@ -#include - -#include /* IFNAMSIZ */ -#include -#include - -#include "netmap_classes.h" - - -/* ############## Data and functions useful to all the classes ############# */ -PyObject *NetmapError; - -PyObject * -string_get(PyObject *str) -{ - Py_INCREF(str); - - return str; -} - -int -string_set(PyObject **str, PyObject *value) -{ - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete the attribute"); - return -1; - } - - if (!PyString_Check(value)) { - PyErr_SetString(PyExc_TypeError, - "The attribute value must be a string"); - return -1; - } - - Py_DECREF(*str); - Py_INCREF(value); - *str = value; - - return 0; -} - -/* @flags: contains the bits we want to pretty-print - @str: where to print - @avail: length of @str - @values: array of all the possible flags - @strings: array of names associated to each flag - @items: length of @values and @strings -*/ -void -netmap_flags_pretty(unsigned int flags, char *str, int avail, - unsigned int *values, const char **strings, int items) -{ - int ret; - int i; - - for (i = 0; avail && i < items; i++) { - if (flags & values[i]) { - ret = snprintf(str, avail, "[%s],", strings[i]); - if (ret < 0) { - *str = '\0'; - return; - } - str += ret; - avail -= ret; - } - } - - *str = '\0'; -} - -static unsigned int nr_flags_values[] = { NR_MONITOR_TX, NR_MONITOR_RX }; -static const char *nr_flags_strings[] = { "MonitorTx", "MonitorRx" }; -static unsigned int nr_poll_values[] = { NETMAP_NO_TX_POLL, - NETMAP_DO_RX_POLL }; -static const char *nr_poll_strings[] = { "NoTxPoll", "DoRxPoll" }; - -/* Pretty print nr_ringid and nr_flags. */ -void -ringid_pretty_print(uint32_t nr_ringid, uint32_t nr_flags, - char *ringid, int rsz, char *flags, int fsz) -{ - unsigned int idx; - int nr, nf; - - idx = nr_ringid & NETMAP_RING_MASK; - - if ((nr_flags & NR_REG_MASK) == (uint32_t)NR_REG_DEFAULT) { - /* Legacy 'ringid' API. */ - unsigned int ringflags = nr_ringid & ~NETMAP_RING_MASK - & ~NETMAP_NO_TX_POLL & ~NETMAP_DO_RX_POLL; - - switch (ringflags) { - case 0: - nr = sprintf(ringid, "[0x%04X] all hardware rings ", - nr_ringid); - break; - case NETMAP_HW_RING: - nr = sprintf(ringid, "[0x%04X] hardware rings pair %u ", - nr_ringid, idx); - break; - case NETMAP_SW_RING: - nr = sprintf(ringid, "[0x%04X] host rings pair ", nr_ringid); - break; - default: - nr = sprintf(ringid, "[0x%04X] ***UNKNOWN*** ", nr_ringid); - } - - sprintf(flags, "[0x%08X] Legacy ringid", nr_flags); - } else { - /* New 'ringid' API. */ - nr = sprintf(ringid, "[0x%04X] %u ", nr_ringid, idx); - - switch (nr_flags & NR_REG_MASK) { - case NR_REG_ALL_NIC: - nf = sprintf(flags, "[0x%08X] all hardware rings ", nr_flags); - break; - case NR_REG_SW: - nf = sprintf(flags, "[0x%08X] host ring pair ", nr_flags); - break; - case NR_REG_NIC_SW: - nf = sprintf(flags, "[0x%08X] all hardware and host rings ", - nr_flags); - break; - case NR_REG_ONE_NIC: - nf = sprintf(flags, "[0x%08X] an hardware rings pair ", - nr_flags); - break; - case NR_REG_PIPE_MASTER: - nf = sprintf(flags, "[0x%08X] a master pipe rings pair ", - nr_flags); - break; - case NR_REG_PIPE_SLAVE: - nf = sprintf(flags, "[0x%08X] a slave pipe rings pair ", - nr_flags); - break; - default: - nf = sprintf(flags, "[0x%08X] ***UNKNOWN*** ", nr_flags); - } - if (nf > 0) { - netmap_flags_pretty(nr_flags, flags + nf, fsz - nf, - nr_flags_values, nr_flags_strings, - sizeof(nr_flags_values) / sizeof(nr_flags_values[0])); - } - } - - if (nr > 0) { - netmap_flags_pretty(nr_ringid, ringid + nr, rsz - nr, - nr_poll_values, nr_poll_strings, - sizeof(nr_poll_values) / sizeof(nr_poll_values[0])); - } -} - - -/*########################### Module functions ############################*/ -static PyObject * -netmap_hello(PyObject *self, PyObject *args) -{ - const char *msg; - - if (!PyArg_ParseTuple(args, "s", &msg)) { - return NULL; - } - - return Py_BuildValue("s", msg); -} - -static PyMethodDef netmap_functions[] = { - { "hello", (PyCFunction)netmap_hello, METH_VARARGS, NULL }, - { NULL, NULL, 0, NULL } -}; - -#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ -#define PyMODINIT_FUNC void -#endif - -/* An integer constant visible from Python. */ -struct NetmapConst { - const char *name; - long value; -}; - -static struct NetmapConst netmap_constants[] = { - { - .name = "AllHwRings", - .value = 0, - }, - { - .name = "HwRing", - .value = NETMAP_HW_RING, - }, - { - .name = "SwRing", - .value = NETMAP_SW_RING, - }, - { - .name = "NoTxPoll", - .value = NETMAP_NO_TX_POLL, - }, - { - .name = "DoRxPoll", - .value = NETMAP_DO_RX_POLL, - }, - /* Add 'nmreq.flags' constants to the module. */ - { - .name = "RegDefault", - .value = NR_REG_DEFAULT, - }, - { - .name = "RegAllNic", - .value = NR_REG_ALL_NIC, - }, - { - .name = "RegSw", - .value = NR_REG_SW, - }, - { - .name = "RegNicSw", - .value = NR_REG_NIC_SW, - }, - { - .name = "RegOneNic", - .value = NR_REG_ONE_NIC, - }, - { - .name = "RegPipeMaster", - .value = NR_REG_PIPE_MASTER, - }, - { - .name = "RegPipeSlave", - .value = NR_REG_PIPE_SLAVE, - }, - { - .name = "RegMonitorTx", - .value = NR_MONITOR_TX, - }, - { - .name = "RegMonitorRx", - .value = NR_MONITOR_RX, - }, - /* Add 'netmap_rings.flags' constants to the module. */ - { - .name = "NrTimestamp", - .value = NR_TIMESTAMP, - }, - { - .name = "NrForward", - .value = NR_FORWARD, - }, - /* Add 'netmap_slot.flags' constants to the module. */ - { - .name = "NsBufChanged", - .value = NS_BUF_CHANGED, - }, - { - .name = "NsReport", - .value = NS_REPORT, - }, - { - .name = "NsForward", - .value = NS_FORWARD, - }, - { - .name = "NsNoLearn", - .value = NS_NO_LEARN, - }, - { - .name = "NsIndirect", - .value = NS_INDIRECT, - }, - { - .name = "NsMorefrag", - .value = NS_MOREFRAG, - }, - /* Add bridge management commands. */ - { - .name = "BdgAttach", - .value = NETMAP_BDG_ATTACH, - }, - { - .name = "BdgDetach", - .value = NETMAP_BDG_DETACH, - }, - { - .name = "BdgList", - .value = NETMAP_BDG_LIST, - }, - { - .name = "BdgVnetHdr", - .value = NETMAP_BDG_VNET_HDR, - }, - { - .name = "BdgHost", - .value = NETMAP_BDG_HOST, - } -}; - - -/*############################### Module init #############################*/ -PyMODINIT_FUNC -initnetmap() -{ - PyObject *module; - int i; - - /* Initialize Netmap***Type. */ - if (PyType_Ready(&NetmapManagerType) < 0) - return; - if (PyType_Ready(&NetmapInterfaceType) < 0) - return; - if (PyType_Ready(&NetmapRingType) < 0) - return; - if (PyType_Ready(&NetmapSlotType) < 0) - return; - if (PyType_Ready(&NetmapDescType) < 0) - return; - - /* Create the python module. */ - module = Py_InitModule3("netmap", netmap_functions, - "Netmap bindings for Python."); - - /* Add the Netmap***Type to the module. */ - Py_INCREF(&NetmapManagerType); - PyModule_AddObject(module, "Netmap", (PyObject *)&NetmapManagerType); - Py_INCREF(&NetmapInterfaceType); - PyModule_AddObject(module, "NetmapInterface", - (PyObject *)&NetmapInterfaceType); - Py_INCREF(&NetmapRingType); - PyModule_AddObject(module, "NetmapRing", (PyObject *)&NetmapRingType); - Py_INCREF(&NetmapSlotType); - PyModule_AddObject(module, "NetmapSlot", (PyObject *)&NetmapSlotType); - Py_INCREF(&NetmapDescType); - PyModule_AddObject(module, "NetmapDesc", (PyObject *)&NetmapDescType); - - /* Add the NetmapError to the module. */ - NetmapError = PyErr_NewException("netmap.error", NULL, NULL); - Py_INCREF(NetmapError); - PyModule_AddObject(module, "error", NetmapError); - - /* Add some integer constants to the module. */ - for (i = 0; i < sizeof(netmap_constants)/sizeof(struct NetmapConst); i++) { - PyModule_AddIntConstant(module, netmap_constants[i].name, - netmap_constants[i].value); - } -} - diff --git a/private/extra/python/netmap_classes.h b/private/extra/python/netmap_classes.h deleted file mode 100644 index 9f0736445..000000000 --- a/private/extra/python/netmap_classes.h +++ /dev/null @@ -1,110 +0,0 @@ -#include -#include -#include - - -extern PyObject *NetmapError; - -/* Utilities implemented in netmap.c. */ -PyObject *string_get(PyObject *str); -int string_set(PyObject **str, PyObject *value); -void netmap_flags_pretty(unsigned int flags, char *str, int avail, - unsigned int *values, const char **strings, int items); -void ringid_pretty_print(uint32_t nr_ringid, uint32_t nr_flags, - char *ringid, int rsz, char *flags, int fsz); - - -/* Netmap memory representation. */ -typedef struct { - PyObject *interface; - PyObject *transmit_rings; - PyObject *receive_rings; - -} NetmapMemory; - -void NetmapMemory_dealloc(NetmapMemory *memory); -void NetmapMemory_new(NetmapMemory *memory); -int NetmapMemory_setup(NetmapMemory *memory, struct netmap_if *nifp, - int num_tx_rings, int num_rx_rings); -void NetmapMemory_destroy(NetmapMemory *memory); - -/* - * Main class of the netmap module, managing - * a netmap port. - */ -typedef struct { - PyObject_HEAD - PyObject *dev_name; /* Netmap device name. */ - - PyObject *if_name; - struct nmreq nmreq; /* The netmap request we are wrapping. */ - - /* Netmap memory representation. */ - NetmapMemory memory; - - /* Internal variables. */ - int _state; -#define INVALID_FD (-1) - int _fd; /* Netmap device file descriptor. */ - void *_memaddr; /* Netmap memory-mapped area. */ -} NetmapManager; - -extern PyTypeObject NetmapManagerType; - - -/* - * A simpler alternative to the NetmapManager class, which makes use of the - * nm_open()/nm_close() API. - */ -typedef struct { - PyObject_HEAD - - struct nm_desc *nmd; /* The netmap descriptor object we are wrapping. */ - - /* Netmap memory representation. */ - NetmapMemory memory; -} NetmapDesc; - -extern PyTypeObject NetmapDescType; - - -/* Class wrapper for the netmap_if struct. */ -typedef struct { - PyObject_HEAD - - struct netmap_if *_nifp; /* Address of struct netmap_if. */ -} NetmapInterface; - -extern PyTypeObject NetmapInterfaceType; - -int NetmapInterface_build(NetmapInterface *self, void *addr); -void NetmapInterface_destroy(NetmapInterface *self); - - -/* Class wrapper for the netmap_ring struct. */ -typedef struct { - PyObject_HEAD - PyObject *slots; - - struct netmap_ring *_ring; /* Address of struct netmap_ring. */ -} NetmapRing; - -extern PyTypeObject NetmapRingType; - -int NetmapRing_build(NetmapRing *self, void *addr); -void NetmapRing_destroy(NetmapRing *self); - - -/* Class wrapper for the netmap_slot struct. */ -typedef struct { - PyObject_HEAD - PyObject *memoryview; - - Py_buffer _view; - struct netmap_slot *_slot; /* Address of struct netmap_slot. */ -} NetmapSlot; - -extern PyTypeObject NetmapSlotType; - -int NetmapSlot_build(NetmapSlot *slot, void *addr, void *buf); -void NetmapSlot_destroy(NetmapSlot *slot); diff --git a/private/extra/python/netmap_desc.c b/private/extra/python/netmap_desc.c deleted file mode 100644 index f6058270d..000000000 --- a/private/extra/python/netmap_desc.c +++ /dev/null @@ -1,247 +0,0 @@ -#include "netmap_classes.h" - -#include -#include /* open() */ -#include /* ioctl() */ -#include /* mmap() */ -#include /* IFNAMSIZ */ -#include -#include -#define NETMAP_WITH_LIBS -#include - - -/* Destructor method for NetmapDescType. */ -static void -NetmapDesc_dealloc(NetmapDesc* self) -{ - NetmapMemory_dealloc(&self->memory); - - if (self->nmd) { - nm_close(self->nmd); - } - self->ob_type->tp_free((PyObject*)self); -} - -/* Netmap.__new__() is the constructor. */ -static PyObject * -NetmapDesc_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - NetmapDesc *self; - - self = (NetmapDesc *)type->tp_alloc(type, 0); - if (self == NULL) { - return NULL; - } - - self->nmd = NULL; - NetmapMemory_new(&self->memory); - - return (PyObject *)self; -} - -/* Netmap.__init__(), may be called many times, or not called at all. */ -static int -NetmapDesc_init(NetmapDesc *self, PyObject *args, PyObject *kwds) -{ - PyObject *dev_name = NULL; - static char *kwlist[] = {"ifname", "flags", NULL}; - const char *ifname; - unsigned long flags; - int ret; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|k", kwlist, - &ifname, &flags)) { - return -1; - } - - /* Open the netmap device and register an interface. */ - self->nmd = nm_open(ifname, NULL, flags, NULL); - if (self->nmd == NULL) { - PyErr_SetString(NetmapError, "nm_open() failed"); - return -1; - } - - /* Setup the netmap memory layout. The +1 are here to take into account - the host rings. */ - ret = NetmapMemory_setup(&self->memory, self->nmd->nifp, - self->nmd->req.nr_tx_rings + 1, - self->nmd->req.nr_rx_rings + 1); - - return ret; -} - -static PyObject * -NetmapDesc_repr(NetmapDesc *self) -{ - PyObject *result; - char ringid[128]; - char flags[128]; - - ringid_pretty_print(self->nmd->req.nr_ringid, self->nmd->req.nr_flags, - ringid, sizeof(ringid), flags, sizeof(flags)); - - result = PyString_FromFormat( - "if_name: '%s'\n" - "ringid: '%s'\n" - "flags: '%s'\n", - self->nmd->req.nr_name, ringid, flags); - - return result; -} - - -static PyMemberDef NetmapDesc_members[] = { - {NULL} /* Sentinel */ -}; - - -/*########################## set/get methods #######################*/ - -#define NETMAP_MANAGER_DEFINE_GETSET(obj) \ -static PyObject * \ -NetmapDesc_##obj##_get(NetmapDesc *self, void *closure) \ -{ \ - if (self->memory.obj == NULL) { \ - Py_RETURN_NONE; \ - } \ - Py_INCREF(self->memory.obj); \ - return self->memory.obj; \ -} \ - \ -static int \ -NetmapDesc_##obj##_set(NetmapDesc *self, PyObject *value, \ - void *closure) \ -{ \ - if (value == NULL) { \ - PyErr_SetString(PyExc_TypeError, "Cannot delete the attribute"); \ - } else { \ - PyErr_SetString(PyExc_TypeError, "Cannot modify the attribute"); \ - } \ - return -1; \ -} - -NETMAP_MANAGER_DEFINE_GETSET(interface); -NETMAP_MANAGER_DEFINE_GETSET(transmit_rings); -NETMAP_MANAGER_DEFINE_GETSET(receive_rings); - -#define NETMAP_MANAGER_DECLARE_GETSET(obj, desc) \ - {#obj, \ - (getter)NetmapDesc_##obj##_get, \ - (setter)NetmapDesc_##obj##_set, \ - desc, \ - NULL} - - -static PyGetSetDef NetmapDesc_getseters[] = { - NETMAP_MANAGER_DECLARE_GETSET(interface, "NetmapInterface object"), - NETMAP_MANAGER_DECLARE_GETSET(transmit_rings, - "List of NetmapRing objects (Tx)"), - NETMAP_MANAGER_DECLARE_GETSET(receive_rings, - "List of NetmapRing objects (Rx)"), - {NULL} /* Sentinel */ -}; - - -/*########################## NetmapDesc methods ########################*/ - -static PyObject * -NetmapDesc_xxsync(NetmapDesc *self, int iocmd) -{ - int ret; - - /* Issue the request to the netmap device. */ - ret = ioctl(self->nmd->fd, iocmd, NULL); - if (ret) { - PyErr_SetFromErrno(NetmapError); - return NULL; - } - - Py_RETURN_NONE; -} - -static PyObject * -NetmapDesc_txsync(NetmapDesc *self) -{ - return NetmapDesc_xxsync(self, NIOCTXSYNC); -} - -static PyObject * -NetmapDesc_rxsync(NetmapDesc *self) -{ - return NetmapDesc_xxsync(self, NIOCRXSYNC); -} - -static PyObject * -NetmapDesc_getfd(NetmapDesc *self) -{ - return Py_BuildValue("i", self->nmd->fd); -} - -static PyObject * -NetmapDesc_getringid(NetmapDesc *self) -{ - return Py_BuildValue("kk", self->nmd->req.nr_ringid, - self->nmd->req.nr_flags); -} - -/* A container for the netmap methods. */ -static PyMethodDef NetmapDesc_methods[] = { - {"txsync", (PyCFunction)NetmapDesc_txsync, METH_NOARGS, - "Do a txsync on the registered rings" - }, - {"rxsync", (PyCFunction)NetmapDesc_rxsync, METH_NOARGS, - "Do a rxsync on the registered rings" - }, - {"getfd", (PyCFunction)NetmapDesc_getfd, METH_NOARGS, - "Get the file descriptor of the open netmap device" - }, - {"getringid", (PyCFunction)NetmapDesc_getringid, METH_NOARGS, - "Get the nr_ringid and nr_flags of the registered interface" - }, - {NULL} /* Sentinel */ -}; - -/* Definition exported to netmap.c. */ -PyTypeObject NetmapDescType = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "netmap.NetmapDesc", /*tp_name*/ - sizeof(NetmapDesc), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)NetmapDesc_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - (reprfunc)NetmapDesc_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "Netmap descriptor object", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - NetmapDesc_methods, /* tp_methods */ - NetmapDesc_members, /* tp_members */ - NetmapDesc_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)NetmapDesc_init, /* tp_init */ - 0, /* tp_alloc */ - NetmapDesc_new, /* tp_new */ -}; - diff --git a/private/extra/python/netmap_interface.c b/private/extra/python/netmap_interface.c deleted file mode 100644 index e0a812444..000000000 --- a/private/extra/python/netmap_interface.c +++ /dev/null @@ -1,225 +0,0 @@ -#include "netmap_classes.h" - -#include - - -static void -NetmapInterface_dealloc(NetmapInterface* self) -{ - self->ob_type->tp_free((PyObject*)self); -} - -static PyObject * -NetmapInterface_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - NetmapInterface *self; - - self = (NetmapInterface *)type->tp_alloc(type, 0); - if (self != NULL) { - self->_nifp = NULL; - } - - return (PyObject *)self; -} - -static PyObject * -NetmapInterface_repr(NetmapInterface *self) -{ - PyObject *result; - struct netmap_if *nifp = self->_nifp; - - if (nifp == NULL) { - return Py_BuildValue("s", "Invalid NetmapInterface"); - } - - result = PyString_FromFormat( - "name: '%s'\n" - "version: %u\n" - "flags: 0x%08x\n" - "tx_rings: %u\n" - "rx_rings: %u\n" - "bufs_head: %u\n" - "spare1[0]: 0x%08x\n" - "spare1[1]: 0x%08x\n" - "spare1[2]: 0x%08x\n" - "spare1[3]: 0x%08x\n" - "spare1[4]: 0x%08x\n", - nifp->ni_name, - nifp->ni_version, - nifp->ni_flags, - nifp->ni_tx_rings, - nifp->ni_rx_rings, - nifp->ni_bufs_head, - nifp->ni_spare1[0], - nifp->ni_spare1[1], - nifp->ni_spare1[2], - nifp->ni_spare1[3], - nifp->ni_spare1[4] - ); - - return result; -} - - -static PyMemberDef NetmapInterface_members[] = { - {NULL} -}; - -int -NetmapInterface_build(NetmapInterface *self, void *addr) -{ - self->_nifp = addr; - - return 0; -} - -void -NetmapInterface_destroy(NetmapInterface *self) -{ - self->_nifp = NULL; -} - -/*########################## set/get methods #######################*/ - -static PyObject * -NetmapInterface_name_get(NetmapInterface *self, void *closure) -{ - if (!self->_nifp) { - /* This cannot happen for NetmapInterface object created by - a NetmapManager object, but may happen for standalone - NetmapInterface objects. */ - Py_RETURN_NONE; - } - - return Py_BuildValue("s", self->_nifp->ni_name); -} - -static int -NetmapInterface_name_set(NetmapInterface *self, PyObject *value, void *closure) -{ - const char *str; - size_t len; - - if (!self->_nifp) { - /* See comment in NetmapInterface_name_get(). */ - PyErr_SetString(PyExc_TypeError, "Attribute not available"); - return -1; - } - - str = PyString_AsString(value); - if (str == NULL) { - return -1; - } - - len = PyString_Size(value); - if (len > IFNAMSIZ-1) { - len = IFNAMSIZ-1; - } - memcpy(self->_nifp->ni_name, str, len); - self->_nifp->ni_name[len] = '\0'; - - return 0; -} - -#define DEFINE_NETMAP_INTERFACE_U32_GETSET(x) \ -static PyObject * \ -NetmapInterface_##x##_get(NetmapInterface *self, void *closure) \ -{ \ - if (!self->_nifp) { \ - Py_RETURN_NONE; \ - } \ - return Py_BuildValue("I", self->_nifp->ni_##x); \ -} \ - \ -static int \ -NetmapInterface_##x##_set(NetmapInterface *self, PyObject *value, \ - void *closure) \ -{ \ - long x; \ - if (!self->_nifp) { \ - PyErr_SetString(PyExc_TypeError, "Attribute not available"); \ - return -1; \ - } \ - x = PyInt_AsLong(value); \ - if (x == -1 && PyErr_Occurred()) { \ - return -1; \ - } \ - /* Override the 'const' specifier. */ \ - *((uint32_t *)&self->_nifp->ni_##x) = (uint32_t)x; \ - return 0; \ -} - -DEFINE_NETMAP_INTERFACE_U32_GETSET(version); -DEFINE_NETMAP_INTERFACE_U32_GETSET(flags); -DEFINE_NETMAP_INTERFACE_U32_GETSET(tx_rings); -DEFINE_NETMAP_INTERFACE_U32_GETSET(rx_rings); -DEFINE_NETMAP_INTERFACE_U32_GETSET(bufs_head); - -#define DECLARE_NETMAP_INTERFACE_U32_GETSETERS(x) \ - {#x, \ - (getter)NetmapInterface_##x##_get, \ - (setter)NetmapInterface_##x##_set, \ - "netmap interface " #x " field", \ - NULL} - -static PyGetSetDef NetmapInterface_getseters[] = { - {"name", - (getter)NetmapInterface_name_get, (setter)NetmapInterface_name_set, - "netmap interface name field", - NULL}, - DECLARE_NETMAP_INTERFACE_U32_GETSETERS(version), - DECLARE_NETMAP_INTERFACE_U32_GETSETERS(flags), - DECLARE_NETMAP_INTERFACE_U32_GETSETERS(tx_rings), - DECLARE_NETMAP_INTERFACE_U32_GETSETERS(rx_rings), - DECLARE_NETMAP_INTERFACE_U32_GETSETERS(bufs_head), - {NULL} /* Sentinel */ -}; - - -static PyMethodDef NetmapInterface_methods[] = { - {NULL} -}; - -/* Definition exported to netmap.c. */ -PyTypeObject NetmapInterfaceType = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "netmap.NetmapInterface", /*tp_name*/ - sizeof(NetmapInterface), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)NetmapInterface_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - (reprfunc)NetmapInterface_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "Netmap interface object", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - NetmapInterface_methods, /* tp_methods */ - NetmapInterface_members, /* tp_members */ - NetmapInterface_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - NetmapInterface_new, /* tp_new */ -}; - diff --git a/private/extra/python/netmap_manager.c b/private/extra/python/netmap_manager.c deleted file mode 100644 index 9bb202607..000000000 --- a/private/extra/python/netmap_manager.c +++ /dev/null @@ -1,582 +0,0 @@ -#include "netmap_classes.h" - -#include -#include /* open() */ -#include /* ioctl() */ -#include /* mmap() */ -#include /* IFNAMSIZ */ -#include -#include - - -enum { - MANAGER_CLOSED = 0, - MANAGER_OPENED = 1, - MANAGER_REGISTERED = 2 -}; - -/* Destructor method for NetmapManagerType. */ -static void -NetmapManager_dealloc(NetmapManager* self) -{ - /* The 'X' is necessary only here: In all the other places, because of - our getters/setters we are sure that PyObject* members cannot be NULL. - */ - Py_XDECREF(self->dev_name); - Py_XDECREF(self->if_name); - NetmapMemory_dealloc(&self->memory); - self->ob_type->tp_free((PyObject*)self); -} - -/* Netmap.__new__() is the constructor. */ -static PyObject * -NetmapManager_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - NetmapManager *self; - - self = (NetmapManager *)type->tp_alloc(type, 0); - if (self == NULL) { - return NULL; - } - - /* Init with defaults. */ - self->dev_name = PyString_FromString("/dev/netmap"); - if (self->dev_name == NULL) { - Py_DECREF(self); - return NULL; - } - - self->if_name = PyString_FromString(""); - if (self->if_name == NULL) { - Py_DECREF(self); - return NULL; - } - - memset(&self->nmreq, 0, sizeof(self->nmreq)); - self->nmreq.nr_version = NETMAP_API; - self->nmreq.nr_flags = NR_REG_DEFAULT; /* Legacy 'ringid'. */ - self->nmreq.nr_ringid = 0; /* Bind all physical rings. */ - - NetmapMemory_new(&self->memory); - - self->_state = MANAGER_CLOSED; - self->_fd = INVALID_FD; - self->_memaddr = NULL; - - return (PyObject *)self; -} - -/* Netmap.__init__(), may be called many times, or not called at all. */ -static int -NetmapManager_init(NetmapManager *self, PyObject *args, PyObject *kwds) -{ - PyObject *dev_name = NULL; - static char *kwlist[] = {"dev_name", "version", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|SI", kwlist, - &dev_name, &self->nmreq.nr_version)) { - return -1; - } - - if (dev_name) { - PyObject *tmp; - - /* Safe reference. */ - tmp = self->dev_name; - Py_INCREF(dev_name); - self->dev_name = dev_name; - Py_XDECREF(tmp); - } - - return 0; -} - -static PyObject * -NetmapManager_repr(NetmapManager *self) -{ - PyObject *result; - char ringid[128]; - char flags[128]; - char cmd[64]; - struct nmreq *req = &self->nmreq; - - /* Fills in the 'ringid' and 'flags' string buffers. */ - ringid_pretty_print(req->nr_ringid, req->nr_flags, ringid, - sizeof(ringid), flags, sizeof(flags)); - - switch (req->nr_cmd) { - case 0: - sprintf(cmd, "None"); - break; - case NETMAP_BDG_ATTACH: - sprintf(cmd, "Bridge attach"); - break; - case NETMAP_BDG_DETACH: - sprintf(cmd, "Bridge detach"); - break; - case NETMAP_BDG_REGOPS: - sprintf(cmd, "Bridge lookup register"); - break; - case NETMAP_BDG_LIST: - sprintf(cmd, "Bridge list"); - break; - case NETMAP_BDG_VNET_HDR: - sprintf(cmd, "Bridge set virtio-net header length"); - break; - } - - result = PyString_FromFormat( - "dev_name: '%s'\n" - "if_name: '%s'\n" - "version: %d\n" - "memsize: %u KiB\n" - "offset: %u\n" - "tx_slots: %d\n" - "rx_slots: %d\n" - "tx_rings: %d\n" - "rx_rings: %d\n" - "ringid: %s\n" - "cmd: [%d] %s\n" - "arg1: %d\n" - "arg2: %d\n" - "arg3: %d\n" - "flags: %s\n" - "spare2: %d\n", - PyString_AsString(self->dev_name), - PyString_AsString(self->if_name), req->nr_version, - req->nr_memsize / 1024, req->nr_offset, - req->nr_tx_slots, req->nr_rx_slots, - req->nr_tx_rings, req->nr_rx_rings, - ringid, req->nr_cmd, cmd, req->nr_arg1, - req->nr_arg2, req->nr_arg3, flags, req->spare2[0] - ); - - return result; -} - - -/* A container for Netmap attributes where set/get methods are - managed automatically. */ -static PyMemberDef NetmapManager_members[] = { - {"version", T_UINT, offsetof(NetmapManager, nmreq.nr_version), 0, - "netmap API version"}, - {"tx_slots", T_UINT, offsetof(NetmapManager, nmreq.nr_tx_slots), 0, - "number of TX slots in each ring"}, - {"rx_slots", T_UINT, offsetof(NetmapManager, nmreq.nr_rx_slots), 0, - "number of RX slots in each ring"}, - {"tx_rings", T_USHORT, offsetof(NetmapManager, nmreq.nr_tx_rings), 0, - "number of TX rings"}, - {"rx_rings", T_USHORT, offsetof(NetmapManager, nmreq.nr_rx_rings), 0, - "number of RX rings"}, - {"ringid", T_USHORT, offsetof(NetmapManager, nmreq.nr_ringid), 0, - "identifies which rings to tie to"}, - {"cmd", T_USHORT, offsetof(NetmapManager, nmreq.nr_cmd), 0, - "cmd"}, - {"arg1", T_USHORT, offsetof(NetmapManager, nmreq.nr_arg1), 0, - "arg1 field"}, - {"arg2", T_USHORT, offsetof(NetmapManager, nmreq.nr_arg2), 0, - "arg2 field"}, - {"arg3", T_UINT, offsetof(NetmapManager, nmreq.nr_arg3), 0, - "arg3 field"}, - {"flags", T_UINT, offsetof(NetmapManager, nmreq.nr_flags), 0, - "flags"}, - {"spare2", T_UINT, offsetof(NetmapManager, nmreq.spare2[0]), 0, - "spare2 field"}, - {NULL} /* Sentinel */ -}; - - -/*########################## set/get methods #######################*/ - -static PyObject * -NetmapManager_dev_name_get(NetmapManager *self, void *closure) -{ - return string_get(self->dev_name); -} - -static int -NetmapManager_dev_name_set(NetmapManager *self, PyObject *value, void *closure) -{ - return string_set(&self->dev_name, value); -} - -static PyObject * -NetmapManager_if_name_get(NetmapManager *self, void *closure) -{ - return string_get(self->if_name); -} - -static int -NetmapManager_if_name_set(NetmapManager *self, PyObject *value, void *closure) -{ - return string_set(&self->if_name, value); -} - -#define NETMAP_MANAGER_DEFINE_GETSET(obj) \ -static PyObject * \ -NetmapManager_##obj##_get(NetmapManager *self, void *closure) \ -{ \ - if (self->memory.obj == NULL) { \ - Py_RETURN_NONE; \ - } \ - Py_INCREF(self->memory.obj); \ - return self->memory.obj; \ -} \ - \ -static int \ -NetmapManager_##obj##_set(NetmapManager *self, PyObject *value, \ - void *closure) \ -{ \ - if (value == NULL) { \ - PyErr_SetString(PyExc_TypeError, "Cannot delete the attribute"); \ - } else { \ - PyErr_SetString(PyExc_TypeError, "Cannot modify the attribute"); \ - } \ - return -1; \ -} - -NETMAP_MANAGER_DEFINE_GETSET(interface); -NETMAP_MANAGER_DEFINE_GETSET(transmit_rings); -NETMAP_MANAGER_DEFINE_GETSET(receive_rings); - -#define NETMAP_MANAGER_DECLARE_GETSET(obj, desc) \ - {#obj, \ - (getter)NetmapManager_##obj##_get, \ - (setter)NetmapManager_##obj##_set, \ - desc, \ - NULL} - - -static PyGetSetDef NetmapManager_getseters[] = { - {"dev_name", - (getter)NetmapManager_dev_name_get, (setter)NetmapManager_dev_name_set, - "netmap device name", - NULL}, - {"if_name", - (getter)NetmapManager_if_name_get, (setter)NetmapManager_if_name_set, - "interface name", - NULL}, - NETMAP_MANAGER_DECLARE_GETSET(interface, "NetmapInterface object"), - NETMAP_MANAGER_DECLARE_GETSET(transmit_rings, - "List of NetmapRing objects (Tx)"), - NETMAP_MANAGER_DECLARE_GETSET(receive_rings, - "List of NetmapRing objects (Rx)"), - {NULL} /* Sentinel */ -}; - -static void -NetmapManager_destroy(NetmapManager *self) -{ - NetmapMemory_destroy(&self->memory); -} - - -/*########################## NetmapManager methods ########################*/ - -static PyObject * -NetmapManager_open(NetmapManager* self) -{ - const char *dev_name; - int fd; - - dev_name = PyString_AsString(self->dev_name); - if (dev_name == NULL) { - return NULL; - } - - if (self->_state != MANAGER_CLOSED) { - PyErr_SetString(NetmapError, "Cannot open netmap device twice"); - return NULL; - } - - fd = open(dev_name, O_RDWR); - if (fd < 0) { - PyErr_SetFromErrno(NetmapError); - return NULL; - } - self->_fd = fd; - self->_state = MANAGER_OPENED; - - Py_RETURN_NONE; -} - -static PyObject * -NetmapManager_close(NetmapManager* self) -{ - int ret; - - if (self->_state == MANAGER_CLOSED) { - PyErr_SetString(NetmapError, "Netmap device is not opened"); - return NULL; - } - - if (self->_memaddr) { - munmap(self->_memaddr, self->nmreq.nr_memsize); - self->_memaddr = NULL; - self->nmreq.nr_memsize = 0; - } - - ret = close(self->_fd); - if (ret) { - PyErr_SetFromErrno(NetmapError); - return NULL; - } - self->_fd = INVALID_FD; - self->_state = MANAGER_CLOSED; - - NetmapManager_destroy(self); - - Py_RETURN_NONE; -} - -static int -NetmapManager_ioctl(NetmapManager *self, int iocmd) -{ - struct nmreq req; - const char *if_name; - int ret; - - if_name = PyString_AsString(self->if_name); - if (if_name == NULL) { - return -1; - } - - /* Prepare the netmap request ioctl argument. */ - memcpy(&req, &self->nmreq, sizeof(req)); - strncpy(req.nr_name, if_name, IFNAMSIZ); - - /* Issue the request to the netmap device. */ - ret = ioctl(self->_fd, iocmd, &req); - if (ret) { - PyErr_SetFromErrno(NetmapError); - return -1; - } - - /* Request writeback. */ - memcpy(&self->nmreq, &req, sizeof(req)); - - return 0; -} - -static PyObject * -NetmapManager_register(NetmapManager *self) -{ - NetmapInterface *interface; - NetmapRing *ring; - PyObject *list; - int ret; - int i; - - if (self->_state != MANAGER_OPENED) { - if (self->_state == MANAGER_CLOSED) { - PyErr_SetString(NetmapError, "Netmap device is not opened"); - } else if (self->_state == MANAGER_REGISTERED) { - PyErr_SetString(NetmapError, - "Netmap interface already registered"); - } - return NULL; - } - - /* Issue a NIOCREGIF command. */ - ret = NetmapManager_ioctl(self, NIOCREGIF); - if (ret == -1) { - return NULL; - } - - /* Map netmap memory area. */ - self->_memaddr = mmap(0, self->nmreq.nr_memsize, - PROT_WRITE | PROT_READ, - MAP_SHARED, self->_fd, 0); - if (self->_memaddr == MAP_FAILED) { - self->_memaddr = NULL; - PyErr_SetFromErrno(NetmapError); - return NULL; - } - - /* Setup the Python data structures corresponding to the netmap memory layout. - The +1 are here to take into account the host rings. */ - ret = NetmapMemory_setup(&self->memory, NETMAP_IF(self->_memaddr, - self->nmreq.nr_offset), self->nmreq.nr_tx_rings + 1, - self->nmreq.nr_rx_rings + 1); - if (ret) { - return NULL; - } - - self->_state = MANAGER_REGISTERED; - - Py_RETURN_NONE; -} - -static PyObject * -NetmapManager_xxsync(NetmapManager *self, int iocmd) -{ - int ret; - - if (self->_state == MANAGER_CLOSED) { - PyErr_SetString(NetmapError, "Netmap device is not opened"); - return NULL; - } - - if (self->_state == MANAGER_OPENED) { - PyErr_SetString(NetmapError, "Netmap interface is not registered"); - return NULL; - } - - /* Issue the request to the netmap device. */ - ret = ioctl(self->_fd, iocmd, NULL); - if (ret) { - PyErr_SetFromErrno(NetmapError); - return NULL; - } - - Py_RETURN_NONE; -} - -static PyObject * -NetmapManager_txsync(NetmapManager *self) -{ - return NetmapManager_xxsync(self, NIOCTXSYNC); -} - -static PyObject * -NetmapManager_rxsync(NetmapManager *self) -{ - return NetmapManager_xxsync(self, NIOCRXSYNC); -} - -static PyObject * -NetmapManager_getfd(NetmapManager *self) -{ - if (self->_state == MANAGER_CLOSED) { - PyErr_SetString(NetmapError, "Netmap device is not opened"); - return NULL; - } - - return Py_BuildValue("i", self->_fd); -} - -static PyObject * -NetmapManager_getinfo(NetmapManager *self) -{ - int ret; - - if (self->_state == MANAGER_CLOSED) { - PyErr_SetString(NetmapError, "Netmap device is not opened"); - return NULL; - } - - /* Issue a NIOCGINFO command. */ - ret = NetmapManager_ioctl(self, NIOCGINFO); - if (ret == -1) { - return NULL; - } - - Py_RETURN_NONE; -} - -static PyObject * -NetmapManager_clear(NetmapManager *self) -{ - memset(&self->nmreq, 0, sizeof(self->nmreq)); - self->nmreq.nr_version = NETMAP_API; - self->nmreq.nr_flags = NR_REG_DEFAULT; /* Legacy 'ringid'. */ - self->nmreq.nr_ringid = 0; /* Bind all physical rings. */ - - Py_RETURN_NONE; -} - -static PyObject * -NetmapManager_regif(NetmapManager *self) -{ - int ret; - - if (self->_state == MANAGER_CLOSED) { - PyErr_SetString(NetmapError, "Netmap device is not opened"); - return NULL; - } - - /* Issue a NIOCGREGIF command. */ - ret = NetmapManager_ioctl(self, NIOCREGIF); - if (ret == -1) { - return NULL; - } - - Py_RETURN_NONE; -} - -/* A container for the netmap methods. */ -static PyMethodDef NetmapManager_methods[] = { - {"open", (PyCFunction)NetmapManager_open, METH_NOARGS, - "Open the netmap device" - }, - {"close", (PyCFunction)NetmapManager_close, METH_NOARGS, - "Close the netmap device" - }, - {"register", (PyCFunction)NetmapManager_register, METH_NOARGS, - "Register an interface with netmap" - }, - {"txsync", (PyCFunction)NetmapManager_txsync, METH_NOARGS, - "Do a txsync on the registered rings" - }, - {"rxsync", (PyCFunction)NetmapManager_rxsync, METH_NOARGS, - "Do a rxsync on the registered rings" - }, - {"getfd", (PyCFunction)NetmapManager_getfd, METH_NOARGS, - "Get the file descriptor of the open netmap device" - }, - {"getinfo", (PyCFunction)NetmapManager_getinfo, METH_NOARGS, - "Ask netmap for interface info" - }, - {"clear", (PyCFunction)NetmapManager_clear, METH_NOARGS, - "Reset some netmap request fields to their default values" - }, - {"regif", (PyCFunction)NetmapManager_regif, METH_NOARGS, - "Issue a NIOCREGIF command to the netmap device (can be used to issue " - "NETMAP_BDG_ATTACH and similar commands)" - }, - {NULL} /* Sentinel */ -}; - -/* Definition exported to netmap.c. */ -PyTypeObject NetmapManagerType = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "netmap.Netmap", /*tp_name*/ - sizeof(NetmapManager), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)NetmapManager_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - (reprfunc)NetmapManager_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "Netmap manager object", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - NetmapManager_methods, /* tp_methods */ - NetmapManager_members, /* tp_members */ - NetmapManager_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)NetmapManager_init, /* tp_init */ - 0, /* tp_alloc */ - NetmapManager_new, /* tp_new */ -}; - diff --git a/private/extra/python/netmap_memory.c b/private/extra/python/netmap_memory.c deleted file mode 100644 index b1ab7a9f8..000000000 --- a/private/extra/python/netmap_memory.c +++ /dev/null @@ -1,126 +0,0 @@ -#include "netmap_classes.h" - -#include -#include /* IFNAMSIZ */ -#include -#include - - -void -NetmapMemory_dealloc(NetmapMemory *memory) -{ - Py_XDECREF(memory->interface); - Py_XDECREF(memory->transmit_rings); - Py_XDECREF(memory->receive_rings); -} - -void -NetmapMemory_new(NetmapMemory *memory) -{ - memory->interface = NULL; - memory->transmit_rings = memory->receive_rings = NULL; -} - -int -NetmapMemory_setup(NetmapMemory *memory, struct netmap_if *nifp, - int num_tx_rings, int num_rx_rings) -{ - NetmapInterface *interface; - NetmapRing *ring; - PyObject *list; - int ret; - int i; - - /* Initialize the 'interface' child object. */ - memory->interface = PyObject_CallObject((PyObject *)&NetmapInterfaceType, - NULL); - if (!memory->interface) { - return -1; - } - interface = (NetmapInterface *)memory->interface; - NetmapInterface_build(interface, nifp); - - /* Initialize the 'transmit_rings' child object. */ - list = PyList_New(num_tx_rings); - if (!list) { - return -1; - } - memory->transmit_rings = list; - for (i = 0; i < num_tx_rings; i++) { - ring = (NetmapRing *)PyObject_CallObject((PyObject *)&NetmapRingType, - NULL); - if (!ring) { - return -1; - } - ret = NetmapRing_build(ring, NETMAP_TXRING(nifp, i)); - if (ret) { - return -1; - } - ret = PyList_SetItem(list, i, (PyObject *)ring); - if (ret) { - return -1; - } - } - - /* Initialize the 'receive_rings' child object. */ - list = PyList_New(num_rx_rings); - if (!list) { - return -1; - } - memory->receive_rings = list; - for (i = 0; i < num_rx_rings; i++) { - ring = (NetmapRing *)PyObject_CallObject((PyObject *)&NetmapRingType, - NULL); - if (!ring) { - return -1; - } - ret = NetmapRing_build(ring, NETMAP_RXRING(nifp, i)); - if (ret) { - return -1; - } - ret = PyList_SetItem(list, i, (PyObject *)ring); - if (ret) { - return -1; - } - } - - return 0; -} - -void -NetmapMemory_destroy(NetmapMemory *memory) -{ - NetmapRing *ring; - int n; - int i; - - if (memory->interface) { - NetmapInterface_destroy((NetmapInterface *)memory->interface); - Py_DECREF(memory->interface); - memory->interface = NULL; - } - - if (memory->transmit_rings) { - n = PyList_Size(memory->transmit_rings); - for (i = 0; i < n; i++) { - ring = (NetmapRing *)PyList_GetItem(memory->transmit_rings, i); - if (ring) { - NetmapRing_destroy(ring); - } - } - Py_DECREF(memory->transmit_rings); - memory->transmit_rings = NULL; - } - - if (memory->receive_rings) { - n = PyList_Size(memory->receive_rings); - for (i = 0; i < n; i++) { - ring = (NetmapRing *)PyList_GetItem(memory->receive_rings, i); - if (ring) { - NetmapRing_destroy(ring); - } - } - Py_DECREF(memory->receive_rings); - memory->receive_rings = NULL; - } -} diff --git a/private/extra/python/netmap_ring.c b/private/extra/python/netmap_ring.c deleted file mode 100644 index 9295ab57c..000000000 --- a/private/extra/python/netmap_ring.c +++ /dev/null @@ -1,338 +0,0 @@ -#include "netmap_classes.h" - -#include - -#include -#include -#include - - -static void -NetmapRing_dealloc(NetmapRing* self) -{ - Py_XDECREF(self->slots); - self->ob_type->tp_free((PyObject*)self); -} - -static PyObject * -NetmapRing_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - NetmapRing *self; - - self = (NetmapRing *)type->tp_alloc(type, 0); - if (self != NULL) { - self->_ring = NULL; - self->slots = NULL; - } - - return (PyObject *)self; -} - -/* Static data for flags pretty printing. */ -static unsigned int nr_flag_values[] = {NR_TIMESTAMP, NR_FORWARD}; -static const char *nr_flag_strings[] = {"NrTimestamp", "NrForward"}; - -static PyObject * -NetmapRing_repr(NetmapRing *self) -{ - PyObject *result; - struct netmap_ring *ring= self->_ring; - char flags[256]; - - if (ring == NULL) { - return Py_BuildValue("s", "Invalid NetmapRing"); - } - - netmap_flags_pretty(ring->flags, flags, sizeof(flags), nr_flag_values, - nr_flag_strings, - sizeof(nr_flag_values)/sizeof(*nr_flag_values)); - - result = PyString_FromFormat( - "buf_ofs: 0x%016x\n" - "num_slots: %u\n" - "nr_buf_size: %u\n" - "ringid: %u\n" - "dir: %u\n" - "head: %u\n" - "cur: %u\n" - "tail: %u\n" - "flags: [0x%08x] %s\n" - "tv_sec: %u\n" - "tv_usec: %u\n" - /* TODO sem */, - ring->buf_ofs, - ring->num_slots, - ring->nr_buf_size, - ring->ringid, - ring->dir, - ring->head, - ring->cur, - ring->tail, - ring->flags, - flags, - ring->ts.tv_sec, - ring->ts.tv_usec - ); - - return result; -} - -int -NetmapRing_build(NetmapRing *self, void *addr) -{ - NetmapSlot *slot; - PyObject *list; - int ret; - int i; - int n; - - if (self->_ring) { - PyErr_SetString(NetmapError, "Internal error: cannot connect" - " a ring twice"); - return -1; - } - - /* Init the pointer to the netmap_ring struct. */ - self->_ring = addr; - n = self->_ring->num_slots; - - /* Create and populate the list of netmap slots. */ - list = PyList_New(n); - if (!list) { - return -1; - } - self->slots = list; - - for (i = 0; i < n; i++) { - slot = (NetmapSlot *)PyObject_CallObject((PyObject *)&NetmapSlotType, - NULL); - if (!slot) { - return -1; - } - ret = NetmapSlot_build(slot, &self->_ring->slot[i], - NETMAP_BUF(self->_ring, - self->_ring->slot[i].buf_idx)); - if (ret == -1) { - return -1; - } - - ret = PyList_SetItem(list, i, (PyObject *)slot); - if (ret == -1) { - return -1; - } - } - - return 0; -} - -void -NetmapRing_destroy(NetmapRing *self) -{ - self->_ring = NULL; - - if (self->slots) { - NetmapSlot *slot; - int n; - int i; - - n = PyList_Size(self->slots); - for (i = 0; i < n; i++) { - slot = (NetmapSlot *)PyList_GetItem(self->slots, i); - if (slot) { - NetmapSlot_destroy(slot); - } - } - Py_DECREF(self->slots); - self->slots = NULL; - } -} - -static PyMemberDef NetmapRing_members[] = { - {NULL} -}; - - -/*########################## set/get methods #######################*/ - -static PyObject * -NetmapRing_slots_get(NetmapRing *self, void *closure) -{ - if (self->slots == NULL) { - Py_RETURN_NONE; - } - Py_INCREF(self->slots); - - return self->slots; -} - -static int -NetmapRing_slots_set(NetmapRing *self, PyObject *value, void *closure) -{ - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete the attribute"); - } else { - PyErr_SetString(PyExc_TypeError, "Cannot modify the attribute"); - } - - return -1; -} - -#define DEFINE_NETMAP_RING_GETSET(field, type, format) \ -static PyObject * \ -NetmapRing_##field##_get(NetmapRing *self, void *closure) \ -{ \ - if (!self->_ring) { \ - Py_RETURN_NONE; \ - } \ - return Py_BuildValue(format, self->_ring->field); \ -} \ - \ -static int \ -NetmapRing_##field##_set(NetmapRing *self, PyObject *value, void *closure) \ -{ \ - long x; \ - if (!self->_ring) { \ - PyErr_SetString(PyExc_TypeError, "Attribute not available"); \ - return -1; \ - } \ - x = PyInt_AsLong(value); \ - if (x == -1 && PyErr_Occurred()) { \ - return -1; \ - } \ - /* Override the 'const' specifier. */ \ - *((type *)&self->_ring->field) = (type)x; \ - return 0; \ -} - -#define DEFINE_NETMAP_RING_GETSET_TV(field) \ -static PyObject * \ -NetmapRing_##field##_get(NetmapRing *self, void *closure) \ -{ \ - if (!self->_ring) { \ - Py_RETURN_NONE; \ - } \ - return Py_BuildValue("I", self->_ring->ts.field); \ -} \ - \ -static int \ -NetmapRing_##field##_set(NetmapRing *self, PyObject *value, void *closure) \ -{ \ - long x; \ - if (!self->_ring) { \ - PyErr_SetString(PyExc_TypeError, "Attribute not available"); \ - return -1; \ - } \ - x = PyInt_AsLong(value); \ - if (x == -1 && PyErr_Occurred()) { \ - return -1; \ - } \ - /* Override the 'const' specifier. */ \ - *((uint32_t *)&self->_ring->ts.field) = (uint32_t)x; \ - return 0; \ -} - -DEFINE_NETMAP_RING_GETSET(num_slots, uint32_t, "I"); -DEFINE_NETMAP_RING_GETSET(nr_buf_size, uint32_t, "I"); -DEFINE_NETMAP_RING_GETSET(ringid, uint16_t, "I"); -DEFINE_NETMAP_RING_GETSET(dir, uint16_t, "I"); -DEFINE_NETMAP_RING_GETSET(head, uint32_t, "I"); -DEFINE_NETMAP_RING_GETSET(cur, uint32_t, "I"); -DEFINE_NETMAP_RING_GETSET(tail, uint32_t, "I"); -DEFINE_NETMAP_RING_GETSET(flags, uint32_t, "I"); -DEFINE_NETMAP_RING_GETSET_TV(tv_sec); -DEFINE_NETMAP_RING_GETSET_TV(tv_usec); - -#define DECLARE_NETMAP_RING_GETSETERS(field) \ - {#field, \ - (getter)NetmapRing_##field##_get, (setter)NetmapRing_##field##_set, \ - "netmap ring " #field " field", \ - NULL} - -static PyGetSetDef NetmapRing_getseters[] = { - DECLARE_NETMAP_RING_GETSETERS(num_slots), - DECLARE_NETMAP_RING_GETSETERS(nr_buf_size), - DECLARE_NETMAP_RING_GETSETERS(ringid), - DECLARE_NETMAP_RING_GETSETERS(dir), - DECLARE_NETMAP_RING_GETSETERS(head), - DECLARE_NETMAP_RING_GETSETERS(cur), - DECLARE_NETMAP_RING_GETSETERS(tail), - DECLARE_NETMAP_RING_GETSETERS(flags), - DECLARE_NETMAP_RING_GETSETERS(tv_sec), - DECLARE_NETMAP_RING_GETSETERS(tv_usec), - {"slots", - (getter)NetmapRing_slots_get, (setter)NetmapRing_slots_set, - "netmap ring slots", - NULL}, - {NULL} /* Sentinel */ -}; - - -static PyObject* -NetmapRing_space(NetmapRing *self) -{ - return Py_BuildValue("i", nm_ring_space(self->_ring)); -} - -static PyObject* -NetmapRing_empty(NetmapRing *self) -{ - if (nm_ring_empty(self->_ring)) { - Py_RETURN_TRUE; - } - - Py_RETURN_FALSE; -} - -static PyMethodDef NetmapRing_methods[] = { - {"space", (PyCFunction)NetmapRing_space, METH_NOARGS, - "Return the number of available ring slots" - }, - {"empty", (PyCFunction)NetmapRing_empty, METH_NOARGS, - "Returns True if the ring is empty (no available slots)" - }, - {NULL} -}; - -/* Definition exported to netmap.c. */ -PyTypeObject NetmapRingType = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "netmap.NetmapRing", /*tp_name*/ - sizeof(NetmapRing), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)NetmapRing_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - (reprfunc)NetmapRing_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - "Netmap interface object", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - NetmapRing_methods, /* tp_methods */ - NetmapRing_members, /* tp_members */ - NetmapRing_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - NetmapRing_new, /* tp_new */ -}; - diff --git a/private/extra/python/netmap_slot.c b/private/extra/python/netmap_slot.c deleted file mode 100644 index 58946eeb5..000000000 --- a/private/extra/python/netmap_slot.c +++ /dev/null @@ -1,240 +0,0 @@ -#include "netmap_classes.h" - -#include - - -static void -NetmapSlot_dealloc(NetmapSlot* self) -{ - if (self->_view.buf) { - /* XXX Should I free this? I hope self->memoryview - doesn't do it again in its destructor. */ - free(self->_view.shape); - } - Py_XDECREF(self->memoryview); - self->ob_type->tp_free((PyObject*)self); -} - -static PyObject * -NetmapSlot_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - NetmapSlot *self; - - self = (NetmapSlot *)type->tp_alloc(type, 0); - if (self != NULL) { - self->_slot = NULL; - self->memoryview = NULL; - memset(&self->_view, 0, sizeof(Py_buffer)); - } - - return (PyObject *)self; -} - -/* Static data for flags pretty printing. */ -static unsigned int ns_flag_values[] = {NS_BUF_CHANGED, NS_REPORT, - NS_FORWARD, NS_NO_LEARN, - NS_INDIRECT, NS_MOREFRAG}; - -static const char *ns_flag_strings[] = {"NsBufChanged", "NsReport", - "NsForward", "NsNoLearn", - "NsIndirect", "NsMorefrag"}; - -static PyObject * -NetmapSlot_repr(NetmapSlot *self) -{ - PyObject *result; - struct netmap_slot *slot = self->_slot; - char flags[256]; - - if (slot == NULL) { - return Py_BuildValue("s", "Invalid NetmapSlot"); - } - - netmap_flags_pretty(slot->flags, flags, sizeof(flags), ns_flag_values, - ns_flag_strings, - sizeof(ns_flag_values)/sizeof(*ns_flag_values)); - - result = PyString_FromFormat( - "buf_idx: %u\n" - "len: %u\n" - "flags: [0x%04x] %s\n" - "ptr: 0x%016x\n", - slot->buf_idx, - slot->len, - slot->flags, - flags, - slot->ptr - ); - - return result; -} - - -static PyMemberDef NetmapSlot_members[] = { - {NULL} -}; - -int -NetmapSlot_build(NetmapSlot *self, void *addr, void *buf) -{ - /* Init the pointer. */ - self->_slot = (struct netmap_slot *)addr; - - /* Populate a Py_buffer struct, which represents a C memory - buffer. */ - memset(&self->_view, 0, sizeof(Py_buffer)); - self->_view.buf = buf; - self->_view.len = self->_slot->len; - self->_view.format = "B"; - self->_view.ndim = 1; - self->_view.shape = malloc(1 * sizeof (Py_ssize_t)); - self->_view.shape[0] = self->_slot->len; - self->_view.itemsize = 1; - - /* Expose the C buffer through a 'memoryview' Python object, - so that Python code can directly access the C buffer. */ - self->memoryview = PyMemoryView_FromBuffer(&self->_view); - if (!self->memoryview) { - return -1; - } - - return 0; -} - -void -NetmapSlot_destroy(NetmapSlot *self) -{ - self->_slot = NULL; - - if (self->_view.buf) { - free(self->_view.shape); - memset(&self->_view, 0, sizeof(Py_buffer)); - } - -/* TODO should destroy self->memoryview */ -} - -/*########################## set/get methods #######################*/ - -static PyObject * -NetmapSlot_memoryview_get(NetmapSlot *self, void *closure) -{ - if (self->memoryview == NULL) { - Py_RETURN_NONE; - } - Py_INCREF(self->memoryview); - - return self->memoryview; -} - -static int -NetmapSlot_memoryview_set(NetmapSlot *self, PyObject *value, void *closure) -{ - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete the attribute"); - } else { - PyErr_SetString(PyExc_TypeError, "Cannot modify the attribute"); - } - - return -1; -} - -#define DEFINE_NETMAP_SLOT_GETSET(field, type, format) \ -static PyObject * \ -NetmapSlot_##field##_get(NetmapSlot *self, void *closure) \ -{ \ - if (!self->_slot) { \ - Py_RETURN_NONE; \ - } \ - return Py_BuildValue(format, self->_slot->field); \ -} \ - \ -static int \ -NetmapSlot_##field##_set(NetmapSlot *self, PyObject *value, void *closure) \ -{ \ - long x; \ - if (!self->_slot) { \ - PyErr_SetString(PyExc_TypeError, "Attribute not available"); \ - return -1; \ - } \ - x = PyInt_AsLong(value); \ - if (x == -1 && PyErr_Occurred()) { \ - return -1; \ - } \ - /* Override the 'const' specifier. */ \ - *((type *)&self->_slot->field) = (type)x; \ - return 0; \ -} - -DEFINE_NETMAP_SLOT_GETSET(buf_idx, uint32_t, "I"); -DEFINE_NETMAP_SLOT_GETSET(len, uint16_t, "I"); -DEFINE_NETMAP_SLOT_GETSET(flags, uint16_t, "I"); -DEFINE_NETMAP_SLOT_GETSET(ptr, uint64_t, "k"); - - -#define DECLARE_NETMAP_SLOT_GETSETERS(field) \ - {#field, \ - (getter)NetmapSlot_##field##_get, (setter)NetmapSlot_##field##_set, \ - "netmap ring " #field " field", \ - NULL} - -static PyGetSetDef NetmapSlot_getseters[] = { - DECLARE_NETMAP_SLOT_GETSETERS(buf_idx), - DECLARE_NETMAP_SLOT_GETSETERS(len), - DECLARE_NETMAP_SLOT_GETSETERS(flags), - DECLARE_NETMAP_SLOT_GETSETERS(ptr), - {"buf", - (getter)NetmapSlot_memoryview_get, (setter)NetmapSlot_memoryview_set, - "netmap buffer memoryview", - NULL}, - {NULL} /* Sentinel */ -}; - - -static PyMethodDef NetmapSlot_methods[] = { - {NULL} -}; - -/* Definition exported to netmap.c. */ -PyTypeObject NetmapSlotType = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "netmap.NetmapSlot", /*tp_name*/ - sizeof(NetmapSlot), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)NetmapSlot_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - (reprfunc)NetmapSlot_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - "Netmap interface object", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - NetmapSlot_methods, /* tp_methods */ - NetmapSlot_members, /* tp_members */ - NetmapSlot_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - NetmapSlot_new, /* tp_new */ -}; - diff --git a/private/extra/python/pktgen.py b/private/extra/python/pktgen.py deleted file mode 100644 index 0d944f7d5..000000000 --- a/private/extra/python/pktgen.py +++ /dev/null @@ -1,61 +0,0 @@ -import netmap -import time -import struct -import select - - -def build_packet(): - fmt = '!6s6sH' + '46s' - return struct.pack(fmt, '\xff'*6, '\x00'*6, 0x0800, '\x00'*50) - - -############################## MAIN ########################### -pkt = build_packet() - -# open the netmap device and register an interface -nm = netmap.Netmap() -nm.open() -nfd = nm.getfd() -nm.if_name = 'enp1s0f1' -nm.register() -time.sleep(1) - -# fill in the netmap slots and netmap buffers for tx ring 0 -txr = nm.transmit_rings[0] -num_slots = txr.num_slots -for i in range(num_slots): - txr.slots[i].buf[0:len(pkt)] = pkt - txr.slots[i].len = len(pkt) - - -# transmit at maximum speed until Ctr-C is pressed -cnt = 0 # packet counter -batch = 256 -poller = select.poll() -poller.register(nfd, select.POLLOUT) -t_start = time.time() -try: - cur = txr.cur - while 1: - ready_list = poller.poll(2) - if len(ready_list) == 0: - print "Timeout occurred" - break; - n = txr.tail - cur # avail - if n < 0: - n += num_slots - if n > batch: - n = batch - cur += n - if cur >= num_slots: - cur -= num_slots - txr.cur = txr.head = cur # lazy update txr.cur and txr.head - nm.txsync() - cnt += n -except KeyboardInterrupt: - pass -t_end = time.time() - -print "\nPackets sent: %s, Avg rate %s Kpps" % (cnt, 0.001 * cnt / (t_end - t_start)) - -nm.close() diff --git a/private/extra/python/pktman.py b/private/extra/python/pktman.py deleted file mode 100644 index 9d8aa7ff7..000000000 --- a/private/extra/python/pktman.py +++ /dev/null @@ -1,292 +0,0 @@ -import netmap # our module -import time # time measurements -import select # poll() -import argparse # program argument parsing -import multiprocessing # thread management -import re - -# import scapy suppressing the initial WARNING message -import logging -logging.getLogger("scapy.runtime").setLevel(logging.ERROR) -from scapy.all import Ether, IP, UDP # packet forgery - - -def help_quit(parser): - print "" - parser.print_help() - quit() - - -def build_packet(args, parser): - src = args.src.split(':') - dst = args.dst.split(':') - - # create the payload - base = "Hello from Python" - header_len = 14 + 20 + 8 - data = base * ((args.length-header_len)/len(base) + 1) - data = data[0:args.length-header_len] - - scap = Ether(src = args.srcmac, dst = args.dstmac) - scap = scap / IP(src = src[0], dst = dst[0]) - scap = scap / UDP(sport = int(src[1]), dport = int(dst[1])) - scap = scap / data - - try: - # checksum is computed when calling str(scap), e.g. when the packet is - # assembled - ret = str(scap) - except: - print "Packet parameters are invalid\n" - help_quit(parser) - - if args.dump: - scap.show2() - - return ret - - -def transmit(idx, suffix, args, parser, queue): - # use nm_open() to open the netmap device and register an interface - # using an extended interface name - nmd = netmap.NetmapDesc(args.interface + suffix) - time.sleep(args.wait_link) - - # build the packet that will be transmitted - pkt = build_packet(args, parser) - - # fill in the netmap slots and netmap buffers for tx ring 0 - txr = nmd.transmit_rings[idx] - num_slots = txr.num_slots - for i in range(num_slots): - txr.slots[i].buf[0:len(pkt)] = pkt - txr.slots[i].len = len(pkt) - - # transmit at maximum speed until Ctr-C is pressed - cnt = 0 # packet counter - batch = args.batch - poller = select.poll() - poller.register(nmd.getfd(), select.POLLOUT) - t_start = time.time() - try: - cur = txr.cur - while 1: - ready_list = poller.poll(2) - if len(ready_list) == 0: - print "Timeout occurred" - break; - n = txr.tail - cur # avail - if n < 0: - n += num_slots - if n > batch: - n = batch - cur += n - if cur >= num_slots: - cur -= num_slots - txr.cur = txr.head = cur # lazy update txr.cur and txr.head - nmd.txsync() - cnt += n - except KeyboardInterrupt: - # report the result to the main process - queue.put([cnt, time.time() - t_start]) - pass - - -def receive(idx, suffix, args, parser, queue): - # use nm_open() to open the netmap device and register an interface - # using an extended interface name - nmd = netmap.NetmapDesc(args.interface + suffix) - time.sleep(args.wait_link) - - # select the right ring - rxr = nmd.receive_rings[idx] - num_slots = rxr.num_slots - - cnt = 0 # packet counter - poller = select.poll() - poller.register(nmd.getfd(), select.POLLIN) - - # wait for the first packet - try: - poller.poll() - except KeyboardInterrupt: - # report the result to the main process - queue.put([cnt, None]) - return - - # receive (throwing away everything) until Ctr-C is pressed - t_start = time.time() - try: - cur = rxr.cur - while 1: - ready_list = poller.poll() - if len(ready_list) == 0: - print "Timeout occurred" - break; - n = rxr.tail - cur # avail - if n < 0: - n += num_slots - cur += n - if cur >= num_slots: - cur -= num_slots - rxr.cur = rxr.head = cur # lazy update rxr.cur and rxr.head - cnt += n - except KeyboardInterrupt: - # report the result to the main process - queue.put([cnt, time.time() - t_start]) - pass - - -# How many netmap ring couples has 'ifname'? -def netmap_max_rings(ifname): - if ifname.startswith('netmap:'): - ifname = ifname[7:] - - nm = netmap.Netmap() - nm.open() - nm.if_name = ifname - nm.getinfo() - - return nm.tx_rings - -# extract the (nr_ringid, nr_flags) specified by the extended -# interface name (nm_open() ifname) -def netmap_get_ringid(ifname_ext): - nmd = netmap.NetmapDesc(ifname_ext) - - return nmd.getringid() - -def netmap_remove_ifname_suffix(ifname_ext): - m = re.match(r'\w+:\w+', ifname_ext) - if m == None: - return None - - return m.group(0) - - -############################## MAIN ########################### - -if __name__ == '__main__': - - # functions implemented by this program - handler = dict(); - handler['tx'] = transmit - handler['rx'] = receive - - # program arguments - parser = argparse.ArgumentParser(description = 'Send and receive packet using the netmap API') - parser.add_argument('-i', '--interface', help = 'the interface to register with netmap; ' - 'can be in the form netmap:[] or [], where ' - 'OSNAME is the O.S. name for a network interface (e.g. "eth0"), ' - ' is a valid VALE port name (e.g. "vale18:2") and is an ' - 'optional extension suffix, specified using the nm_open() syntax ' - '(e.g. "^", "-5", "{44", ...)', - required = True) - parser.add_argument('-f', '--function', help = 'the function to perform', - choices = ['tx', 'rx'], default = 'rx') - parser.add_argument('-b', '--batchsize', help = 'number of packets to send with each TXSYNC ' - 'operation', type=int, default = 512, dest = 'batch') - parser.add_argument('-l', '--length', help = 'lenght of the ethernet frame sent', - type = int, default = 60) - parser.add_argument('-D', '--dstmac', help = 'destination MAC of tx packets', - default = 'ff:ff:ff:ff:ff:ff') - parser.add_argument('-S', '--srcmac', help = 'source MAC of tx packets', - default = '00:00:00:00:00:00') - parser.add_argument('-d', '--dst', help = 'destination IP address and UDP port of tx packets', - default = '10.0.0.2:54322', metavar = 'IP:PORT') - parser.add_argument('-s', '--src', help = 'source IP address and UDP port of tx packets', - default = '10.0.0.1:54321', metavar = 'IP:PORT') - parser.add_argument('-w', '--wait-link', help = 'time to wait for the link before starting ' - 'transmit/receive operations (in seconds)', type = int, default = 1) - parser.add_argument('-X', '--dump', help = 'dump the packet', action = 'store_true') - parser.add_argument('-p', '--threads', help = 'number of threads to used for tx/rx ' - 'operations', type = int, default = 1) - # parse the input - args = parser.parse_args() - # print args - - # bound checking - if args.length < 60: - print 'Invalid packet length\n' - help_quit(parser) - - if args.threads < 1: - print 'Invalid number of threads\n' - help_quit(parser) - - try: - # compute 'ifname' removing the suffix from the extended name - # specified by the user - ifname = netmap_remove_ifname_suffix(args.interface) - if ifname == None: - print 'Invalid ifname "%s"' % (args.interface, ) - help_quit(parser) - - # compute 'max_couples', which is the number of tx/rx rings couples to be registered - # according to 'args.interface' - nr_ringid, nr_flags = netmap_get_ringid(args.interface) - if nr_flags in [netmap.RegAllNic, netmap.RegNicSw]: - # ask netmap for the number of available couples - max_couples = netmap_max_rings(args.interface) - suffix_required = True - ringid_offset = 0 - else: - # all the others netmap.Reg* specifies just one couple of rings - max_couples = 1 - suffix_required = False - ringid_offset = nr_ringid - if args.threads > max_couples: - print 'You cannot use more than %s (tx,rx) rings couples with "%s"' % (max_couples, args.interface) - help_quit(parser) - except netmap.error as e: - print e - quit() - - jobs = [] # array of worker processes - queues = [] # array of queues for IPC - for i in range(args.threads): - queue = multiprocessing.Queue() - queues.append(queue) - - # 'i_off' contains the ring idx on which the process below will operate - i_off = i + ringid_offset - # it may also be necessary to add an extension suffix to the interface - # name specified by the user - if suffix_required: - suffix = '-' + str(i_off) - else: - suffix = '' - - # create a new process that will execute the user-selected handler function, - # with the arguments specified by the 'args' tuple - job = multiprocessing.Process(name = 'worker-' + str(i), - target = handler[args.function], - args = (i_off, suffix, args, parser, queue)) - job.deamon = True # ensure work termination - jobs.append(job) - - # start all the workers - for i in range(len(jobs)): - jobs[i].start() - - # Wait for the user pressing Ctrl-C - try: - while 1: - time.sleep(1000) - except KeyboardInterrupt: - pass - - # collect and print the result returned by the workers - tot_rate = 0.0 - for i in range(len(jobs)): - result = queues[i].get() - jobs[i].join() - delta = result[1] - cnt = result[0] - if delta == None: - rate = None - else: - rate = 0.001 * cnt / delta - tot_rate += rate - print '[%d] Packets processed: %s, Avg rate %s Kpps' % (i, cnt, rate) - print 'Total rate: %s' % (tot_rate, ) diff --git a/private/extra/python/setup.py b/private/extra/python/setup.py deleted file mode 100644 index 9a761a650..000000000 --- a/private/extra/python/setup.py +++ /dev/null @@ -1,15 +0,0 @@ -import glob -from distutils.core import setup, Extension - - -netmap_bindings_module = Extension('netmap', - include_dirs = ['../../sys'], - sources = glob.glob('*.c')) - -setup(name = 'NetmapBindings', - version = '11.0', - description = 'python bindings for netmap', - author = 'Vincenzo Maffione', - author_email = 'v.maffione@gmail.com', - url = 'http://info.iet.unipi.it/~luigi/netmap/', - ext_modules = [netmap_bindings_module]) diff --git a/private/extra/python/test.py b/private/extra/python/test.py deleted file mode 100644 index 772505f68..000000000 --- a/private/extra/python/test.py +++ /dev/null @@ -1,14 +0,0 @@ -import netmap - - -# see 'help(netmap)' for documentation -n = netmap.Netmap() -print n -n.open() -n.if_name = 'enp1s0f1' -n.ringid = netmap.HwRing | 3 -n.arg3 = 2 -n.register() -print n -print n.interface -n.close() diff --git a/private/extra/qemu-1.2.0-e1000-mitigation.diff b/private/extra/qemu-1.2.0-e1000-mitigation.diff deleted file mode 100644 index fb93039e9..000000000 --- a/private/extra/qemu-1.2.0-e1000-mitigation.diff +++ /dev/null @@ -1,157 +0,0 @@ -diff -urp ../work-qemu-1.2.0-prod/hw/e1000.c ./hw/e1000.c ---- ../work-qemu-1.2.0-prod/hw/e1000.c 2012-09-05 07:03:06.000000000 -0700 -+++ ./hw/e1000.c 2012-12-01 14:03:17.798698762 -0800 -@@ -24,6 +24,7 @@ - * License along with this library; if not, see . - */ - -+#define MITIGATION - - #include "hw.h" - #include "pci.h" -@@ -127,6 +128,11 @@ typedef struct E1000State_st { - } eecd_state; - - QEMUTimer *autoneg_timer; -+#ifdef MITIGATION -+ QEMUTimer *mit_timer; // handle for the timer -+ uint32_t mit_timer_on; // mitigation timer active -+ uint32_t mit_cause; // pending interrupt cause -+#endif /* MITIGATION */ - } E1000State; - - #define defreg(x) x = (E1000_##x>>2) -@@ -142,6 +148,9 @@ enum { - defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), - defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA), - defreg(VET), -+#ifdef MITIGATION -+ defreg(RDTR), defreg(RADV), defreg(TADV), defreg(ITR), -+#endif /* MITIGATION */ - }; - - static void -@@ -626,6 +635,66 @@ static uint64_t tx_desc_base(E1000State - return (bah << 32) + bal; - } - -+#ifdef MITIGATION -+/* helper function, 0 means the value is not set */ -+static inline void -+mit_update_delay(uint32_t *cur, uint32_t value) -+{ -+ if (value && (*cur == 0 || value < *cur)) -+ *cur = value; -+} -+ -+/* -+ * If necessary, rearm the timer and post an interrupt. -+ * Called at the end of tx/rx routines (mit_timer_on == 0), -+ * and when the timer fires (mit_timer_on == 1). -+ * We provide a partial implementation of interrupt mitigation, -+ * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for -+ * RADV and TADV, 256ns units for ITR). RDTR is only used to enable RADV; -+ * relative timers based on TIDV and RDTR are not implemented. -+ */ -+static void -+mit_rearm_and_int(void *opaque) -+{ -+ E1000State *s = opaque; -+ uint32_t mit_delay = 0; -+ -+ /* -+ * Clear the flag. It is only set when the callback fires, -+ * and we need to clear it anyways. -+ */ -+ s->mit_timer_on = 0; -+ if (s->mit_cause == 0) /* no events pending, we are done */ -+ return; -+ /* -+ * Compute the next mitigation delay according to pending interrupts -+ * and the current values of RADV (provided RDTR!=0), TADV and ITR. -+ * Then rearm the timer. -+ */ -+ if (s->mit_cause & (E1000_ICR_TXQE | E1000_ICR_TXDW)) -+ mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4); -+ if (s->mac_reg[RDTR] && (s->mit_cause & E1000_ICS_RXT0)) -+ mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4); -+ mit_update_delay(&mit_delay, s->mac_reg[ITR]); -+ -+ if (likely(mit_delay)) { -+ s->mit_timer_on = 1; -+ qemu_mod_timer(s->mit_timer, -+ qemu_get_clock_ns(vm_clock) + mit_delay * 256); -+ } -+ set_ics(s, 0, s->mit_cause); -+ s->mit_cause = 0; -+} -+ -+static void -+mit_set_ics(E1000State *s, uint32_t cause) -+{ -+ s->mit_cause |= cause; -+ if (!s->mit_timer_on) -+ mit_rearm_and_int(s); -+} -+#endif /* MITIGATION */ -+ - static void - start_xmit(E1000State *s) - { -@@ -663,7 +732,11 @@ start_xmit(E1000State *s) - break; - } - } -+#ifdef MITIGATION -+ mit_set_ics(s, cause); -+#else /* !MITIGATION */ - set_ics(s, 0, cause); -+#endif /* !MITIGATION */ - } - - static int -@@ -875,7 +948,11 @@ e1000_receive(NetClientState *nc, const - s->rxbuf_min_shift) - n |= E1000_ICS_RXDMT0; - -+#ifdef MITIGATION -+ mit_set_ics(s, n); -+#else /* !MITIGATION */ - set_ics(s, 0, n); -+#endif /* !MITIGATION */ - - return size; - } -@@ -978,6 +1055,9 @@ static uint32_t (*macreg_readops[])(E100 - getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS), - getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL), - getreg(TDLEN), getreg(RDLEN), -+#ifdef MITIGATION -+ getreg(RDTR), getreg(RADV), getreg(TADV), getreg(ITR), -+#endif /* MITIGATION */ - - [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4, - [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4, -@@ -994,6 +1074,10 @@ static void (*macreg_writeops[])(E1000St - putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC), - putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH), - putreg(RDBAL), putreg(LEDCTL), putreg(VET), -+#ifdef MITIGATION -+ [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit, -+ [ITR] = set_16bit, -+#endif /* MITIGATION */ - [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl, - [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics, - [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt, -@@ -1253,6 +1337,11 @@ static int pci_e1000_init(PCIDevice *pci - add_boot_device_path(d->conf.bootindex, &pci_dev->qdev, "/ethernet-phy@0"); - - d->autoneg_timer = qemu_new_timer_ms(vm_clock, e1000_autoneg_timer, d); -+#ifdef MITIGATION -+ d->mit_cause = 0; -+ d->mit_timer_on = 0; -+ d->mit_timer = qemu_new_timer_ns(vm_clock, mit_rearm_and_int, d); -+#endif /* MITIGATION */ - - return 0; - } diff --git a/private/extra/tstmp.diff b/private/extra/tstmp.diff deleted file mode 100644 index 4f7df15b9..000000000 --- a/private/extra/tstmp.diff +++ /dev/null @@ -1,146 +0,0 @@ -Index: dev/netmap/netmap.c -=================================================================== ---- dev/netmap/netmap.c (revision 258360) -+++ dev/netmap/netmap.c (working copy) -@@ -2656,6 +2656,7 @@ - */ - for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) { - kring = &na->rx_rings[i]; -+ TSTMP(1, i, kring->ring->cur, kring->ring->avail,0,0); - if (kring->ring->avail > 0) { - revents |= want_rx; - want_rx = 0; /* also breaks the loop */ -@@ -2663,6 +2664,7 @@ - } - for (i = priv->np_qfirst; want_tx && i < lim_tx; i++) { - kring = &na->tx_rings[i]; -+ TSTMP(2, i, kring->ring->cur, kring->ring->avail,0,0); - if (kring->ring->avail > 0) { - revents |= want_tx; - want_tx = 0; /* also breaks the loop */ -@@ -2705,6 +2707,7 @@ - revents |= POLLERR; - - /* Check avail/call selrecord only if called with POLLOUT */ -+ TSTMP(2, i, kring->ring->cur, kring->ring->avail,0,1); - if (want_tx) { - if (kring->ring->avail > 0) { - /* stop at the first ring. We don't risk -Index: kern/subr_smp.c -=================================================================== ---- kern/subr_smp.c (revision 258360) -+++ kern/subr_smp.c (working copy) -@@ -132,7 +132,63 @@ - } - SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL); - -+#ifdef KERN_TIMESTAMP - /* -+ * allocate 1M entries, 32 bytes each. -+ * We then split it among available CPUs. -+ */ -+#define KERN_TIMESTAMP_SIZE (1<<20) -+struct ktstmp_buf_t { uint32_t d[8]; }; -+/* write positions in the buffer */ -+static int ktstmp_index[MAXCPU]; -+ -+/* the buffers */ -+static struct ktstmp_buf_t ktstmp_buf[KERN_TIMESTAMP_SIZE]; -+static int ktstmp_range; -+ -+SYSCTL_NODE(_kern, OID_AUTO, ts, CTLFLAG_RD, 0, "TS buffers"); -+SYSCTL_OPAQUE(_kern_ts, OID_AUTO, idx, CTLFLAG_RD, ktstmp_index, -+ sizeof(ktstmp_index), "LU", "Timestamp indexes"); -+ -+SYSCTL_OPAQUE(_kern_ts, OID_AUTO, data, CTLFLAG_RD, ktstmp_buf, -+ sizeof(ktstmp_buf), "LU", "Timestamp buffers"); -+ -+void _TSTMP(uint32_t p[8]) -+{ -+ int i, pos; -+ i = curcpu; -+ pos = ktstmp_index[i]++; -+ if (pos == ktstmp_range - 1) -+ ktstmp_index[i] = 0; -+ ktstmp_buf[pos + ktstmp_range * i] = *(struct ktstmp_buf_t *)p; -+} -+ -+static void -+tstmp_init(void *dummy) -+{ -+ int i; -+ -+ ktstmp_range = KERN_TIMESTAMP_SIZE / mp_ncpus; -+ -+ for (i = 0; i < mp_ncpus; i++) { -+ struct sysctl_oid *tree; -+ char namebuf[4]; -+ snprintf(namebuf, sizeof(namebuf), "%d", i); -+printf("added child %d\n", i); -+ tree = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_ts), -+ OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Id"); -+ SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tree), OID_AUTO, "idx", -+ CTLFLAG_RD, &ktstmp_index[i], 1, "I-th index"); -+ SYSCTL_ADD_OPAQUE(NULL, SYSCTL_CHILDREN(tree), OID_AUTO, "data", -+ CTLFLAG_RD, &ktstmp_buf[i*ktstmp_range], -+ sizeof(struct ktstmp_buf_t) * ktstmp_range, -+ "LU", "I-th buffer"); -+ } -+} -+SYSINIT(cpu_mp_ts, SI_SUB_CPU, SI_ORDER_ANY, tstmp_init, NULL); -+#endif /* KERN_TIMESTAMP */ -+ -+/* - * Call the MD SMP initialization code. - */ - static void -Index: sys/param.h -=================================================================== ---- sys/param.h (revision 258360) -+++ sys/param.h (working copy) -@@ -344,4 +344,45 @@ - */ - #define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset]) - -+#ifdef _KERNEL -+ -+/* -+ * We put here the definition of two debugging macros/function which -+ * are very convenient to have available. -+ * TSTMP(a,b,c,d,e,f) can be used to timestamp kernel events with the TSC, -+ * and export them to userland through a sysctl tree debug.timestamp, -+ * which holds one circular buffer per cpu. Events are 32 bytes each, -+ * formatted as TSC; LINE; a; b; c; d; e; f (all 32-bit arguments). -+ * They can be retrieved with something like -+ -+ sysctl -b kern.ts.data | \ -+ hexdump -e '"%15u %15u 0x%08x 0x%08x 0x%08x 0x%08x0x%08x 0x%08x\n"' -+ -+ * The following sysctl variables are used -+ * kern.ts.idx opaque array of MAXCPU indexes -+ * kern.ts.data opaque array of all records -+ * kern.ts.I.idx integer - the next index for cpu I -+ * kern.ts.I.data opaque array of records for cpu I -+ * -+ * The buffer is preallocated (1M entries or so) and statically divided -+ * in blocks, one for each CPUs ( sysctl kern.smp.cpus ). -+ * The dump will report first entries for CPU0, then 1 and so on. -+ * The actual TSTMP code is in kern/subr_smp.c -+ * -+ * The macros must be enabled with "options KERN_TIMESTAMP" in the kernel -+ * config file, otherwise they default to an empty block. -+ */ -+ -+#define KERN_TIMESTAMP -+#ifdef KERN_TIMESTAMP -+extern void _TSTMP(uint32_t p[8]); -+#define TSTMP(a, b, c, d, e, f) do { \ -+ uint32_t p[8] = { __LINE__, rdtsc(), a, b, c, d, e, f}; \ -+ _TSTMP(p); } while (0) -+ -+#else /* !KERN_TIMESTAMP */ -+#define TSTMP(a, b, c, d, e, f) do {} while (0) -+#endif /* !KERN_TIMESTAMP */ -+#endif /* _KERNEL */ -+ - #endif /* _SYS_PARAM_H_ */ diff --git a/private/extra/wireshark-netmap.diff b/private/extra/wireshark-netmap.diff deleted file mode 100644 index 424087da4..000000000 --- a/private/extra/wireshark-netmap.diff +++ /dev/null @@ -1,83 +0,0 @@ -diff -urp ../wireshark-1.11.2/dumpcap.c ./dumpcap.c ---- ../wireshark-1.11.2/dumpcap.c 2013-11-09 09:07:55.000000000 -0800 -+++ ./dumpcap.c 2013-12-04 13:46:02.009528218 -0800 -@@ -23,6 +23,8 @@ - - #include "config.h" - -+#define HAVE_NETMAP -+ - #include - #include /* for exit() */ - #include -@@ -418,6 +420,12 @@ static void report_cfilter_error(capture - - #define MSG_MAX_LENGTH 4096 - -+#ifdef HAVE_NETMAP -+#define NETMAP_WITH_LIBS -+#include -+#endif /* HAVE_NETMAP */ -+ -+ - /* Copied from pcapio.c pcapng_write_interface_statistics_block()*/ - static guint64 - create_timestamp(void) { -@@ -708,6 +716,15 @@ open_capture_device(interface_options *i - "pcap_open() returned %p.", (void *)pcap_h); - } else - #endif -+ -+#ifdef HAVE_NETMAP -+ if ((pcap_h = (pcap_t *)nm_open(interface_opts->name, -+ getenv("NETMAP_RING_ID"), 0, 0)) || errno > 0 ) { -+ printf("--- opening netmap %s gives %p\n", interface_opts->name, pcap_h); -+ /* can return NULL if valid name but error setting netmap */ -+ return pcap_h; -+ } else -+#endif /* HAVE_NETMAP */ - { - /* - * If we're not opening a remote device, use pcap_create() and -@@ -2740,6 +2757,11 @@ capture_loop_open_input(capture_options - - /* XXX - will this work for tshark? */ - #ifdef MUST_DO_SELECT -+#ifdef HAVE_NETMAP -+ if (IS_NETMAP_DESC(pcap_opts->pcap_h)) { -+ pcap_opts->pcap_fd = NETMAP_FD(pcap_opts->pcap_h); -+ } else -+#endif /* HAVE_NETMAP */ - if (!pcap_opts->from_cap_pipe) { - #ifdef HAVE_PCAP_GET_SELECTABLE_FD - pcap_opts->pcap_fd = pcap_get_selectable_fd(pcap_opts->pcap_h); -@@ -2823,6 +2845,12 @@ capture_loop_init_filter(pcap_t *pcap_h, - - /* capture filters only work on real interfaces */ - if (cfilter && !from_cap_pipe) { -+#ifdef HAVE_NETMAP -+ if (IS_NETMAP_DESC(pcap_h)) { -+ printf("no filters on netmap\n"); -+ return INITFILTER_NO_ERROR; // pretend ok -+ } -+#endif /* HAVE_NETMAP */ - /* A capture filter was specified; set it up. */ - if (!compile_capture_filter(name, pcap_h, &fcode, cfilter)) { - /* Treat this specially - our caller might try to compile this -@@ -3089,6 +3117,16 @@ capture_loop_dispatch(loop_data *ld, - * processing immediately, rather than processing all packets - * in a batch before quitting. - */ -+#ifdef HAVE_NETMAP -+ if (IS_NETMAP_DESC(pcap_opts->pcap_h)) { -+ pcap_handler cb = use_threads ? -+ capture_loop_queue_packet_cb : -+ capture_loop_write_packet_cb ; -+ // printf("dispatch to netmap\n"); -+ inpkts = nm_dispatch((struct nm_desc *)(pcap_opts->pcap_h), -+ 1, (nm_cb_t)cb, (u_char *)pcap_opts); -+ } else -+#endif /* HAVE_NETMAP */ - if (use_threads) { - inpkts = pcap_dispatch(pcap_opts->pcap_h, 1, capture_loop_queue_packet_cb, (u_char *)pcap_opts); - } else { diff --git a/private/netmap-drop-2.diff b/private/netmap-drop-2.diff deleted file mode 100644 index 91ea6d4c6..000000000 --- a/private/netmap-drop-2.diff +++ /dev/null @@ -1,435 +0,0 @@ -Index: /home/luigi/FreeBSD/head/sys/netinet/udp_usrreq.c -=================================================================== ---- /home/luigi/FreeBSD/head/sys/netinet/udp_usrreq.c (revision 244673) -+++ /home/luigi/FreeBSD/head/sys/netinet/udp_usrreq.c (working copy) -@@ -941,6 +941,7 @@ - #define UH_WLOCKED 2 - #define UH_RLOCKED 1 - #define UH_UNLOCKED 0 -+extern int netmap_drop; // XXX - static int - udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, - struct mbuf *control, struct thread *td) -@@ -956,6 +957,7 @@ - int unlock_udbinfo; - u_char tos; - -+ if (netmap_drop == 32) { m_freem(m); if (control) m_freem(control); return 0; } // XXX drop - /* - * udp_output() may need to temporarily bind or connect the current - * inpcb. As such, we don't know up front whether we will need the -@@ -1082,10 +1084,12 @@ - error = EINVAL; - goto release; - } -+ if (netmap_drop == 33) { error = 0; goto release; } // XXX drop - error = in_pcbbind_setup(inp, (struct sockaddr *)&src, - &laddr.s_addr, &lport, td->td_ucred); - if (error) - goto release; -+ if (netmap_drop == 34) { error = 0; goto release; } // XXX drop - } - - /* -@@ -1107,9 +1111,11 @@ - * Jail may rewrite the destination address, so let it do - * that before we use it. - */ -+ if (netmap_drop == 35) { error = 0; goto release; } // XXX drop - error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); - if (error) - goto release; -+ if (netmap_drop == 36) { error = 0; goto release; } // XXX drop - - /* - * If a local address or port hasn't yet been selected, or if -@@ -1170,6 +1176,7 @@ - } - } - -+ if (netmap_drop == 37) { error = 0; goto release; } // XXX drop - /* - * Calculate data length and get a mbuf for UDP, IP, and possible - * link-layer headers. Immediate slide the data pointer back forward -@@ -1240,6 +1247,7 @@ - INP_HASH_WUNLOCK(&V_udbinfo); - else if (unlock_udbinfo == UH_RLOCKED) - INP_HASH_RUNLOCK(&V_udbinfo); -+ if (netmap_drop == 31) { error = 0; goto release; } // XXX - error = ip_output(m, inp->inp_options, NULL, ipflags, - inp->inp_moptions, inp); - if (unlock_udbinfo == UH_WLOCKED) -@@ -1585,6 +1593,7 @@ - { - struct inpcb *inp; - -+ if (netmap_drop == 30) { m_freem(m); if (control) m_freem(control); return 0; } // XXX - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("udp_send: inp == NULL")); - return (udp_output(inp, m, addr, control, td)); -Index: /home/luigi/FreeBSD/head/sys/netinet/ip_output.c -=================================================================== ---- /home/luigi/FreeBSD/head/sys/netinet/ip_output.c (revision 244673) -+++ /home/luigi/FreeBSD/head/sys/netinet/ip_output.c (working copy) -@@ -98,7 +98,7 @@ - - extern int in_mcast_loop; - extern struct protosw inetsw[]; -- -+extern int netmap_drop; - /* - * IP output. The packet in mbuf chain m contains a skeletal IP - * header (with len, off, ttl, proto, tos, src, dst). -@@ -135,6 +135,7 @@ - #endif - M_ASSERTPKTHDR(m); - -+ if (netmap_drop == 50) {goto bad; } // XXX - if (inp != NULL) { - INP_LOCK_ASSERT(inp); - M_SETFIB(m, inp->inp_inc.inc_fibnum); -@@ -164,6 +165,7 @@ - flow_to_route(fle, ro); - } - #endif -+ if (netmap_drop == 51) {goto bad; } // XXX - - if (opt) { - int len = 0; -@@ -303,6 +305,7 @@ - else - isbroadcast = in_broadcast(dst->sin_addr, ifp); - } -+ if (netmap_drop == 56) {goto bad; } // XXX - /* - * Calculate MTU. If we have a route that is up, use that, - * otherwise use the interface's MTU. -@@ -474,6 +477,7 @@ - } - - sendit: -+ if (netmap_drop == 52) {goto bad; } // XXX - #ifdef IPSEC - switch(ip_ipsec_output(&m, inp, &flags, &error)) { - case 1: -@@ -501,6 +505,7 @@ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) - goto passout; - -+ if (netmap_drop == 53) {goto bad; } // XXX - /* Run through list of hooks for output packets. */ - odst.s_addr = ip->ip_dst.s_addr; - error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp); -@@ -570,6 +575,7 @@ - } - - passout: -+ if (netmap_drop == 54) {goto bad; } // XXX - /* 127/8 must not appear on wire - RFC1122. */ - if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || - (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { -@@ -628,6 +634,7 @@ - * to avoid confusing lower layers. - */ - m->m_flags &= ~(M_PROTOFLAGS); -+ if (netmap_drop == 55) {goto bad; } // XXX - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro); - goto done; -Index: /home/luigi/FreeBSD/head/sys/kern/uipc_mbuf.c -=================================================================== ---- /home/luigi/FreeBSD/head/sys/kern/uipc_mbuf.c (revision 244673) -+++ /home/luigi/FreeBSD/head/sys/kern/uipc_mbuf.c (working copy) -@@ -84,6 +84,33 @@ - &m_defragrandomfailures, 0, ""); - #endif - -+int copydata_flags; -+SYSCTL_DECL(_dev_netmap); -+SYSCTL_INT(_dev_netmap, OID_AUTO, copy_flags, CTLFLAG_RW, ©data_flags, 0, ""); -+ -+static inline void -+pkt_copy(void *_src, void *_dst, int l) -+{ -+ uint64_t *src = _src; -+ uint64_t *dst = _dst; -+#define likely(x) __builtin_expect(!!(x), 1) -+#define unlikely(x) __builtin_expect(!!(x), 0) -+ if (unlikely(l >= 1024)) { -+ bcopy(src, dst, l); -+ return; -+ } -+ for (; l > 0; l-=64) { -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ *dst++ = *src++; -+ } -+} -+ - /* - * Allocate a given length worth of mbufs and/or clusters (whatever fits - * best) and return a pointer to the top of the allocated chain. If an -@@ -807,6 +834,10 @@ - - KASSERT(off >= 0, ("m_copydata, negative off %d", off)); - KASSERT(len >= 0, ("m_copydata, negative len %d", len)); -+if (copydata_flags && off == 0 && m->m_next == NULL) { -+ pkt_copy(mtod(m, caddr_t), cp, len); -+ return; -+} - while (off > 0) { - KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); - if (off < m->m_len) -Index: /home/luigi/FreeBSD/head/sys/kern/uipc_syscalls.c -=================================================================== ---- /home/luigi/FreeBSD/head/sys/kern/uipc_syscalls.c (revision 244673) -+++ /home/luigi/FreeBSD/head/sys/kern/uipc_syscalls.c (working copy) -@@ -678,6 +678,7 @@ - return (error); - } - -+extern int netmap_drop; // XXX - static int - sendit(td, s, mp, flags) - struct thread *td; -@@ -694,6 +695,7 @@ - return (ECAPMODE); - #endif - -+ if (netmap_drop == 21) return 0; // XXX - if (mp->msg_name != NULL) { - error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); - if (error) { -@@ -704,6 +706,7 @@ - } else { - to = NULL; - } -+ if (netmap_drop == 22) { error = 0; goto bad; } // XXX - - if (mp->msg_control) { - if (mp->msg_controllen < sizeof(struct cmsghdr) -@@ -733,6 +736,7 @@ - control = NULL; - } - -+ if (netmap_drop == 23) {error =0; goto bad; } // XXX - error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); - - bad: -@@ -765,6 +769,7 @@ - rights = CAP_WRITE; - if (mp->msg_name != NULL) - rights |= CAP_CONNECT; -+ if (netmap_drop == 24) { return 0; } // XXX - error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL); - if (error) - return (error); -@@ -805,6 +810,7 @@ - ktruio = cloneuio(&auio); - #endif - len = auio.uio_resid; -+ if (netmap_drop == 25) { error = 0; goto bad; } // XXX - error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); - if (error) { - if (auio.uio_resid != len && (error == ERESTART || -@@ -847,6 +853,7 @@ - struct iovec aiov; - int error; - -+ if (netmap_drop == 20) return 0; - msg.msg_name = uap->to; - msg.msg_namelen = uap->tolen; - msg.msg_iov = &aiov; -Index: /home/luigi/FreeBSD/head/sys/dev/ixgbe/ixgbe.c -=================================================================== ---- /home/luigi/FreeBSD/head/sys/dev/ixgbe/ixgbe.c (revision 244673) -+++ /home/luigi/FreeBSD/head/sys/dev/ixgbe/ixgbe.c (working copy) -@@ -338,6 +338,7 @@ - * that extend the standard driver. - */ - #include -+extern int netmap_flags, netmap_drop; // XXX - #endif /* DEV_NETMAP */ - - /********************************************************************* -@@ -796,11 +797,14 @@ - struct tx_ring *txr; - int i = 0, err = 0; - -+ if (netmap_drop == 90) {m_freem(m); return 0; } // XXX - /* Which queue to use */ -+ if (netmap_flags & 4 && !(m->m_flags & M_FLOWID)) printf("%s %d no flowid curcpu %d\n", __func__, __LINE__, curcpu); - if ((m->m_flags & M_FLOWID) != 0) - i = m->m_pkthdr.flowid % adapter->num_queues; - else - i = curcpu % adapter->num_queues; -+ if (netmap_flags & 32) i = 0; // XXX - - txr = &adapter->tx_rings[i]; - que = &adapter->queues[i]; -@@ -809,6 +813,7 @@ - err = ixgbe_mq_start_locked(ifp, txr, m); - IXGBE_TX_UNLOCK(txr); - } else { -+ if (netmap_drop == 92) {m_freem(m); return 0; } // XXX - err = drbr_enqueue(ifp, txr->br, m); - taskqueue_enqueue(que->tq, &txr->txq_task); - } -@@ -842,6 +847,7 @@ - - /* Process the queue */ - while (next != NULL) { -+ if (netmap_drop == 91) {m_freem(next); err = 0; goto cont; } // XXX - if ((err = ixgbe_xmit(txr, &next)) != 0) { - if (next != NULL) - err = drbr_enqueue(ifp, txr->br, next); -@@ -854,6 +860,7 @@ - break; - if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) - ixgbe_txeof(txr); -+cont: // XXX - next = drbr_dequeue(ifp, txr->br); - } - -@@ -1764,6 +1771,7 @@ - txbuf = &txr->tx_buffers[first]; - map = txbuf->map; - -+ if (netmap_drop == 93) {m_freem(m_head); return 0; } // XXX - /* - * Map the packet for DMA. - */ -@@ -1800,6 +1808,7 @@ - return (error); - } - } -+ if (netmap_drop == 94) {m_freem(*m_headp); return 0; } // XXX - - /* Make certain there are enough descriptors */ - if (nsegs > txr->tx_avail - 2) { -Index: /home/luigi/FreeBSD/head/sys/net/pfil.c -=================================================================== ---- /home/luigi/FreeBSD/head/sys/net/pfil.c (revision 244673) -+++ /home/luigi/FreeBSD/head/sys/net/pfil.c (working copy) -@@ -64,6 +64,8 @@ - VNET_DEFINE(struct rmlock, pfil_lock); - #define V_pfil_lock VNET(pfil_lock) - -+extern int netmap_drop; -+ - /* - * pfil_run_hooks() runs the specified packet filter hooks. - */ -@@ -75,8 +77,18 @@ - struct packet_filter_hook *pfh; - struct mbuf *m = *mp; - int rv = 0; -- -+if (netmap_drop == 70) return 0; -+ - PFIL_RLOCK(ph, &rmpt); -+if (netmap_drop == 71) { -+ int num=0, act=0; -+ for (pfh = pfil_hook_get(dir, ph); pfh != NULL; -+ pfh = TAILQ_NEXT(pfh, pfil_link)) { -+ num++; -+ if (pfh->pfil_func != NULL) act++; -+ } -+ printf("dir %d total %d active %d\n", dir, num, act); -+} - KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0")); - for (pfh = pfil_hook_get(dir, ph); pfh != NULL; - pfh = TAILQ_NEXT(pfh, pfil_link)) { -Index: /home/luigi/FreeBSD/head/sys/net/if_ethersubr.c -=================================================================== ---- /home/luigi/FreeBSD/head/sys/net/if_ethersubr.c (revision 244673) -+++ /home/luigi/FreeBSD/head/sys/net/if_ethersubr.c (working copy) -@@ -141,6 +141,14 @@ - - #define senderr(e) do { error = (e); goto bad;} while (0) - -+#if defined(INET) || defined(INET6) -+int -+ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, int shared); -+static VNET_DEFINE(int, ether_ipfw); -+#define V_ether_ipfw VNET(ether_ipfw) -+#endif -+ -+extern int netmap_flags, netmap_drop; // XXX - /* - * Ethernet output routine. - * Encapsulate a packet of type family for the local net. -@@ -161,6 +169,7 @@ - int loop_copy = 1; - int hlen; /* link layer header length */ - -+ if (netmap_drop == 80) { error = 0; goto bad; } // XXX - if (ro != NULL) { - if (!(m->m_flags & (M_BCAST | M_MCAST))) - lle = ro->ro_lle; -@@ -183,6 +192,7 @@ - switch (dst->sa_family) { - #ifdef INET - case AF_INET: -+if (netmap_flags & 8 && lle == NULL) printf("%s %d ro %p rt0 %p no lle\n", __FUNCTION__, __LINE__, ro, rt0); - if (lle != NULL && (lle->la_flags & LLE_VALID)) - memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); - else -@@ -309,6 +319,7 @@ - return (if_simloop(ifp, m, dst->sa_family, 0)); - } - -+ if (netmap_drop == 81) { error = 0; goto bad; } // XXX - /* - * Add local net header. If no space in first mbuf, - * allocate another. -@@ -317,9 +328,12 @@ - if (m == NULL) - senderr(ENOBUFS); - eh = mtod(m, struct ether_header *); -+ if (netmap_drop == 87) { error = 0; goto bad; } // XXX - (void)memcpy(&eh->ether_type, &type, - sizeof(eh->ether_type)); -+ if (netmap_drop == 88) { error = 0; goto bad; } // XXX - (void)memcpy(eh->ether_dhost, edst, sizeof (edst)); -+ if (netmap_drop == 89) { error = 0; goto bad; } // XXX - if (hdrcmplt) - (void)memcpy(eh->ether_shost, esrc, - sizeof(eh->ether_shost)); -@@ -327,6 +341,7 @@ - (void)memcpy(eh->ether_shost, IF_LLADDR(ifp), - sizeof(eh->ether_shost)); - -+ if (netmap_drop == 82) { error = 0; goto bad; } // XXX - /* - * If a simplex interface, and the packet is being sent to our - * Ethernet address or a broadcast address, loopback a copy. -@@ -379,6 +394,7 @@ - } - } - -+ if (netmap_drop == 83) { error = 0; goto bad; } // XXX - /* - * Bridges require special output handling. - */ -@@ -406,6 +422,7 @@ - return (0); - } - -+ if (netmap_drop == 84) { error = 0; goto bad; } // XXX - /* Continue with link-layer output */ - return ether_output_frame(ifp, m); - } -@@ -431,6 +448,7 @@ - return (0); - } - -+ if (netmap_drop == 85) { m_freem(m); return 0; } // XXX - /* - * Queue message on interface, update output statistics if - * successful, and start output if interface not yet active. diff --git a/private/qemu/PICOBSD b/private/qemu/PICOBSD deleted file mode 100644 index 713493120..000000000 --- a/private/qemu/PICOBSD +++ /dev/null @@ -1,154 +0,0 @@ -# -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/PICOBSD 201065 2009-12-27 22:34:31Z luigi $ -# A configuration file to run tests on qemu. -# We disable SMP because it does not work well with qemu, and set HZ=1000 -# to avoid it being overridden. -# -# Line starting with #PicoBSD contains PicoBSD build parameters -#marker def_sz init MFS_inodes floppy_inodes -#PicoBSD 26000 init 8192 32768 -options MD_ROOT_SIZE=26000 # same as def_sz - -hints "PICOBSD.hints" - -# values accessible through getenv() -# env "PICOBSD.env" - -#cpu I486_CPU -#cpu I586_CPU -cpu I686_CPU -ident PICOBSD - -options SMP -device acpi # more frequencies ? -device apic # kern_et ? -device cpufreq - -option INVARIANTS -option INVARIANT_SUPPORT -options SCHED_ULE # mandatory to have one scheduler -options PREEMPTION -#options MATH_EMULATE #Support for x87 emulation -options INET #InterNETworking -options INET6 -options FFS #Berkeley Fast Filesystem -#options BOOTP #Use BOOTP to obtain IP address/hostname -options MD_ROOT #MD is a potential root device - -#options NFS #Network Filesystem -#options NFS_ROOT #NFS usable as root device, NFS required - -#options MSDOSFS #MSDOS Filesystem -#options CD9660 #ISO 9660 Filesystem -#options CD9660_ROOT #CD-ROM usable as root, CD9660 required -#options DEVFS #Device Filesystem -#options PROCFS #Process filesystem -options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] - -options KDB -options DDB - -options IPFIREWALL -options IPFIREWALL_DEFAULT_TO_ACCEPT -options IPDIVERT # divert (for natd) - -# Support for bridging and bandwidth limiting -options DUMMYNET -options IPFIREWALL_NAT -options LIBALIAS -device if_bridge -# Running with less than 1000 seems to give poor timing on -# qemu, so we set HZ explicitly. -options HZ=1000 - -device random # used by ssh -device pci - -# Floppy drives -device fdc - -# ATA and ATAPI devices -#device ata -#device atadisk # ATA disk drives -#device atapicd # ATAPI CDROM drives -#options ATA_STATIC_ID #Static device numbering - -# atkbdc0 controls both the keyboard and the PS/2 mouse -device atkbdc # At keyboard controller -device atkbd -#device psm # do we need the mouse ?? - -device vga # VGA screen - -# syscons is the default console driver, resembling an SCO console -device sc - -# Serial (COM) ports -device uart - -# Audio support -#device pcm - -# PCCARD (PCMCIA) support -#device card # pccard bus -#device pcic # PCMCIA bridge - -# Parallel port -#device ppc -#device ppbus # Parallel port bus (required) -#device lpt # Printer -#device plip # TCP/IP over parallel -#device ppi # Parallel port interface device - -# -# The following Ethernet NICs are all PCI devices. -# -device miibus -device ixgbe -device oce -device cxgbe # chelsio -device firmware # needed for chelsio ? -device em -device bge -#device fxp # Intel EtherExpress PRO/100B (82557, 82558) -device nfe # nVidia nForce MCP on-board Ethernet -#device xl # 3Com -device rl # RealTek 8129/8139 -device re # RealTek 8139C+/8169/8169S/8110S -device sis # National/SiS -device dc # DEC/Intel 21143 and various workalikes -device ed - -device loop # Network loopback -device ether # Ethernet support -device tun # Packet tunnel. -device pty # Pseudo-ttys (telnet etc) -device md # Memory "disks" -#device gif 4 # IPv6 and IPv4 tunneling -#device faith 1 # IPv6-to-IPv4 relaying (translation) -device tap - -#-- usb support -device uhci # UHCI PCI->USB interface -device ohci # OHCI PCI->USB interface -device ehci # EHCI PCI->USB interface (USB 2.0) -device usb -device uhid # "Human Interface Devices" -device ukbd # Keyboard -device scbus -device da -device umass # Disks/Mass storage - Requires scbus and da -device ums # Mouse - - -device kbdmux -options KBD_INSTALL_CDEV - -#options VIMAGE - -#options DEVICE_POLLING - -# The `bpf' device enables the Berkeley Packet Filter. -# Be aware of the administrative consequences of enabling this! -device bpf # Berkeley packet filter -device netmap diff --git a/private/qemu/PICOBSD.amd64 b/private/qemu/PICOBSD.amd64 deleted file mode 100644 index 02d7678e6..000000000 --- a/private/qemu/PICOBSD.amd64 +++ /dev/null @@ -1,193 +0,0 @@ -# -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/PICOBSD 201065 2009-12-27 22:34:31Z luigi $ -# A configuration file to run tests on qemu. -# This file is specific for AMD64 -# -# Line starting with #PicoBSD contains PicoBSD build parameters -#marker def_sz init MFS_inodes floppy_inodes -#PicoBSD 48000 init 8192 32768 -options MD_ROOT_SIZE=48000 # same as def_sz - -hints "PICOBSD.hints" - -# values accessible through getenv() -#env "PICOBSD.env" -#maxusers 10000 # large tables - -cpu HAMMER # I686_CPU -ident PICOBSD - -options NO_SWAPPING - -# compile with unnamed unions -makeoptions COPTFLAGS="-fms-extensions" - -#makeoptions "CCOPTS=-O3 -Wall -Werror" -#makeoptions COPTFLAGS="-O2 -Wall" # -Werror -#makeoptions CWARNFLAGS="-Wunused -Wuninitialized -Wno-pointer-sign -Wredundant-decls -fformat-extensions" -# clang may require more e.g. -Wno-tautological-compare - -#makeoptions WARNS=5 -options SMP -device acpi # more frequencies ? -# device apic # not for amd64 -device cpufreq - -#option INVARIANTS -#option INVARIANT_SUPPORT -#options WITNESS - -# XXX i find that SCHED_4BSD+PREEMPTION seems to work better. -#options SCHED_ULE # mandatory to have one scheduler -options SCHED_4BSD # mandatory to have one scheduler -options PREEMPTION -#options MATH_EMULATE #Support for x87 emulation -options INET #InterNETworking -#options INET6 -options FFS #Berkeley Fast Filesystem -#options BOOTP #Use BOOTP to obtain IP address/hostname -options MD_ROOT #MD is a potential root device - -#options NFS #Network Filesystem -#options NFS_ROOT #NFS usable as root device, NFS required - -#options MSDOSFS #MSDOS Filesystem -#options CD9660 #ISO 9660 Filesystem -#options CD9660_ROOT #CD-ROM usable as root, CD9660 required -#options DEVFS #Device Filesystem -#options PROCFS #Process filesystem -options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] - -options HWPMC_HOOKS -device hwpmc - -options KDB -options DDB - -#--- for fast networking, remove ipfw -options IPFIREWALL -options IPFIREWALL_DEFAULT_TO_ACCEPT -#options IPDIVERT # divert (for natd) - -# Support for bridging and bandwidth limiting -options DUMMYNET -#options IPFIREWALL_NAT -#options LIBALIAS -#device if_bridge - -# Running with less than 1000 seems to give poor timing on -# qemu, so we set HZ explicitly. -options HZ=4000 # XXX use 4000 for good polling - -device random # used by ssh -device pci - -# Floppy drives -device fdc - -# ATA and ATAPI devices -#device ata -#device atadisk # ATA disk drives -#device atapicd # ATAPI CDROM drives -#options ATA_STATIC_ID #Static device numbering - -# atkbdc0 controls both the keyboard and the PS/2 mouse -device atkbdc # At keyboard controller -device atkbd -#device psm # do we need the mouse ?? - -device vga # VGA screen - -# syscons is the default console driver, resembling an SCO console -device sc - -# Serial (COM) ports -device uart - -# Audio support -#device pcm - -# PCCARD (PCMCIA) support -#device card # pccard bus -#device pcic # PCMCIA bridge - -# -# The following Ethernet NICs are all PCI devices. -# -device miibus -#device ofed -#device mlx4ib -# device ipoib -#device sdp -#device mlxen -#device mthca - -device ixgbe -#device bxe # broadcom 10G -#device oce # emulex -#device sfxge # solarflare -#device cxgbe # chelsio -device firmware # needed for chelsio ? -device em -device igb -device bge -#device fxp # Intel EtherExpress PRO/100B (82557, 82558) -device nfe # nVidia nForce MCP on-board Ethernet -#device xl # 3Com -device rl # RealTek 8129/8139 -device re # RealTek 8139C+/8169/8169S/8110S -device sis # National/SiS -device dc # DEC/Intel 21143 and various workalikes -device ed - -device loop # Network loopback -device ether # Ethernet support -device tun # Packet tunnel. -device pty # Pseudo-ttys (telnet etc) -device md # Memory "disks" -#device gif 4 # IPv6 and IPv4 tunneling -#device faith 1 # IPv6-to-IPv4 relaying (translation) -device tap - -#-- usb support -device uhci # UHCI PCI->USB interface -device ohci # OHCI PCI->USB interface -device ehci # EHCI PCI->USB interface (USB 2.0) -device usb -device uhid # "Human Interface Devices" -device ukbd # Keyboard -device scbus -device da -device umass # Disks/Mass storage - Requires scbus and da -device ums # Mouse -#device hwpmc # not good as compiled in. - -device virtio #XXX for virtio -device virtio_pci -device vtnet #XXX for virtio - -device kbdmux -options KBD_INSTALL_CDEV - -#options VIMAGE - -# The `bpf' device enables the Berkeley Packet Filter. -# Be aware of the administrative consequences of enabling this! -device bpf # Berkeley packet filter -device netmap -options DEVICE_POLLING -options FLOWTABLE - -# options NETLINK - -#--- diskless support - -options NFSCL -#options BOOTP # compile relevant files (also need NFSCL or NFSCLIENT) -#options NFS_ROOT # compile nfs_diskless.c -#options BOOTP_NFSROOT # request root name from server ? -#options BOOTP_COMPAT # accept replies from 0.0.0.0 (ip_input.c) -#options BOOTP_WIRED_TO=fxp0 - -# BOOTP_NO_DHCP only bootp -# BOOTP_FORCE_DHCP only dhcp diff --git a/private/qemu/PICOBSD.arm b/private/qemu/PICOBSD.arm deleted file mode 100644 index 227b85d55..000000000 --- a/private/qemu/PICOBSD.arm +++ /dev/null @@ -1,141 +0,0 @@ -# -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/PICOBSD 201065 2009-12-27 22:34:31Z luigi $ -# A configuration file to run tests on qemu. -# We disable SMP because it does not work well with qemu, and set HZ=1000 -# to avoid it being overridden. -# -# Line starting with #PicoBSD contains PicoBSD build parameters -#marker def_sz init MFS_inodes floppy_inodes -#PicoBSD 26000 init 8192 32768 -options MD_ROOT_SIZE=26000 # same as def_sz - -hints "PICOBSD.hints" - -# values accessible through getenv() -# env "PICOBSD.env" - -#cpu I486_CPU -#cpu I586_CPU -cpu CPU_ARM9E -makeoptions CONF_CFLAGS="-march=armv5te" - -ident PICOBSD - -options SMP -#device acpi # more frequencies ? -device cpufreq - -option INVARIANTS -option INVARIANT_SUPPORT -options SCHED_ULE # mandatory to have one scheduler -options PREEMPTION -#options MATH_EMULATE #Support for x87 emulation -options INET #InterNETworking -options INET6 -options FFS #Berkeley Fast Filesystem -#options BOOTP #Use BOOTP to obtain IP address/hostname -options MD_ROOT #MD is a potential root device - -#options NFS #Network Filesystem -#options NFS_ROOT #NFS usable as root device, NFS required - -#options MSDOSFS #MSDOS Filesystem -#options CD9660 #ISO 9660 Filesystem -#options CD9660_ROOT #CD-ROM usable as root, CD9660 required -#options DEVFS #Device Filesystem -#options PROCFS #Process filesystem -options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] - -options KDB -options DDB - -options IPFIREWALL -options IPFIREWALL_DEFAULT_TO_ACCEPT -options IPDIVERT # divert (for natd) - -# Support for bridging and bandwidth limiting -options DUMMYNET -options IPFIREWALL_NAT -options LIBALIAS -device if_bridge -# Running with less than 1000 seems to give poor timing on -# qemu, so we set HZ explicitly. -options HZ=1000 - -device random # used by ssh -device pci - -# ATA and ATAPI devices -#device ata -#device atadisk # ATA disk drives -#device atapicd # ATAPI CDROM drives -#options ATA_STATIC_ID #Static device numbering - -# Serial (COM) ports -device uart - -# Audio support -#device pcm - -# PCCARD (PCMCIA) support -#device card # pccard bus -#device pcic # PCMCIA bridge - -# Parallel port -#device ppc -#device ppbus # Parallel port bus (required) -#device lpt # Printer -#device plip # TCP/IP over parallel -#device ppi # Parallel port interface device - -# -# The following Ethernet NICs are all PCI devices. -# -device miibus -device ixgbe -device cxgbe # chelsio -device sfxge # solarflare -device firmware # needed for chelsio ? -device em -device bge -#device fxp # Intel EtherExpress PRO/100B (82557, 82558) -#device xl # 3Com -device rl # RealTek 8129/8139 -device re # RealTek 8139C+/8169/8169S/8110S -device sis # National/SiS -device dc # DEC/Intel 21143 and various workalikes -device ed - -device loop # Network loopback -device ether # Ethernet support -device tun # Packet tunnel. -device pty # Pseudo-ttys (telnet etc) -device md # Memory "disks" -#device gif 4 # IPv6 and IPv4 tunneling -#device faith 1 # IPv6-to-IPv4 relaying (translation) -device tap - -#-- usb support -device uhci # UHCI PCI->USB interface -device ohci # OHCI PCI->USB interface -device ehci # EHCI PCI->USB interface (USB 2.0) -device usb -device uhid # "Human Interface Devices" -device ukbd # Keyboard -device scbus -device da -device umass # Disks/Mass storage - Requires scbus and da -device ums # Mouse - - -device kbdmux -options KBD_INSTALL_CDEV - -#options VIMAGE - -#options DEVICE_POLLING - -# The `bpf' device enables the Berkeley Packet Filter. -# Be aware of the administrative consequences of enabling this! -device bpf # Berkeley packet filter -#device netmap diff --git a/private/qemu/PICOBSD.hints b/private/qemu/PICOBSD.hints deleted file mode 100644 index cdb038ba4..000000000 --- a/private/qemu/PICOBSD.hints +++ /dev/null @@ -1,39 +0,0 @@ -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/PICOBSD.hints 201065 2009-12-27 22:34:31Z luigi $ -hint.fdc.0.at="isa" -hint.fdc.0.port="0x3F0" -hint.fdc.0.irq="6" -hint.fdc.0.drq="2" -hint.fd.0.at="fdc0" -hint.fd.0.drive="0" -hint.ata.0.at="isa" -hint.ata.0.port="0x1F0" -hint.ata.0.irq="14" -hint.ata.1.at="isa" -hint.ata.1.port="0x170" -hint.ata.1.irq="15" -hint.atkbdc.0.at="isa" -hint.atkbdc.0.port="0x060" -hint.atkbd.0.at="atkbdc" -hint.atkbd.0.irq="1" -hint.psm.0.at="atkbdc" -hint.psm.0.irq="12" -hint.vga.0.at="isa" -hint.sc.0.at="isa" -hint.npx.0.at="nexus" -hint.npx.0.port="0x0F0" -hint.npx.0.irq="13" -hint.uart.0.at="isa" -hint.uart.0.port="0x3F8" -hint.uart.0.flags="0x10" -hint.uart.0.irq="4" -hint.uart.1.at="isa" -hint.uart.1.port="0x2F8" -hint.uart.1.irq="3" -hint.ed.0.at="isa" -hint.ed.0.port="0x280" -hint.ed.0.irq="5" -hint.ed.0.maddr="0xd8000" -hint.ed.1.at="isa" -hint.ed.1.port="0x300" -hint.ed.1.irq="5" -hint.ed.1.maddr="0xd0000" diff --git a/private/qemu/config b/private/qemu/config deleted file mode 100644 index 9dd9f5965..000000000 --- a/private/qemu/config +++ /dev/null @@ -1,71 +0,0 @@ -# configuration for picobsd build script. -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/config 201065 2009-12-27 22:34:31Z luigi $ -# it should only contain variable definitions -- it is sourced -# by the shell much like rc.conf* files - -fd_size="24000" - -# You can use it e.g. in a local configuration file by writing -# -# do_copyfiles_user() { -# local dst=$1 -# find_progs nvi sed less grep -# cp -p ${u_progs} ${dst}/bin -# cp -p ${u_libs} ${dst}/lib -# mkdir -p ${dst}/libexec -# find_progs ld-elf.so.1 -# cp -p ${u_progs} ${dst}/libexec -# } -#copy_files=" -#" -do_copyfiles_user() { - local dst=$1 - log "--- called do_copyfiles_user" - - mkdir -p ${dst}/usr/lib - - find_progs -L / /usr/bin/ssh /usr/bin/scp /usr/sbin/sshd - cp -p ${u_progs} ${dst}/bin - # logverbose "Libraries for ssh etc: ${u_libs}" - cp -p ${u_libs} ${dst}/usr/local/lib - - find_progs -L / -P /usr/local/bin trafshow netperf netserver screen - cp -p ${u_progs} ${dst}/bin - cp -p ${u_libs} ${dst}/usr/local/lib - - - # XXX change this to head/tools/tools/netmap - #local d=/usr/ports-luigi/netmap-release/examples - local d=/usr/home/luigi/qemu-misc/netmap-release/examples - find_progs -L / -P $d pkt-gen bridge poll vale-ctl testlock click # pingd - cp -p ${u_progs} ${dst}/bin - cp -p ${u_libs} ${dst}/usr/lib - cp -p $d/testmod/test.ko ${dst} # XXX - cp -rp /tmp/boot/modules ${dst} # XXX - - find_progs -L / /tmp/click /tmp/kipfw /tmp/uipfw /tmp/ovs-vswitchd /tmp/ovs-dpctl /tmp/ovs-ofctl /tmp/ovs-dpctl - cp -p ${u_progs} ${dst}/bin - cp -p ${u_libs} ${dst}/usr/lib - -# find_progs -L /usr/local/lib -P /usr/local/bin tcpreplay -# cp -p ${u_progs} ${dst}/bin -# cp -p ${u_libs} ${dst}/usr/lib - -# cp -p /tmp/tcpreplay ${dst}/root - cp -p /tmp/nltest ${dst}/bin - cp -p /tmp/ovsbsd.ko ${dst}/root - cp -p /tmp/netsend /tmp/netreceive ${dst}/bin - cp -p /tmp/if_vtnet.ko ${dst}/bin -# cp -p /tmp/a.pcap ${dst}/root -# cp -p /tmp/open_key/* ${dst}/root - - cp -p /tmp/openvswitch.ko {dst}/root - - if [ ${TARGET_ARCH} = amd64 ]; then - cp -p /usr/lib/libssh.so.5 /lib/libmd.so.5 ${dst}/usr/lib # need the old one - #find_progs -L $d /tmp/bridge - #cp -p ${u_progs} ${dst}/bin - #cp -p ${u_libs} ${dst}/usr/lib - else - fi -} diff --git a/private/qemu/crunch.conf b/private/qemu/crunch.conf deleted file mode 100644 index 12a6c8e44..000000000 --- a/private/qemu/crunch.conf +++ /dev/null @@ -1,245 +0,0 @@ -# -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/crunch.conf 201065 2009-12-27 22:34:31Z luigi $ -# -# Configuration file for "qemu" images.. -# -# Depending on your needs, you will almost surely need to -# add/remove/change programs according to your needs. -# Remember that some programs require matching kernel options to -# enable device drivers etc. -# -# To figure out how much space is used by each program, do -# -# size build_dir-bridge/crunch/*lo -# -# Remember that programs require libraries, which add up to the -# total size. The final binary is build_dir-bridge/mfs.tree/stand/crunch -# and you can check which libraries it uses with -# -# ldd build_dir-bridge/mfs.tree/stand/crunch - -# crunchgen configuration to build the crunched binary, see "man crunchgen" -# We need to specify generic build options, the places where to look -# for sources, and the list of program and libraries we want to put -# in the crunched binary. -# -# NOTE: the string "/usr/src" below will be automatically replaced with -# the path set in the 'build' script. - -# Default build options. Basically tell the Makefiles -# that to use the most compact possible version of the code. - -buildopts -DWITHOUT_PAM -DRELEASE_CRUNCH -DPPP_NO_NETGRAPH -buildopts -DTRACEROUTE_NO_IPSEC # -DNO_INET6 -buildopts -DWITHOUT_IPX -buildopts -DWITHOUT_CASPER -# buildopts -DWITHOUT_AUDIT MK_AUDIT=no # login uses lbsm - -# Directories where to look for sources of various binaries. -# @__CWD__@ is a magic keyword in the picobsd's (Makefile.conf) -# which is replaced with the directory with the picobsd configuration -# corresponding to your image. This way you can have custom sources -# in that directory overriding system programs. - -srcdirs @__CWD__@/src - -# Some programs are especially written for PicoBSD and reside in -# release/picobsd/tinyware. -# Put this entry near the head of the list to override standard binaries. - -srcdirs /usr/src/release/picobsd/tinyware - -# Other standard locations for sources. -# If a program uses its own source directory, add - -srcdirs /usr/src/bin -srcdirs /usr/src/sbin/i386 -srcdirs /usr/src/sbin -srcdirs /usr/src/usr.bin -srcdirs /usr/src/gnu/usr.bin -srcdirs /usr/src/usr.sbin -srcdirs /usr/src/libexec -srcdirs /usr/src/secure/usr.bin -srcdirs /usr/src/secure/usr.sbin - -# For programs that reside in different places, the best option -# is to use the command "special XXX srcdir YYY" where XXX is the -# program name and YYY is the directory path. -# "special XXX ..." can be used to specify more options, see again -# the crunchgen manpage. - -#--- Basic configuraton -# init is always necessary (unless you have a replacement, oinit) -progs init -progs kenv - -# fsck is almost always necessary, unless you have everything on the -# image and use 'tar' or something similar to read/write raw blocks -# from the floppy. - -progs fsck - -progs nc # netcat -progs dd # -# ifconfig is needed if you want to configure interfaces. -progs ifconfig - -# You will also need a shell and a bunch of utilities. -# The standard shell is not that large, but you need many -# external programs. In fact most of them do not take much space -# as they merely issue a system call, and print the result. -# For a more compact version of shell and utilities, you could -# try busybox, however most system management commands in busybox -# will not work as they use linux-specific interfaces. - -progs sh -ln sh -sh - -# the small utilities -progs echo -progs pwd mkdir rmdir -progs chmod chown -ln chown chgrp -progs mv ln cp rm ls -progs cat tail tee -progs test -ln test [ -progs shutdown halt - -progs less -ln less more -progs mount -progs minigzip -ln minigzip gzip -progs kill -progs df -progs ps -progs ns # this is the picobsd version -ln ns netstat -progs vm -progs hostname -progs login -progs getty -progs stty -progs w -progs msg -progs nice -ln msg dmesg -progs reboot - -progs sysctl -progs swapon -progs pwd_mkdb -progs umount -progs du -progs passwd - -progs route - -# If you want to run natd, remember the alias library -progs natd -libs_so -lalias # natd -progs tcpdump -special tcpdump srcdir /usr/src/usr.sbin/tcpdump/tcpdump -libs_so -lpcap # used by tcpdump -libs_so -lcrypto # used by tcpdump with inet6 - -# ppp is rather large. Note that as of Jan.01, RELEASE_CRUNCH -# makes ppp not use libalias, so you cannot have aliasing. -#progs ppp - -# You need an editor. ee is relatively small, though there are -# smaller ones. vi is much larger. -# The editor also usually need a curses library. -progs ee - -progs arp - -progs vmstat iostat sleep -# these require libgeom -# progs bsdlabel fdisk mdconfig - -progs kldload kldunload kldstat -progs kldxref -#progs grep -#buildopts -DMK_BSD_GREP=1 -progs hexdump - -libs_so -lgnuregex -lbz2 -# dhclient-script requires 'sed' -progs dhclient -progs sed -progs date -progs time -progs ping -progs ping6 -progs tar - -progs top -progs pciconf - -#progs routed -progs ipfw -progs traceroute -progs mdmfs -ln mdmfs mount_mfs -# Various filesystem support -- remember to enable the kernel parts -# progs mount_msdosfs -progs mount_nfs -# progs mount_cd9660 -ln mount_nfs nfs -ln mount_cd9660 cd9660 -#progs newfs -#ln newfs mount_mfs -# ln mount_msdosfs msdos -progs uname - -progs vi - -progs jail jexec jls - -progs pmcstat - -# progs ld-elf.so.1 - -#progs pmcstat -#libs_so -lpmc -# For a small ssh client/server use dropbear - -# Now the libraries -libs_so -lc # the C library -libs_so -ll # used by sh (really ?) -libs_so -lufs # used by mount -### ncurses is needed on HEAD as of 2013-09 -libs_so -lncurses -libs_so -lncursesw # for wide char support as of 266157 -libs_so -lpam # -lbsm # full login -lradius -ltacplus -lopie -libs_so -lm -libs_so -ledit -lutil -libs_so -lcrypt -libs_so -lkvm -libs_so -lpmc # pmcstat -libs_so -lelf # pmcstat -libs_so -lz -libs_so -lbsdxml -libs_so -lsbuf -libs_so -ljail # used by ifconfig -libs_so -lulog -libs_so -lipsec -lmd -libs_so -larchive -lbz2 -libs_so -llzma # added after 207840 -libs_so -ldevstat -lmemstat -# libs_so -lxo #-- only on HEAD - -# -#--- ssh support -# progs ssh -# progs sshd -# progs scp -# -# libs_so -lssh -# libs_so -lwrap -# libs_so -lpam -# libs_so -lgssapi -# libs_so -lkrb5 # ssh ? -# libs_so -lcapsicum -lnv diff --git a/private/qemu/crunch.conf.amd64 b/private/qemu/crunch.conf.amd64 deleted file mode 100644 index d847e473f..000000000 --- a/private/qemu/crunch.conf.amd64 +++ /dev/null @@ -1,209 +0,0 @@ -# -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/crunch.conf 201065 2009-12-27 22:34:31Z luigi $ -# -# Configuration file for "qemu" images.. -# -# Depending on your needs, you will almost surely need to -# add/remove/change programs according to your needs. -# Remember that some programs require matching kernel options to -# enable device drivers etc. -# -# To figure out how much space is used by each program, do -# -# size build_dir-bridge/crunch/*lo -# -# Remember that programs require libraries, which add up to the -# total size. The final binary is build_dir-bridge/mfs.tree/stand/crunch -# and you can check which libraries it uses with -# -# ldd build_dir-bridge/mfs.tree/stand/crunch - -# crunchgen configuration to build the crunched binary, see "man crunchgen" -# We need to specify generic build options, the places where to look -# for sources, and the list of program and libraries we want to put -# in the crunched binary. -# -# NOTE: the string "/usr/src" below will be automatically replaced with -# the path set in the 'build' script. - -# Default build options. Basically tell the Makefiles -# that to use the most compact possible version of the code. - -buildopts -DWITHOUT_PAM -DRELEASE_CRUNCH -DPPP_NO_NETGRAPH -buildopts -DTRACEROUTE_NO_IPSEC # -DNO_INET6 -buildopts -DWITHOUT_IPX - -# Directories where to look for sources of various binaries. -# @__CWD__@ is a magic keyword in the picobsd's (Makefile.conf) -# which is replaced with the directory with the picobsd configuration -# corresponding to your image. This way you can have custom sources -# in that directory overriding system programs. - -srcdirs @__CWD__@/src - -# Some programs are especially written for PicoBSD and reside in -# release/picobsd/tinyware. -# Put this entry near the head of the list to override standard binaries. - -srcdirs /usr/src/release/picobsd/tinyware - -# Other standard locations for sources. -# If a program uses its own source directory, add - -srcdirs /usr/src/bin -srcdirs /usr/src/sbin/amd64 -srcdirs /usr/src/sbin -srcdirs /usr/src/usr.bin -srcdirs /usr/src/gnu/usr.bin -srcdirs /usr/src/usr.sbin -srcdirs /usr/src/libexec - -# For programs that reside in different places, the best option -# is to use the command "special XXX srcdir YYY" where XXX is the -# program name and YYY is the directory path. -# "special XXX ..." can be used to specify more options, see again -# the crunchgen manpage. - -#--- Basic configuraton -# init is always necessary (unless you have a replacement, oinit) -progs init - -# fsck is almost always necessary, unless you have everything on the -# image and use 'tar' or something similar to read/write raw blocks -# from the floppy. - -progs fsck - -# ifconfig is needed if you want to configure interfaces. -progs ifconfig - -# You will also need a shell and a bunch of utilities. -# The standard shell is not that large, but you need many -# external programs. In fact most of them do not take much space -# as they merely issue a system call, and print the result. -# For a more compact version of shell and utilities, you could -# try busybox, however most system management commands in busybox -# will not work as they use linux-specific interfaces. - -progs sh -ln sh -sh - -# the small utilities -progs echo -progs pwd mkdir rmdir -progs chmod chown -ln chown chgrp -progs mv ln cp rm ls -progs cat tail tee -progs test -ln test [ - -progs less -ln less more -progs mount -progs minigzip -ln minigzip gzip -progs kill -progs df -progs ps -progs ns # this is the picobsd version -ln ns netstat -progs vm -progs hostname -progs login -progs getty -progs stty -progs w -progs msg -ln msg dmesg -progs reboot - -progs sysctl -progs swapon -progs pwd_mkdb -progs umount -progs du -progs passwd - -progs route - -# If you want to run natd, remember the alias library -# progs natd -# libs_so -lalias # natd -progs tcpdump -special tcpdump srcdir /usr/src/usr.sbin/tcpdump/tcpdump -libs_so -lpcap # used by tcpdump -libs_so -lcrypto # used by tcpdump with inet6 - -# ppp is rather large. Note that as of Jan.01, RELEASE_CRUNCH -# makes ppp not use libalias, so you cannot have aliasing. -#progs ppp - -# You need an editor. ee is relatively small, though there are -# smaller ones. vi is much larger. -# The editor also usually need a curses library. -progs ee - -progs arp - -# these require libgeom -# progs bsdlabel fdisk mdconfig - -progs kldload kldunload kldstat -# progs kldxref -progs grep -# libs_so -lgnuregex -lbz2 -# dhclient-script requires 'sed' -progs dhclient -progs sed -progs date -progs time -progs ping -progs ping6 -progs tar - -progs top -progs pciconf - -#progs routed -progs ipfw -progs traceroute -progs mdmfs -ln mdmfs mount_mfs -# Various filesystem support -- remember to enable the kernel parts -# progs mount_msdosfs -progs mount_nfs -# progs mount_cd9660 -ln mount_nfs nfs -ln mount_cd9660 cd9660 -#progs newfs -#ln newfs mount_mfs -# ln mount_msdosfs msdos - -#progs jail jexec jls - -srcdirs /home/luigi/FreeBSD/pico9/qemu64 -progs bridge # - - -# For a small ssh client/server use dropbear -# progs ssh scp sshd srcdir /usr/src/crypto/openssh - -# Now the libraries -libs_so -lc # the C library -# libs_so -ll # used by sh (really ?) -# libs_so -lufs # used by mount -### ee uses ncurses but as a dependency -#libs_so -lncurses -libs_so -lm -libs_so -ledit -lutil -libs_so -lcrypt -libs_so -lkvm -libs_so -lz -libs_so -lbsdxml -libs_so -lsbuf -libs_so -ljail # used by ifconfig -libs_so -lulog -libs_so -lipsec -lmd -libs_so -larchive -lbz2 -libs_so -llzma # added after 207840 diff --git a/private/qemu/floppy.tree.exclude b/private/qemu/floppy.tree.exclude deleted file mode 100644 index adfc6cc75..000000000 --- a/private/qemu/floppy.tree.exclude +++ /dev/null @@ -1,2 +0,0 @@ -etc/snmpd.conf -etc/ppp diff --git a/private/qemu/floppy.tree/boot/loader.conf b/private/qemu/floppy.tree/boot/loader.conf deleted file mode 100644 index e2ba5ae67..000000000 --- a/private/qemu/floppy.tree/boot/loader.conf +++ /dev/null @@ -1,2 +0,0 @@ -kern.ipc.nmbclusters=128000 -luigi.test=1 diff --git a/private/qemu/floppy.tree/etc/motd b/private/qemu/floppy.tree/etc/motd deleted file mode 100644 index eb55bf34c..000000000 --- a/private/qemu/floppy.tree/etc/motd +++ /dev/null @@ -1,12 +0,0 @@ - - -============================================================== - - )\_)\ Welcome to PicoBSD, netmap demo image - (o,o) - __ \~/ Root password is "setup" - -->====\ - ~~ d d see http://info.iet.unipi.it/~luigi/netmap/ - -============================================================== -K diff --git a/private/qemu/floppy.tree/etc/rc.conf.defaults b/private/qemu/floppy.tree/etc/rc.conf.defaults deleted file mode 100644 index 70e3767ad..000000000 --- a/private/qemu/floppy.tree/etc/rc.conf.defaults +++ /dev/null @@ -1,188 +0,0 @@ -#!/bin/sh -# $FreeBSD: head/release/picobsd/floppy.tree/etc/rc.conf.defaults 91949 2002-03-09 18:27:02Z luigi $ -# -# rc.conf for picobsd. This is sourced from /etc/rc1, and is supposed to -# contain only shell functions that are used later in /etc/rc1. - -# set default values for variables. Boolean values should be either -# NO or YES -- other values are not guaranteed to work. - -rc_conf_set_defaults() { -hostname="" # Should not need to set it -syslogd_enable="NO" -pccard_enable="NO" -swapfile="" # name of swapfile if aux swapfile desired. - -# Network interface configurations: ifconfig_${interface}[_aliasNN] -ifconfig_lo0="inet 127.0.0.1" # default loopback device configuration. -#ifconfig_lo0_alias0="inet 127.0.0.254 netmask 0xffffffff" # Sample alias entry. - -### Network daemons options: they are only run if present. -sshd_enable="YES" # if present... -inetd_enable="YES" # Run the network daemon dispatcher (or NO) -inetd_flags="" # Optional flags to inetd -snmpd_enable="NO" # Run the SNMP daemon (or NO) -snmpd_flags="-C -c /etc/snmpd.conf" # Optional flags to snmpd - -### Network routing options: ### -defaultrouter="NO" # Set to default gateway (or NO). -static_routes="" # Set to static route list (or leave empty). -gateway_enable="NO" # Set to YES if this host will be a gateway. -arpproxy_all="" # replaces obsolete kernel option ARP_PROXYALL. -default_mask="0xffffff00" - -### Other network features -firewall_enable="NO" -firewall_quiet="NO" # be quiet if set. -firewall_type="" # Standard types or absolute pathname. -tcp_extensions="NO" # Allow RFC1323 & RFC1644 extensions (or NO). - -### Overrides for some files in /etc. Leave empty if no override, -### set variable (remember to use multiple lines) to override content. - -host_conf="hosts -bind" -resolv_conf="" -} - -# Try to identify the system by using the MAC address and name of the -# first ethernet interface, made available as $main_eth $main_if -find_system_id() { - main_ether="" - for main_if in `ifconfig -l` ; do - set `ifconfig $main_if` - while [ "$1" != "" ] ; do - if [ $1 = "ether" ] ; then - main_ether=$2 - break 2 - else - shift - fi - done - done -} - -# the following lets the user specify a name and ip for his system -read_address() { - ## XXX disabled - hostname=default - return # - - echo "Please enter a hostname and IP address for your system $main_ether" - read hostname the_ip - if [ "${hostname}" != "" ] ; then - echo "# $main_ether $hostname" >> /etc/hosts - echo "$the_ip $hostname" >> /etc/hosts - else - hostname=default - fi -} - -# set "ether" using $1 (interface name) as search key -get_ether() { - local key - key=$1 - ether="" - set `ifconfig ${key}` - while [ "$1" != "" ] ; do - if [ "$1" = "ether" ] ; then - ether=$2 - break - else - shift - fi - done -} - -# read content from /etc/hosts into a couple of arrays -# (needed later in fetch_hostname) -read_hosts() { - local i a b c key junk - i="" - while read a b c junk ; do - if [ "$a" = "#ethertable" ] ; then - i=0 - elif [ "$i" != "" -a "$a" = "#" -a "$b" != "" ] ; then - eval eth_${i}=$b - eval eth_host_${i}=$c - i=$(($i+1)) - fi - done < /etc/hosts -} - -# set ${hostname} using $1 (MAC address) as search key in /etc/hosts -# Returns empty value if $1 is empty -fetch_hostname() { - local i b key - hostname="" - [ "$1" = "" ] && return - key=$1 - i=0 - b="x" - [ "${eth_0}" = "" ] && read_hosts # fill cache. - while [ "$b" != "" -a "${hostname}" = "" ] ; do - eval b=\${eth_${i}} - case X${key} in - X${b} ) # so we can use wildcards - eval hostname=\${eth_host_${i}} - break - ;; - esac - i=$(($i+1)) - done - echo "fetch_hostname for <${key}> returns <${hostname}>" -} - -# sets "mask" using $1 (netmask name) as the search key in /etc/networks -fetch_mask() { - local a b key junk - key=$1 # search key, typically hostname-netmask - mask="" - while read a b junk; do # key mask otherstuff - case X${key} in - X${a} ) # The X is so we can use wildcards in ${a} - mask=$b - break - ;; - esac - done < /etc/networks - if [ "${mask}" = "" ] ; then - mask=${default_mask} - fi - echo "fetch_mask for <${key}> returns <${mask}>" -} - -# set hostname, and ifconfig_${main_if} (whose MAC is ${main_ether}) -# if not found, read from console -set_main_interface() { - if [ -z "${hostname}" ] ; then - if [ -z "${main_ether}" ] ; then - echo "No ethernets found, using localhost" - hostname=localhost - return - fi - fetch_hostname ${main_ether} - fi - - [ -z "${hostname}" -o "${hostname}" = "." ] && read_address - - fetch_mask ${hostname}-netmask - - eval ifconfig_${main_if}=\" \${hostname} netmask \${mask}\" - network_interfaces=`ifconfig -l` -} - -# set ifconfig_${interface} for all other interfaces -set_all_interfaces() { - local i ether hostname mask - - for i in `ifconfig -l` ; do - if [ "$i" != "${main_if}" ] ; then - get_ether $i - fetch_hostname ${ether} - fetch_mask ${hostname}-netmask - [ -n "${ether}" -a -n "${hostname}" ] && \ - eval ifconfig_${i}=\" \${hostname} netmask \${mask}\" - fi - done -} diff --git a/private/qemu/floppy.tree/root/.profile b/private/qemu/floppy.tree/root/.profile deleted file mode 100644 index 9fe78f2f8..000000000 --- a/private/qemu/floppy.tree/root/.profile +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -export PATH=/stand:/bin:/usr/bin:/usr/local/bin -export LD_LIBRARY_PATH=/lib:/usr/lib:/usr/local/lib -# -./test f1 diff --git a/private/qemu/floppy.tree/root/bri b/private/qemu/floppy.tree/root/bri deleted file mode 100644 index cdccf3c7b..000000000 --- a/private/qemu/floppy.tree/root/bri +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# 20130610 lr -# test code for vale switch -(pkt-gen -i vale0 -f rx -W &); pkt-gen -i vale1 -f tx -b 128 diff --git a/private/qemu/floppy.tree/root/bri.click b/private/qemu/floppy.tree/root/bri.click deleted file mode 100644 index 00f51fe9e..000000000 --- a/private/qemu/floppy.tree/root/bri.click +++ /dev/null @@ -1,19 +0,0 @@ -// -// $Id$ -// -// A sample test configuration for click -// -// -// create a switch - -sw :: EtherSwitch; - -// two input devices - -c0 :: FromDevice(ix0, BURST 30, PROMISC true); -c1 :: FromDevice(ix1, BURST 30, PROMISC true); - -// and now pass packets around - -c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0); -c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1); diff --git a/private/qemu/floppy.tree/root/rates b/private/qemu/floppy.tree/root/rates deleted file mode 100644 index 3158e38eb..000000000 --- a/private/qemu/floppy.tree/root/rates +++ /dev/null @@ -1,2 +0,0 @@ -I80211b_11M - diff --git a/private/qemu/floppy.tree/root/start_test b/private/qemu/floppy.tree/root/start_test deleted file mode 100644 index b4f267f27..000000000 --- a/private/qemu/floppy.tree/root/start_test +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -sysctl dev.cpu.0.freq=1200 -sysctl dev.cpu.0.freq=2934 -sysctl dev.ix.0.flow_control=0 -sysctl dev.ix.1.flow_control=0 -ifconfig ix0 up -ifconfig ix1 up diff --git a/private/qemu/floppy.tree/root/t1.ck b/private/qemu/floppy.tree/root/t1.ck deleted file mode 100644 index ac143f0ff..000000000 --- a/private/qemu/floppy.tree/root/t1.ck +++ /dev/null @@ -1,11 +0,0 @@ -// test1.ck -s :: InfiniteSource(LENGTH 64, BURST 1, NOTS true) -// -> q :: Queue -// -> c :: Counter - -> d :: Discard(BURST 1); - -DriverManager( - wait 1s, write s.active false, - //print "done $(d.count) packets $(q.drops) drops in 1s" - print "done $(d.count) packets drops in 1s" -); diff --git a/private/qemu/floppy.tree/root/t2.ck b/private/qemu/floppy.tree/root/t2.ck deleted file mode 100644 index c39325ef3..000000000 --- a/private/qemu/floppy.tree/root/t2.ck +++ /dev/null @@ -1,14 +0,0 @@ -// test1.ck -FromDevice(ix0, BURST 100) -> Discard; - -s :: FromDevice(ix1, BURST 100) -> Queue -> ToDevice(ix0, BURST 100); - -DriverManager( - set a 0, - label x, - wait 1s, - set b $(s.count), - print "done $(sub $b $a) packets in 1s", - set a $b, - goto x 1 -); diff --git a/private/qemu/floppy.tree/root/test b/private/qemu/floppy.tree/root/test deleted file mode 100755 index 0f316b53b..000000000 --- a/private/qemu/floppy.tree/root/test +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/sh - -f1() { # default setting - sysctl kern.timecounter.hardware=TSC-low - ifconfig em0 -rxcsum -txcsum - return - ifconfig ed2 delete - dhclient ed2 - sysctl net.inet.ip.fw.verbose=1 -} - -# test tables -f2() { - ipfw table 2 add 22 2000 - ipfw table 2 add 53 3000 - ipfw table 2 add 80 4000 - ipfw table 2 add 127.0.0.1 5000 - ipfw table 2 list -} - -f3() { - ipfw -q flush - ipfw add 100 count log out - ipfw add 200 skipto tablearg lookup dst-port 2 - ipfw add 300 skipto tablearg lookup dst-ip 2 - ipfw add 1000 allow ip from any to any - ipfw add 2000 allow ip from any to any - ipfw add 3000 allow ip from any to any - ipfw add 4000 allow ip from any to any - ipfw add 5000 allow ip from any to any -} - -f4() { - ipfw -q flush - echo > /etc/libalias.conf - sysctl net.inet.ip.fw.verbose=1 - ipfw add 100 divert natd log ip from any to any - ipfw add 100 count ip from any to any - ipfw add 200 count ip from any to any - natd -v -interface ed2 & -} - -f5() { - ipfw pipe 10 config bw 80kbit/s - ipfw add 100 pipe 10 ip from any to any - ipfw pipe show -} - -# test queues -f6() { - ipfw pipe 1 config bw 400kbit/s queue 30 - #ipfw pipe 2 config bw 180kbit/s - #ipfw pipe 3 config delay 30ms queue 100kbytes - ipfw queue 11 config sched 1 weight 1 - ipfw queue 12 config sched 1 weight 2 - ipfw queue 14 config sched 1 weight 4 - ipfw queue 18 config sched 1 weight 8 - ipfw -q flush - ipfw add 100 queue 11 src-ip 0&3 // low bits 00 - ipfw add 100 queue 12 src-ip 1&3 // low bits 01 - ipfw add 100 queue 14 src-ip 2&3 // low bits 10 - ipfw add 100 queue 18 src-ip 3&3 // low bits 11 -} - -# jail test -f7() { -jail -c -nXX vnet path=/ host.hostname=test.me persist=true command=/bin/sh -} - -f8() { - ipfw add 100 queue tablearg lookup dscp 1 - ipfw queue 10 config sched 5 mask queue - ipfw queue 20 config sched 5 mask queue - ipfw queue 30 config sched 5 mask queue - ipfw pipe 5 config bw 80Kbit/s - # ipfw table 1 add 0 10 - ipfw table 1 add 1 20 - ipfw table 1 add 2 30 - ipfw table 1 list -} - -case $1 in - f[0-9]*) $* ;; -esac diff --git a/private/qemu/floppy.tree/test b/private/qemu/floppy.tree/test deleted file mode 100755 index dcc1b646c..000000000 --- a/private/qemu/floppy.tree/test +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/sh - -f1() { - sysctl kern.timecounter.hardware=i8254 - ifconfig ed2 delete - dhclient ed2 - sysctl net.inet.ip.fw.verbose=1 -} - -init() { - local a=$1 - [ "x$a" = "x" ] && a=10.0.0.2 - echo "prepare for tests with netsend" - ifconfig ix0 $1 - sysctl dev.ix.0.enable_aim=0 - sysctl dev.ix.0.queue0.interrupt_rate=5000 - sysctl dev.ix.0.fc=0 - sysctl dev.cpu.0.freq=2934 - sysctl dev.netmap.drop - sysctl net.inet.icmp.icmplim=0 - echo netsend 10.0.0.1 5555 18 0 5 -} - -run_test() { # port num len breakpoint - local i ports=$1 - shift - echo "### break $3 cores $1 len $2" - sysctl dev.netmap.drop=$3 - for i in $1; do (netsend 10.0.0.1 $ports $2 0 5 &) ; done -} - -batch() { # ports len breakpoint - local i ports=$1 len=$2 bp=$3 - for i in 1 2 3 4; do - run_test $ports 0 $len $bp 2>&1 | grep -E "###|time/|send rate" - done - for i in 1 2 3 4; do - run_test $ports "0 1 2 3" $len $bp 2>&1 | grep -E "###|time/|send rate" - done -} - -go() { # ports len breakpoints - local i ports=$1 len=$2 - shift; shift - for i in $* ; do - batch $ports $len $i - done -} - -case $1 in - *) $* ;; -esac diff --git a/private/qemu/run b/private/qemu/run deleted file mode 100755 index a7907191e..000000000 --- a/private/qemu/run +++ /dev/null @@ -1,158 +0,0 @@ -#!/bin/sh -# build or run a picobsd instance -PLATFORM=amd64 - -#--- qemu options -QEMU_DIR=/usr/ports-luigi/qemu-netmap/work-head/qemu-1.2.2/x86_64-softmmu/ - -# location of the linux version -QEMU_DIR=/home/luigi/qemu-misc/qemu/x86_64-softmmu/ -BHYVE_DIR=/home/luigi/qemu-misc/bhyve/ - -# location of the freebsd version ? -# QEMU_DIR=/usr/home/luigi/qemu-misc/qemu-head/x86_64-softmmu/ - -[ "$QEMU_PORT" = "" ] && QEMU_PORT=4444 # monitoring port -NET1=" -device e1000,netdev=base -netdev user,id=base" -QEMU_OPTS="-m 1024 -smp 2 " - - -arch= -while [ x"$1" != x ] ; do - case "$1" in - -app) - APP=$2 - shift - ;; - - -j) - PARALLEL="-j 4" - ;; - - -targ*|-arch*) - arch="--arch $2" - PLATFORM=$2 - shift - ;; - - -kern*) - KERNCONF=$2 - shift - ;; - - -nomod*) - export NO_MODULES=yes - ;; - - -clean) - unset NO_CLEAN - ;; - - -depend) - unset NO_KERNELDEPEND - ;; - - ### qemu flags - -nonet) - # NET1=" -net nic,model=e1000-paravirt -net netmap,ifname=valeQE1" - NET1=" $NET1 -device e1000-paravirt" - ;; - -vale) - # NET1=" -net nic,model=e1000-paravirt -net netmap,ifname=valeQE1" - NET1=" $NET1 -device e1000-paravirt,netdev=n -netdev netmap,id=n,ifname=valeQE1" - ;; - -npipe) - # NET1=" -net nic,model=e1000-paravirt -net netmap,ifname=valeQE1" - NET1=" $NET1 -device e1000-paravirt,netdev=n -netdev netmap,id=n,ifname=valeXX:QE1{01" - ;; - - -vale2) - # NET1=" -net nic,model=e1000-paravirt -net netmap,ifname=valeQE1" - NET1=" $NET1 -device e1000,netdev=n -netdev netmap,id=n,ifname=valeQE2" - QEMU_PORT=4445 - ;; - -npipe) - # NET1=" -net nic,model=e1000-paravirt -net netmap,ifname=valeQE1" - NET1=" $NET1 -device e1000,netdev=n -netdev netmap,id=n,ifname=vale:xx{1" - QEMU_PORT=4446 - ;; - -sdl) - VIDEO=" " - ;; - - -vnc) - VIDEO="-vnc :0" - ;; - - -hd*) - hda=$2 - shift - ;; - - -q11) # qemu 11 - QEMU_DIR=/usr/ports/emulators/qemu/work/qemu-0.11.1/x86_64-softmmu/ - ;; - - - *) - break - ;; - - esac - shift -done -# cmd image root - -[ "$VIDEO" = "" ] && VIDEO="-curses -monitor tcp::$QEMU_PORT,server,nowait" - -QEMU_OPTS="$QEMU_OPTS $NET1 $NET2" - -cmd=$1 -pico=$2 -root=$3 -{ shift; shift; shift ; } || echo "ignoring errors" - -QEMU=${QEMU_DIR}qemu-system-x86_64 -[ -f "$hda" ] || hda=build_dir-$pico-$PLATFORM/picobsd.bin -echo "--- $cmd $pico on tree $root" -case "$cmd" in - init) - ../$root/release/picobsd/build/picobsd --src ../$root -n -v \ - $arch --init ${PARALLEL} - ;; - build|bld) - ../$root/release/picobsd/build/picobsd --src ../$root -n -v $pico - #$arch $pico - ;; - run) - $QEMU $QEMU_OPTS $VIDEO -hda $hda $* - ;; - - bhyve) - # COM2="-l com2,/dev/nmdm1A" - echo sudo bhyveload -m 512 -d $hda vm1 - # -h /tmp/diskless - sudo bhyveload -m 512 -d $hda vm1 - sudo ifconfig tap0 up - sudo ${BHYVE_DIR}bhyve -c 1 -s 0,hostbridge \ - -s 2,virtio-net,vale0 \ - -s 3,virtio-net,null \ - -s 4,virtio-net,tap0,mac=00:a0:98:fa:cc:10 \ - -s 30,lpc -l com1,/dev/nmdm0A $COM2 \ - -s 10,ahci-hd,$hda -A -H -P -m 512 vm1 - sudo bhyvectl --destroy --vm=vm1 - ;; - - diskless) # on bhyve - sudo bhyveload -m 512 -h /tmp/diskless vm1 - sudo ifconfig tap0 up - sudo ${BHYVE_DIR}bhyve -c 1 -s 0,hostbridge \ - -s 1,virtio-net,tap0,mac=00:a0:98:fa:cc:10 \ - -s 2,virtio-net,vale0 \ - -s 3,virtio-net,null \ - -s 30,lpc -l com1,/dev/nmdm0A $COM2 \ - -A -H -P -m 512 vm1 - sudo bhyvectl --destroy --vm=vm1 - ;; -esac - diff --git a/private/sys/dev/netmap/cxgbe_netmap.h b/private/sys/dev/netmap/cxgbe_netmap.h deleted file mode 100644 index 73ee8ea61..000000000 --- a/private/sys/dev/netmap/cxgbe_netmap.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - * - * netmap modifications for cxgbe - -20120120 -t4_sge seems to be the main file for processing. - -the device has several queues - iq ingress queue (messages posted ?) - fl freelist queue - -buffers are in sd->cl - -interrupts are serviced by t4_intr*() which does a atomic_cmpset_int() -to run only one instance of the driver (service_iq()) and -then clears the flag at the end. -The dispatches in there makes a list (iql) of postponed work. - -Handlers are cpl_handler[] per packet type. - received packets are t4_eth_rx() - -the main transmit routine is t4_main.c :: cxgbe_transmit() - which ends into t4_sge.c :: t4_eth_tx() - and eventually write_txpkt_wr() - -refill_fl() is called under lock -X_RSPD_TYPE_FLBUF is a data packet, perhaps - */ - -#include -#include -// #include -// #include /* vtophys ? */ -#include - -static int cxgbe_netmap_reg(struct ifnet *, int onoff); -static int cxgbe_netmap_txsync(void *, u_int, int); -static int cxgbe_netmap_rxsync(void *, u_int, int); -static void cxgbe_netmap_lock_wrapper(void *, int, u_int); - - -SYSCTL_NODE(_dev, OID_AUTO, cxgbe, CTLFLAG_RW, 0, "cxgbe card"); - -static void -cxgbe_netmap_attach(struct port_info *pi) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = pi->ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = 0; // qsize pi->num_tx_desc; - na.num_rx_desc = 0; // XXX qsize pi->num_rx_desc; - na.nm_txsync = cxgbe_netmap_txsync; - na.nm_rxsync = cxgbe_netmap_rxsync; - na.nm_register = cxgbe_netmap_reg; - /* - * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode - * we allocate the buffers on the first register. So we must - * disallow a SIOCSETMTU in netmap mode - */ - na.num_tx_rings = na->num_rx_rings = pi->ntxq; - na.buff_size = NETMAP_BUF_SIZE; - netmap_attach(&na); -} - - -/* - * support for netmap register/unregisted. We are already under core lock. - * only called on the first init or the last unregister. - */ -static int -cxgbe_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct adapter *adapter = ifp->if_softc; - -#if 0 - cxgbe_disable_intr(adapter); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - cxgbe_init_locked(adapter); /* also enables intr */ -#endif - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) -{ -#if 0 - // see ixgbe_netmap.h -#endif - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - */ -static int -cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) -{ -#if 0 - // see ixgbe_netmap.h -#endif - return 0; -} diff --git a/private/sys/dev/netmap/if_bge_netmap.h b/private/sys/dev/netmap/if_bge_netmap.h deleted file mode 100644 index fc98eb1ae..000000000 --- a/private/sys/dev/netmap/if_bge_netmap.h +++ /dev/null @@ -1,360 +0,0 @@ -/*- - * (C) 2014 Luigi Rizzo - Universita` di Pisa - * - * BSD copyright - * - * $FreeBSD$ - * - * netmap support for if_bge.c - * see ixgbe_netmap.h for details on the structure of the - * various functions. - */ - -#include -#include -#include -#include /* vtophys ? */ -#include - - -/* - * support for netmap register/unregisted. We are already under core lock. - * only called on the first register or the last unregister. - */ -static int -bge_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct bge_softc *adapter = ifp->if_softc; - - BGE_LOCK(adapter); - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - - bge_stop(adapter); - - if (onoff) { - na_set_native_flags(na); - } else { - na_clear_native_flags(na); - } - bge_init_locked(adapter); /* also enables intr */ - BGE_UNLOCK(adapter); - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -bge_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct bge_softc *sc = ifp; - struct netmap_ring *ring = kring->ring; - int delta, j, k, l, lim = kring->nkr_num_slots - 1; - u_int nm_i; - u_int nic_i; - u_int const head = kring->rhead; - - /* bge_tx_cons_idx is the equivalent of TDH on intel cards, - * i.e. the index of the tx frame most recently completed. - */ - l = sc->bge_ldata.bge_status_block->bge_idx[0].bge_tx_cons_idx; - - /* Sync the TX descriptor list */ - bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, - sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_POSTWRITE); - - /* record completed transmissions */ - delta = l - sc->bge_tx_saved_considx; - if (delta < 0) /* wrap around */ - delta += BGE_TX_RING_CNT; - if (delta > 0) { /* some tx completed */ - sc->bge_tx_saved_considx = l; - sc->bge_txcnt -= delta; - kring->nr_hwtail += delta; - if (kring->nr_hwtail > lim) - kring->nr_hwtail -= lim + 1; - } - - /* update tail pointer */ - XXX ring->tail = ... - - j = kring->nr_hwcur; - if (j != k) { /* we have new packets to send */ - bus_dmamap_t *txmap = sc->bge_cdata.bge_tx_dmamap; - int n = 0; - - l = sc->bge_tx_prodidx; - while (j != k) { - struct netmap_slot *slot = &ring->slot[j]; - struct bge_tx_bd *d = &sc->bge_ldata.bge_tx_ring[l]; - void *addr = NMB(na, slot); - int len = slot->len; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - uint64_t paddr = vtophys(addr); - d->bge_addr.bge_addr_lo = BGE_ADDR_LO(paddr); - d->bge_addr.bge_addr_hi = BGE_ADDR_HI(paddr); - /* buffer has changed, unload and reload map */ - netmap_reload_map(sc->bge_cdata.bge_tx_mtag, - txmap[l], addr, na->buff_size); - slot->flags &= ~NS_BUF_CHANGED; - } - slot->flags &= ~NS_REPORT; - d->bge_len = len; - d->bge_flags = BGE_TXBDFLAG_END; - bus_dmamap_sync(sc->bge_cdata.bge_tx_mtag, - txmap[l], BUS_DMASYNC_PREWRITE); - j = nm_next(j, lim); - l = nm_next(l, lim); - n++; - } - kring->nr_hwcur = k; /* the saved ring->cur */ - sc->bge_tx_prodidx = l; - ring->tail = ... - - /* now repeat the last part of bge_start_locked() */ - bus_dmamap_sync(sc->bge_cdata.bge_tx_ring_tag, - sc->bge_cdata.bge_tx_ring_map, BUS_DMASYNC_PREWRITE); - /* Transmit. */ - bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, l); - /* 5700 b2 errata */ - if (sc->bge_chiprev == BGE_CHIPREV_5700_BX) - bge_writembx(sc, BGE_MBX_TX_HOST_PROD0_LO, l); - sc->bge_timer = 5; - } - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - * In bge, the rx ring is initialized by setting the ring size - * bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, BGE_STD_RX_RING_CNT - 1); - * and the receiver always starts from 0. - * sc->bge_rx_saved_considx starts from 0 and is the place from - * which the driver reads incoming packets. - * sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx is the - * next (free) receive buffer where the hardware will put incoming packets. - * - * sc->bge_rx_saved_considx is maintained in software and represents XXX - * - * After a successful rxeof we do - * sc->bge_rx_saved_considx = rx_cons; - * ^---- effectively becomes rx_prod_idx - * - * bge_writembx(sc, BGE_MBX_RX_CONS0_LO, sc->bge_rx_saved_considx); - * ^--- we have freed some descriptors - * - * bge_writembx(sc, BGE_MBX_RX_STD_PROD_LO, (sc->bge_std + - * BGE_STD_RX_RING_CNT - 1) % BGE_STD_RX_RING_CNT); - * ^---- we have freed some buffers - */ -static int -bge_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct bge_softc *sc = a; - struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - uint32_t end; - - /* XXX check sync modes */ - bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, - sc->bge_cdata.bge_rx_return_ring_map, BUS_DMASYNC_POSTREAD); - bus_dmamap_sync(sc->bge_cdata.bge_rx_std_ring_tag, - sc->bge_cdata.bge_rx_std_ring_map, BUS_DMASYNC_POSTWRITE); - - l = sc->bge_rx_saved_considx; - nm_i = kring->nkr_hwtail; - nic_i = netmap_idx_k2n(kring, nm_i); - /* - * First part: import newly received packets - * - /* bge_rx_prod_idx is the same as RDH on intel cards -- the next - * (empty) buffer to be used for receptions. - * To decide when to stop we rely on bge_rx_prod_idx - * and not on the flags in the frame descriptors. - */ - end = sc->bge_ldata.bge_status_block->bge_idx[0].bge_rx_prod_idx; - if (nic_i != end) { - for (n = 0; nic_i != end; n++) { - struct bge_rx_bd *cur_rx; - uint32_t len; - - cur_rx = &sc->bge_ldata.bge_rx_return_ring[l]; - len = cur_rx->bge_len - ETHER_CRC_LEN; - kring->ring->slot[nm_i].len = len; - kring->ring->slot[nm_i].flags = kring->nkr_slot_flags; - /* sync was in bge_newbuf() */ - bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, - sc->bge_cdata.bge_rx_std_dmamap[l], - BUS_DMASYNC_POSTREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - sc->bge_rx_saved_considx = end; - bge_writembx(sc, BGE_MBX_RX_CONS0_LO, end); - sc->bge_ifp->if_ipackets += n; - kring->nr_hwtail = nm_i; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - n = 0; - nic_i = netmap_idx_k2n(kring, nm_i); - while (nm_i != head) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - struct bge_rx_bd *r = &sc->bge_ldata.bge_rx_std_ring[nic_i]; - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - r->bge_addr.bge_addr_lo = BGE_ADDR_LO(paddr); - r->bge_addr.bge_addr_hi = BGE_ADDR_HI(paddr); - if (slot->flags & NS_BUF_CHANGED) { - netmap_reload_map(sc->bge_cdata.bge_rx_mtag, - sc->bge_cdata.bge_rx_std_dmamap[nic_i], - addr); - slot->flags &= ~NS_BUF_CHANGED; - } - r->bge_flags = BGE_RXBDFLAG_END; - r->bge_len = na->buff_size; - r->bge_idx = nic_i; - bus_dmamap_sync(sc->bge_cdata.bge_rx_mtag, - sc->bge_cdata.bge_rx_std_dmamap[nic_i], - BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - n++; - } - kring->nr_hwcur = head; - /* Flush the RX DMA ring */ - - bus_dmamap_sync(sc->bge_cdata.bge_rx_return_ring_tag, - sc->bge_cdata.bge_rx_return_ring_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - } - // nm_rxsync_finalize(kring, resvd); XXX what is resvd? - return 0; -} - - -static void -bge_netmap_tx_init(struct bge_softc *sc) -{ - struct bge_tx_bd *d = sc->bge_ldata.bge_tx_ring; - int i; - struct netmap_adapter *na = NA(sc->bge_ifp); - struct netmap_slot *slot; - - slot = netmap_reset(na, NR_TX, 0, 0); - /* slot is NULL if we are not in native netmap mode */ - if (!slot) - return; - /* in native netmap mode, overwrite addresses and maps */ - for (i = 0; i < BGE_TX_RING_CNT; i++) { - /* - * the first time, ``slot`` points the first slot of - * the ring; the reset might have introduced some kind - * of offset between the kernel and userspace view of - * the ring; for these reasons, we use l to point - * to the slot linked to the i-th descriptor. - */ - void *addr; - uint64_t paddr; - struct netmap_kring *kring = &na->tx_rings[0]; - int l = i + kring->nkr_hwofs; - if (l >= sc->rl_ldata.rl_tx_desc_cnt) - l -= sc->rl_ldata.rl_tx_desc_cnt; - - addr = NMB(na, slot + l); - paddr = vtophys(addr); - d[i].bge_addr.bge_addr_lo = BGE_ADDR_LO(paddr); - d[i].bge_addr.bge_addr_hi = BGE_ADDR_HI(paddr); - netmap_load_map(na, sc->bge_cdata.bge_tx_mtag, - sc->bge_cdata.bge_tx_dmamap[i], - addr, na->buff_size); - } -} - - -static void -bge_netmap_rx_init(struct bge_softc *sc) -{ - /* slot is NULL if we are not in netmap mode */ - struct netmap_adapter *na = NA(sc->bge_ifp); - struct netmap_slot *slot; - struct bge_rx_bd *r = sc->bge_ldata.bge_rx_std_ring; - int i; - - slot = netmap_reset(na, NR_RX, 0, 0); - if (!slot) - return; // not in native mode - - for (i = 0; i < BGE_STD_RX_RING_CNT; i++) { - /* - * the first time, ``slot`` points the first slot of - * the ring; the reset might have introduced some kind - * of offset between the kernel and userspace view of - * the ring; for these reasons, we use l to point - * to the slot linked to the i-th descriptor. - */ - void *addr; - uint64_t paddr; - struct netmap_kring *kring = &na->rx_rings[0]; - int l = i + kring->nkr_hwofs; - if (l >= sc->rl_ldata.rl_rx_desc_cnt) - l -= sc->rl_ldata.rl_rx_desc_cnt; - - addr = NMB(na, slot + l); - paddr = vtophys(addr); - r[i].bge_addr.bge_addr_lo = BGE_ADDR_LO(paddr); - r[i].bge_addr.bge_addr_hi = BGE_ADDR_HI(paddr); - r[i].bge_flags = BGE_RXBDFLAG_END; - r[i].bge_len = na->buff_size; - r[i].bge_idx = i; - /* - * userspace knows that hwcur->hwtail slots were ready - * before the reset, so we need to leave some slots - * unavailable to the driver. - */ - D("incomplete driver: don't know how to reserve slots"); - - netmap_reload_map(na, sc->bge_cdata.bge_rx_mtag, - sc->bge_cdata.bge_rx_std_dmamap[i], - addr, na->buff_size); - } -} - -static void -bge_netmap_attach(struct bge_softc *sc) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = sc->bge_ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = BGE_TX_RING_CNT; - na.num_rx_desc = BGE_STD_RX_RING_CNT; - na.nm_txsync = bge_netmap_txsync; - na.nm_rxsync = bge_netmap_rxsync; - na.nm_register = bge_netmap_reg; - na.num_tx_rings = na.num_rx_rings = 1; - netmap_attach(&na); -} -/* end of file */ diff --git a/private/sys/dev/netmap/if_sfxge_netmap.h b/private/sys/dev/netmap/if_sfxge_netmap.h deleted file mode 100644 index b40df54ac..000000000 --- a/private/sys/dev/netmap/if_sfxge_netmap.h +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Copyright (C) 2014 Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/dev/netmap/ixgbe_netmap.h 232238 2012-02-27 19:05:01Z luigi $ - * - * netmap modifications for sfxge - -init: -interrupt: - sfxge_ev: sfxge_ev_qpoll() - in turn calls common/efx_ev.c efx_ev_qpoll() - the queue contains handlers which are interleaved, - The specific drivers are - efx_ev_rx 0 - then call eec_rx() or sfxge_ev_rx - efx_ev_tx 2 - then call eec_tx() or sfxge_ev_tx - plus some generic events. - efx_ev_driver 5 - efx_ev_global 6 - efx_ev_drv_gen 7 - efx_ev_mcdi 0xc - -The receive ring seems to be circular, SFXGE_NDESCS in both rx and tx. - struct sfxge_rxq *rxq; - struct sfxge_rx_sw_desc *rx_desc; - - id = rxq->pending modulo SFXGE_NDESCS - the descriptor is rxq->queue[id] - -each slot has size efx_qword_t (8 bytes with all overlays) - -The card is reset through sfxge_schedule_reset() - -Global lock: - sx_xlock(&sc->softc_lock); - - */ - -#include -#include -/* - * Some drivers may need the following headers. Others - * already include them by default - -#include -#include - - */ -#include - -static void sfxge_stop(struct sfxge_softc *sc); -static int sfxge_start(struct sfxge_softc *sc); -void sfxge_tx_qlist_post(struct sfxge_txq *txq); - - -static int -sfxge_netmap_init_buffers(struct sfxge_softc *sc) -{ - struct netmap_adapter *na = NA(sc->ifnet); - struct netmap_slot *slot; - int i, l, n, max_avail; - void *addr; - uint64_t paddr; - - slot = netmap_reset(na, NR_TX, 0, 0); - if (!slot) - return 0; - // tx rings, see - // sfxge_tx_qinit() - return 0; -} - - -/* - * Register/unregister. We are already under core lock. - * Only called on the first register or the last unregister. - */ -static int -sfxge_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct sfxge_softc *sc = ifp->if_softc; - int error = 0; - - SFXGE_LOCK(sc); - sfxge_stop(sc); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - sfxge_start(sc); /* also enables intr */ - SFXGE_UNLOCK(sc); - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - */ -static int -sfxge_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - int reclaim_tx; - - struct sfxge_softc *sc = ifp->if_softc; - struct sfxge_txq *txr = sc->txq[kring->ring_id]; - -// bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, -// BUS_DMASYNC_POSTREAD); - - /* - * First part: process new packets to send. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); /* NIC index */ - for (n = 0; nm_i != head ; n++) { - struct netmap_slot *slot = &ring->slot[j]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - efx_buffer_t *desc; - - NM_CHECK_ADDR_LEN(addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, unload and reload map */ - netmap_reload_map(txr->packet_dma_tag, - txr->stmp[nic_i].map, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - slot->flags &= ~NS_REPORT; - /* - * Fill the slot in the NIC ring. - * In this driver we need to rewrite the buffer - * address in the NIC ring. Other drivers do not - * need this. - * Use legacy descriptor, it is faster. - */ - desc->eb_addr = paddr; - desc->eb_size = len; - desc->eb_eop = 1; - txr->n_pend_desc = 1; - sfxge_tx_qlist_post(txr); - - /* make sure changes to the buffer are synced */ - bus_dmamap_sync(txr->packet_dma_tag, - txr->stmp[nic_i].map, BUS_DMASYNC_PREWRITE); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - - } - kring->nr_hwcur = head; - - /* synchronize the NIC ring */ -// bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, -// BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* (re)start the transmitter up to slot l (excluded) */ -// IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l); - } - - /* - * Reclaim buffers for completed transmissions. - */ - if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { - // XXX todo: add txeof body to reclaim buffers - if (txr->pending != txr->completed) { - n = (txr->pending > txr->completed) ? - txr->pending - txr->completed : - txr->pending - txr->completed + SFXGE_NDESCS; - txr->completed = txr->pending; - kring->nr_hwtail += n; - if (kring->nr_hwtail > lim) - kring->nr_hwtail -= lim + 1; - } - } - - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - */ -static int -sfxge_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct sfxge_softc *sc = ifp->if_softc; - struct sfxge_rxq *rxq = sc->rxq[kring->ring_id]; - struct sfxge_evq *evq = sc->evq[kring->ring_id]; - struct netmap_ring *ring = kring->ring; - u_int j, l, n, lim = kring->nkr_num_slots - 1; - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - u_int k = nm_rx_prologue(kring, &resvd); - - if (k > lim) - return netmap_ring_reinit(kring); - - /* XXX check sync modes */ -// bus_dmamap_sync(rxq->rxdma.dma_tag, rxq->rxdma.dma_map, -// BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* - * First part, import newly received packets into the netmap ring. - */ - nic_i = rxq->completed; - nm_i = netmap_idx_n2k(kring, nic_i); - - if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - - // see sfxge_rx_qcomplete() - - for (n = 0; l != rxq->pending ; n++) { - struct sfxge_rx_sw_desc *rx_desc = &rxq->queue[nic_i]; - ring->slot[nm_i].len = - rx_desc->size - sc->rx_prefix_size; - ring->slot[nm_i].flags = slot_flags; -// bus_dmamap_sync(rxq->ptag, -// rxq->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ -// rxq->completed = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - if (addr == netmap_buffer_base) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - //netmap_reload_map(rxq->ptag, rxbuf->pmap, addr); - slot->flags &= ~NS_BUF_CHANGED; - } -// curr->wb.upper.status_error = 0; -// curr->read.pkt_addr = htole64(paddr); -// bus_dmamap_sync(rxq->ptag, rxbuf->pmap, -// BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; -// bus_dmamap_sync(rxq->rxdma.dma_tag, rxq->rxdma.dma_map, -// BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* IMPORTANT: we must leave one free slot in the ring, - * so move l back by one unit - */ - nic_i = nm_prev(nic_i, lim); - //IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i); - } - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * The attach routine, called near the end of ixgbe_attach(), - * fills the parameters for netmap_attach() and calls it. - * It cannot fail, in the worst case (such as no memory) - * netmap mode will be disabled and the driver will only - * operate in standard mode. - */ -static void -sfxge_netmap_attach(struct sfxge_softc *sc) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = sc->ifnet; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = SFXGE_NDESCS; - na.num_rx_desc = SFXGE_NDESCS; - na.nm_txsync = sfxge_netmap_txsync; - na.nm_rxsync = sfxge_netmap_rxsync; - na.nm_register = sfxge_netmap_reg; - na.num_tx_rings = SFXGE_TXQ_NTYPES + SFXGE_RX_SCALE_MAX; - na.num_rx_rings = SFXGE_RX_SCALE_MAX; - netmap_attach(&na); -} - -/* end of file */ diff --git a/private/test/Makefile b/private/test/Makefile deleted file mode 100644 index 9b7e7623b..000000000 --- a/private/test/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -PROGS= interrupt_stats -NO_MAN= -CLEANFILES=$(PROGS) - -CFLAGS += -Werror -Wall -I../sys -CFLAGS += -Wextra - -.include - -all: $(PROGS) diff --git a/private/test/arp-daemon.c b/private/test/arp-daemon.c deleted file mode 100644 index a38c6636c..000000000 --- a/private/test/arp-daemon.c +++ /dev/null @@ -1,355 +0,0 @@ -#include -#include -#include -#include /* strcmp */ -#include /* open */ -#include /* close */ - -#include /* le64toh */ -#include /* PROT_* */ -#include /* ioctl */ -#include -#include -#include /* sockaddr.. */ -#include /* ntohs */ - -#include /* ifreq */ -#include -#include -#include - -#include /* sockaddr_in */ - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -struct arp { - u_short htype; - u_short ptype; - u_char hlen; - u_char plen; - u_short oper; - u_char sha[6]; - u_char spa[4]; - u_char tha[6]; - u_char tpa[4]; -}; - - -static int -get_ip(const char *ifname, struct in_addr *ip) -{ - int s; - struct ifreq ifreq; - - strcpy(ifreq.ifr_name, ifname); - if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) - return (1); - - if (ioctl(s, SIOCGIFADDR, &ifreq) == -1) { - close(s); - return (1); - } - - close(s); - - bcopy(&((struct sockaddr_in *)(&ifreq.ifr_addr))->sin_addr, ip, 4); - - return (0); -} - - -static void -create_ether(void *pkt, u_char *shost, u_char *dhost) -{ - struct ether_header *eh = (struct ether_header *) pkt; - - memcpy(eh->ether_shost, shost, 6); - memcpy(eh->ether_dhost, dhost, 6); - eh->ether_type = htons(ETHERTYPE_ARP); -} - - -static void -create_arp(void *pkt, u_char *sha, struct in_addr *spa, u_char *tha, - struct in_addr *tpa, int oper) -{ - struct arp *arp; - - arp = (struct arp *) pkt; - arp->htype = htons(1); /* Ethernet */ - arp->ptype = htons(ETHERTYPE_IP); - arp->hlen = 6; - arp->plen = 4; - arp->oper = htons(oper); - memcpy(arp->sha, sha, 6); - memcpy(arp->spa, spa, 4); - if (oper == 2) - memcpy(arp->tha, tha, 6); - memcpy(arp->tpa, tpa, 4); -} - - -static int -process_rings(struct netmap_ring *rxring, struct netmap_ring *txring, - struct in_addr *spa, u_char *shost, int limit) -{ - struct ether_header *eh; - struct arp *arp; - void *rxpkt, *txpkt; - int j, k, m = 0; - - j = rxring->nr_cur; /* RX */ - k = txring->nr_cur; /* TX */ - // XXX not sure the condition is correct - while (rxring->nr_avail > 0 && - txring->nr_avail > 0 && - (m != limit)) { - rxpkt = NETMAP_RING_PACKET(rxring, j); - eh = (struct ether_header *) rxpkt; - if (ntohs(eh->ether_type) != ETHERTYPE_ARP) - goto next; - - arp = (struct arp *) &eh[1]; - if (ntohs(arp->htype) != 1 || - ntohs(arp->ptype) != ETHERTYPE_IP || - arp->hlen != 6 || arp->plen != 4 || - ntohs(arp->oper) != 1 /* request */ || - memcmp(arp->tpa, spa, arp->plen) != 0) - goto next; - - txpkt = NETMAP_RING_PACKET(txring, k); - create_ether(txpkt, shost, arp->sha); - create_arp(txpkt + sizeof(struct ether_header), - shost, (struct in_addr *) arp->tpa, - arp->sha, (struct in_addr *) arp->spa, - 2); - NETMAP_RING_SLOTS(txring)[j].plen = 42; - - txring->nr_avail--; - - m++; - -next: - j = nm_ring_next(rxring, j); - rxring->nr_cur = j; - - k = nm_ring_next(txring, k); - txring->nr_cur = k; - } - - return (m); -} - - -static int -process_interface(struct netmap_if *nifp, struct in_addr *spa, u_char *shost, - int limit) -{ - struct netmap_ring *rxring, *txring; - int j, k, m = 0; - - for (int i = 0; i < nifp->ni_num_queues; i++) { - txring = NETMAP_TX_RING(nifp, i); - if (txring->nr_avail == 0) - continue; - - j = k = 0; - while (j < nifp->ni_num_queues && - k < nifp->ni_num_queues && - (m != limit)) { - rxring = NETMAP_RX_RING(nifp, j); - txring = NETMAP_TX_RING(nifp, k); - - if (rxring->nr_avail == 0) { - j++; - continue; - } - - if (txring->nr_avail == 0) { - k++; - continue; - } - - m += process_rings(rxring, txring, spa, shost, - limit - m); - } - } - return (m); -} - - -static void -print_output(int processed, int total, double delta) -{ - - double pps = processed / delta; - char units[4] = { '\0', 'K', 'M', 'G' }; - int punit = 0; - - while (pps >= 1000) { - pps /= 1000; - punit += 1; - } - - printf("Processed %d of %d requests in %.2f seconds.\n", - processed, total, delta); - printf("Speed: %.2f%cpps. Packet loss: %.2f%%.\n", - pps, units[punit], (total - processed) * 100.0 / total); -} - - -int -main(int arc, char **argv) -{ - int fd, err; - struct nmreq ifreq; - struct netmap_if *nifp; - struct in_addr spa; - void *tmp_addr; - struct pollfd fds[1]; - u_char shost[6]; - int sent = 0, n, burst; - struct timeval tic, toc; - double delta; - - if (arc != 4) { - printf("Usage: %s \n", argv[0]); - return (1); - } - - - /* retrieve ip address. */ - if (get_ip(argv[1], &spa)) { - printf("Unable to retrieve IP address.\n"); - return(1); - } - - /* setup netmap interface. */ - if ((fd = open("/dev/netmap", O_RDWR)) == -1) { - printf("Unable to open \"/dev/netmap\".\n"); - return (1); - } - - strcpy(ifreq.nr_name, argv[1]); - if ((ioctl(fd, NIOCREGIF, &ifreq)) == -1) { - printf("Unable to register \"%s\" interface.\n", argv[1]); - err = 1; - goto close; - } - - tmp_addr = (struct netmap_d *) mmap(0, ifreq.nr_memsize, - PROT_WRITE | PROT_READ, - MAP_SHARED, fd, 0); - if (tmp_addr == MAP_FAILED) { - printf("Unable to mmap.\n"); - err = 1; - goto close; - } - nifp = NETMAP_IF(tmp_addr, ifreq.nr_offset); - - /* retrieve mac address. */ - { - struct ifreq x; - bzero(&x, sizeof(x)); - strncpy(x.ifr_name, argv[1], sizeof(x.ifr_name)); - if ((ioctl(fd, SIOCGIFADDR, &x)) == -1) { - printf("Unable to retrieve MAC address.\n"); - err = 1; - goto unmap; - } - bcopy(&x.ifr_addr.sa_data, shost, 6); - } - - /* how many packets to wait for. */ - n = atoi(argv[2]); - - /* packets burst size. */ - burst = atoi(argv[3]); - - /* setup poll(2) machanism. */ - memset(fds, 0, sizeof(fds)); - fds[0].fd = fd; - fds[0].events = (POLLIN); - - /* Sleep to give the registered interface some to time to - bootstrap. */ - printf("Sleeping 5 secs..\n"); - sleep(5); - - /* wait for the first packet. */ - if (poll(fds, 1, INFTIM) <= 0) { - printf("poll <= 0\n"); - goto unmap; - } - - /* main loop */ - gettimeofday(&tic, NULL); - while (1) { - struct netmap_ring *txring; - int limit, m, done; - - /* Invoke the poll(2) mechanism. - Wait at most 1 second before quitting. */ - if (poll(fds, 1, 1 * 1000) <= 0) { - gettimeofday(&toc, NULL); - toc.tv_sec -= 1; - delta = toc.tv_sec - tic.tv_sec + - (toc.tv_usec - tic.tv_usec) / 1000000.0; - print_output(sent, n, delta); - break; - } - - if (fds[0].revents & POLLIN) { - fds[0].events &= ~POLLIN; - fds[0].events |= POLLOUT; - } - - if (fds[0].revents & POLLOUT) { - limit = MIN(burst, n - sent); - - m = process_interface(nifp, &spa, shost, limit); - sent += m; - - /* re-enable POLLIN on input. */ - fds[0].events |= POLLIN; - ioctl(fd, NIOCSYNCRX, NULL); - - /* disable POLLOUT on output. */ - fds[0].events &= ~POLLOUT; - ioctl(fd, NIOCSYNCTX, NULL); - } - - /* All the responses have benn sent. - Wait all the TX queues to be emtpy. */ - if (sent == n) { - /* wait all the TX queues to be empty. */ - done = 0; - while (!done) { - done = 1; - for (int i = 0; i < nifp->ni_num_queues; i++) { - txring = NETMAP_TX_RING(nifp, i); - if (NETMAP_TX_RING_EMPTY(txring)) - continue; - - done = 0; - ioctl(fds[0].fd, NIOCSYNCTX, NULL); - break; - } - } - gettimeofday(&toc, NULL); - delta = toc.tv_sec - tic.tv_sec + - (toc.tv_usec - tic.tv_usec) / 1000000.0; - print_output(sent, n, delta); - break; - } - } - - ioctl(fd, NIOCUNREGIF, &ifreq); - -unmap: - munmap(tmp_addr, ifreq.nr_memsize); -close: - close(fd); - - return (err); -} diff --git a/private/test/arp-request.c b/private/test/arp-request.c deleted file mode 100644 index 7f06913fd..000000000 --- a/private/test/arp-request.c +++ /dev/null @@ -1,337 +0,0 @@ -#include -#include -#include -#include /* strcmp */ -#include /* open */ -#include /* close */ -#include /* sigsuspend */ - -#include /* le64toh */ -#include /* PROT_* */ -#include /* ioctl */ -#include -#include -#include /* sockaddr.. */ -#include /* ntohs */ - -#include /* ifreq */ -#include -#include -#include - -#include /* sockaddr_in */ - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - - -struct arp { - u_short htype; - u_short ptype; - u_char hlen; - u_char plen; - u_short oper; - u_char sha[6]; - u_char spa[4]; - u_char tha[6]; - u_char tpa[4]; -}; - - -static int -get_ip(const char *ifname, struct in_addr *ip) -{ - int s; - struct ifreq ifreq; - - strcpy(ifreq.ifr_name, ifname); - if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) - return (1); - - if (ioctl(s, SIOCGIFADDR, &ifreq) == -1) { - close(s); - return (1); - } - - close(s); - - bcopy(&((struct sockaddr_in *)(&ifreq.ifr_addr))->sin_addr, ip, 4); - - return (0); -} - - -static void -create_ether(void *pkt, u_char *shost, u_char *dhost) -{ - struct ether_header *eh = (struct ether_header *) pkt; - - memcpy(eh->ether_shost, shost, 6); - memcpy(eh->ether_dhost, dhost, 6); - eh->ether_type = htons(ETHERTYPE_ARP); -} - - -static void -create_arp(void *pkt, u_char *sha, struct in_addr *spa, u_char *tha, - struct in_addr *tpa, int oper) -{ - struct arp *arp; - - arp = (struct arp *) pkt; - arp->htype = htons(1); /* Ethernet */ - arp->ptype = htons(ETHERTYPE_IP); - arp->hlen = 6; - arp->plen = 4; - arp->oper = htons(oper); - memcpy(arp->sha, sha, 6); - memcpy(arp->spa, spa, 4); - if (oper == 2) - memcpy(arp->tha, tha, 6); - memcpy(arp->tpa, tpa, 4); -} - - -static int -send_request(struct netmap_ring *ring, u_char *shost, u_char *dhost, - struct in_addr *spa, struct in_addr *tpa, int limit) -{ - struct ether_header *eh; - struct arp *arp; - void *pkt; - int j, m = 0; - - j = ring->nr_cur; - while(ring->nr_avail > 0 && (m != limit)) { - pkt = NETMAP_RING_PACKET(ring, j); - - eh = (struct ether_header *) pkt; - create_ether(pkt, shost, dhost); - - arp = (struct arp *) &eh[1]; - create_arp(arp, shost, spa, dhost, tpa, 1); - - NETMAP_RING_SLOTS(ring)[j].plen = 42; - - ring->nr_avail--; - - j = nm_ring_next(ring, j); - m++; - } - ring->nr_cur = j; - - return (m); -} - - -static int -receive_reply(struct netmap_ring *ring, struct in_addr *me, int limit, - int *received) -{ - struct ether_header *eh; - struct arp *arp; - void *pkt; - int j, m = 0; - - j = ring->nr_cur; - while (ring->nr_avail > 0 && (m != limit)) { - pkt = NETMAP_RING_PACKET(ring, j); - - m++; - - eh = (struct ether_header *) pkt; - if (ntohs(eh->ether_type) != ETHERTYPE_ARP) - goto next; - - arp = (struct arp *) &eh[1]; - if (ntohs(arp->htype) != 1 || - ntohs(arp->ptype) != ETHERTYPE_IP || - arp->hlen != 6 || arp->plen != 4 || - ntohs(arp->oper) != 2 /* response */ || - memcmp(arp->tpa, me, arp->plen) != 0) - goto next; - - (*received)++; -next: - j = NETMAP_RING_NEXT(ring, j); - ring->nr_cur = j; - } - - return (m); -} - - -static void -print_output(int received, int sent, double delta) -{ - - double pps = received / delta; - char units[4] = { '\0', 'K', 'M', 'G' }; - int punit = 0; - - while (pps >= 1000) { - pps /= 1000; - punit += 1; - } - - printf("Received %d of %d responses in %.2f seconds.\n", - received, sent, delta); - printf("Speed: %.2f%cpps. Packet loss: %.2f%%.\n", - pps, units[punit], (sent - received) * 100.0 / sent); -} - - -int -main(int arc, char **argv) -{ - int fd, err; - struct ifreq ifreq; - struct netmap_if *nifp; - struct in_addr spa, tpa; - void *tmp_addr; - struct pollfd fds[1]; - u_char shost[6], dhost[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - int sent = 0, received = 0, n, burst; - struct timeval tic, toc; - double delta; - - if (arc != 5) { - printf("Usage: %s \n", argv[0]); - return (1); - } - - /* retrieve source ip address. */ - if (get_ip(argv[1], &spa)) { - printf("Unable to retrieve source IP address.\n"); - return(1); - } - - /* retrieve destination ip address. */ - if (inet_aton(argv[2], &tpa) == 0) { - printf("Unable to parse destination IP address.\n"); - return(1); - } - - - /* setup netmap interface. */ - if ((fd = open("/dev/netmap", O_RDWR)) == -1) { - printf("Unable to open \"/dev/netmap\".\n"); - return (1); - } - - tmp_addr = (struct netmap_d *) mmap(0, NETMAP_MEMORY_SIZE, - PROT_WRITE | PROT_READ, - MAP_SHARED, fd, 0); - if (tmp_addr == MAP_FAILED) { - printf("Unable to mmap.\n"); - err = 1; - goto close; - } - - strcpy(ifreq.ifr_name, argv[1]); - if ((ioctl(fd, NIOCREGIF, &ifreq)) == -1) { - printf("Unable to register \"%s\" interface.\n", argv[1]); - err = 1; - goto unmap; - } - nifp = NETMAP_IF(tmp_addr, ifreq.ifr_data); - - /* retrieve mac address. */ - if ((ioctl(fd, SIOCGIFADDR, &ifreq)) == -1) { - printf("Unable to retrieve MAC address.\n"); - err = 1; - goto unmap; - } - bcopy(&ifreq.ifr_addr.sa_data, shost, 6); - - /* how many packets. */ - n = atoi(argv[3]); - - /* packets burst size. */ - burst = atoi(argv[4]); - - /* setup poll(2) machanism. */ - memset(fds, 0, sizeof(fds)); - fds[0].fd = fd; - fds[0].events = (POLLOUT | POLLIN); - - /* Sleep to give the registered interface some to time to - bootstrap. */ - printf("Sleeping 5 secs..\n"); - sleep(5); - - /* main loop */ - gettimeofday(&tic, NULL); - while (1) { - struct netmap_ring *txring, *rxring; - int limit, m; - - /* Invoke the poll(2) mechanism. - Wait at most 1 second before quitting. */ - if (poll(fds, 1, 1 * 1000) <= 0) { - gettimeofday(&toc, NULL); - toc.tv_sec -= 1; - delta = toc.tv_sec - tic.tv_sec + - (toc.tv_usec - tic.tv_usec) / 1000000.0; - print_output(received, sent, delta); - break; - } - - - /* Process received packets. */ - if (fds[0].revents & POLLIN) { - limit = MIN(burst, n - received); - for (int i = 0; i < nifp->ni_num_queues; i++) { - rxring = NETMAP_RX_RING(nifp, i); - if (rxring->nr_avail == 0) - continue; - - m = receive_reply(rxring, &spa, - limit, &received); - limit -= m; - if (limit == 0) - break; - } - ioctl(fds[0].fd, NIOCSYNCRX, NULL); - } - - if (fds[0].revents & POLLOUT) { - limit = MIN(burst, n - sent); - for (int i = 0; i < nifp->ni_num_queues; i++) { - txring = NETMAP_TX_RING(nifp, i); - if (txring->nr_avail == 0) - continue; - - m = send_request(txring, shost, dhost, - &spa, &tpa, limit); - sent += m; - limit -= m; - if (limit == 0) - break; - } - ioctl(fds[0].fd, NIOCSYNCTX, NULL); - - /* disable WR polling when done. */ - if (sent == n) - fds[0].events &= ~POLLOUT; - } - - /* All the responses have been received correctly. */ - if (received == n) { - gettimeofday(&toc, NULL); - delta = toc.tv_sec - tic.tv_sec + - (toc.tv_usec - tic.tv_usec) / 1000000.0; - print_output(received, sent, delta); - break; - } - - } - - ioctl(fd, NIOCUNREGIF, &ifreq); - -unmap: - munmap(tmp_addr, NETMAP_MEMORY_SIZE); -close: - close(fd); - - return (err); -} diff --git a/private/test/interrupt_stats.c b/private/test/interrupt_stats.c deleted file mode 100644 index f7bd42029..000000000 --- a/private/test/interrupt_stats.c +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include - -#include -#include -#include /* sysctl* */ -#include /* selinfo */ -#include /* sockaddr */ -#include -#include /* bus_addr_t */ -#include /* dma*tag */ - -#include - -#include -#include - - -int -main(int argc, char **argv) -{ - struct stats statz; - size_t slen; - char cmd[256]; - int i; - - if (argc < 2) { - fprintf(stderr, "Usage: %s driver" - "\n" - "supported drivers: lem, ixgbe\n" - "", - argv[0]); - return 1; - } - - snprintf(cmd, sizeof(cmd), "dev.%s.stats", argv[1]); - - slen = sizeof(statz); - if (sysctlbyname(cmd, &statz, &slen, NULL, 0)) { - warn("unable to read %s", cmd); - return 1; - } - - for (i = 0; i < NETMAP_MAX_STATS; i++) - fprintf(stdout, "%llu %u %u\n", - statz.statsdata[i].tsc, - statz.statsdata[i].unit, - statz.statsdata[i].queue); - return 0; -} diff --git a/private/test/lro.html b/private/test/lro.html deleted file mode 100644 index bf7d66ca6..000000000 --- a/private/test/lro.html +++ /dev/null @@ -1,59 +0,0 @@ - - - - - - diff --git a/private/test/nest.c b/private/test/nest.c deleted file mode 100644 index b08cb6ef9..000000000 --- a/private/test/nest.c +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include -int f_0(int x) -{ - return x + 1; -} - -int f_100(int x); -int main(int ac, char *av[]) -{ - int i, cnt, lim = atoi(av[1]); - volatile uint64_t res; - for (cnt = 0; cnt < lim; cnt++) { - uint64_t n = 0; - for (n = 0; n < 1000000; n++) - n += f_100(n); - res = n; - } -} - diff --git a/private/test/netmap_drop.diff b/private/test/netmap_drop.diff deleted file mode 100644 index 68a377de6..000000000 --- a/private/test/netmap_drop.diff +++ /dev/null @@ -1,567 +0,0 @@ -Index: head/sys/netinet/udp_usrreq.c -=================================================================== ---- head/sys/netinet/udp_usrreq.c (revision 234237) -+++ head/sys/netinet/udp_usrreq.c (working copy) -@@ -943,6 +943,7 @@ - #define UH_WLOCKED 2 - #define UH_RLOCKED 1 - #define UH_UNLOCKED 0 -+extern int netmap_drop; // XXX - static int - udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, - struct mbuf *control, struct thread *td) -@@ -957,6 +958,7 @@ - u_short fport, lport; - int unlock_udbinfo; - -+ if (netmap_drop == 32) { m_freem(m); if (control) m_freem(control); return 0; } // XXX drop - /* - * udp_output() may need to temporarily bind or connect the current - * inpcb. As such, we don't know up front whether we will need the -@@ -1072,10 +1074,12 @@ - error = EINVAL; - goto release; - } -+ if (netmap_drop == 33) { error = 0; goto release; } // XXX drop - error = in_pcbbind_setup(inp, (struct sockaddr *)&src, - &laddr.s_addr, &lport, td->td_ucred); - if (error) - goto release; -+ if (netmap_drop == 34) { error = 0; goto release; } // XXX drop - } - - /* -@@ -1097,9 +1101,11 @@ - * Jail may rewrite the destination address, so let it do - * that before we use it. - */ -+ if (netmap_drop == 35) { error = 0; goto release; } // XXX drop - error = prison_remote_ip4(td->td_ucred, &sin->sin_addr); - if (error) - goto release; -+ if (netmap_drop == 36) { error = 0; goto release; } // XXX drop - - /* - * If a local address or port hasn't yet been selected, or if -@@ -1160,6 +1166,7 @@ - } - } - -+ if (netmap_drop == 37) { error = 0; goto release; } // XXX drop - /* - * Calculate data length and get a mbuf for UDP, IP, and possible - * link-layer headers. Immediate slide the data pointer back forward -@@ -1230,6 +1237,7 @@ - INP_HASH_WUNLOCK(&V_udbinfo); - else if (unlock_udbinfo == UH_RLOCKED) - INP_HASH_RUNLOCK(&V_udbinfo); -+ if (netmap_drop == 31) { error = 0; goto release; } // XXX - error = ip_output(m, inp->inp_options, NULL, ipflags, - inp->inp_moptions, inp); - if (unlock_udbinfo == UH_WLOCKED) -@@ -1575,6 +1583,7 @@ - { - struct inpcb *inp; - -+ if (netmap_drop == 30) { m_freem(m); if (control) m_freem(control); return 0; } // XXX - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("udp_send: inp == NULL")); - return (udp_output(inp, m, addr, control, td)); -Index: head/sys/netinet/ip_output.c -=================================================================== ---- head/sys/netinet/ip_output.c (revision 234237) -+++ head/sys/netinet/ip_output.c (working copy) -@@ -98,7 +98,7 @@ - - extern int in_mcast_loop; - extern struct protosw inetsw[]; -- -+extern int netmap_drop; - /* - * IP output. The packet in mbuf chain m contains a skeletal IP - * header (with len, off, ttl, proto, tos, src, dst). -@@ -134,6 +134,7 @@ - #endif - M_ASSERTPKTHDR(m); - -+ if (netmap_drop == 50) {goto bad; } // XXX - if (inp != NULL) { - INP_LOCK_ASSERT(inp); - M_SETFIB(m, inp->inp_inc.inc_fibnum); -@@ -164,6 +165,7 @@ - } - #endif - } -+ if (netmap_drop == 51) {goto bad; } // XXX - - if (opt) { - int len = 0; -@@ -305,6 +307,7 @@ - else - isbroadcast = in_broadcast(dst->sin_addr, ifp); - } -+ if (netmap_drop == 56) {goto bad; } // XXX - /* - * Calculate MTU. If we have a route that is up, use that, - * otherwise use the interface's MTU. -@@ -476,6 +479,7 @@ - } - - sendit: -+ if (netmap_drop == 52) {goto bad; } // XXX - #ifdef IPSEC - switch(ip_ipsec_output(&m, inp, &flags, &error)) { - case 1: -@@ -503,6 +507,7 @@ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) - goto passout; - -+ if (netmap_drop == 53) {goto bad; } // XXX - /* Run through list of hooks for output packets. */ - odst.s_addr = ip->ip_dst.s_addr; - error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp); -@@ -568,6 +573,7 @@ - #endif /* IPFIREWALL_FORWARD */ - - passout: -+ if (netmap_drop == 54) {goto bad; } // XXX - /* 127/8 must not appear on wire - RFC1122. */ - if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || - (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { -@@ -628,6 +634,7 @@ - * to avoid confusing lower layers. - */ - m->m_flags &= ~(M_PROTOFLAGS); -+ if (netmap_drop == 55) {goto bad; } // XXX - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro); - goto done; -Index: head/sys/kern/uipc_syscalls.c -=================================================================== ---- head/sys/kern/uipc_syscalls.c (revision 234237) -+++ head/sys/kern/uipc_syscalls.c (working copy) -@@ -680,6 +680,7 @@ - return (error); - } - -+extern int netmap_drop; // XXX - static int - sendit(td, s, mp, flags) - struct thread *td; -@@ -696,6 +697,7 @@ - return (ECAPMODE); - #endif - -+ if (netmap_drop == 21) return 0; // XXX - if (mp->msg_name != NULL) { - error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); - if (error) { -@@ -706,6 +708,7 @@ - } else { - to = NULL; - } -+ if (netmap_drop == 22) { error = 0; goto bad; } // XXX - - if (mp->msg_control) { - if (mp->msg_controllen < sizeof(struct cmsghdr) -@@ -735,6 +738,7 @@ - control = NULL; - } - -+ if (netmap_drop == 23) {error =0; goto bad; } // XXX - error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); - - bad: -@@ -767,6 +771,7 @@ - rights = CAP_WRITE; - if (mp->msg_name != NULL) - rights |= CAP_CONNECT; -+ if (netmap_drop == 24) { return 0; } // XXX - error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL); - if (error) - return (error); -@@ -807,6 +812,7 @@ - ktruio = cloneuio(&auio); - #endif - len = auio.uio_resid; -+ if (netmap_drop == 25) { error = 0; goto bad; } // XXX - error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); - if (error) { - if (auio.uio_resid != len && (error == ERESTART || -@@ -849,6 +855,7 @@ - struct iovec aiov; - int error; - -+ if (netmap_drop == 20) return 0; - msg.msg_name = uap->to; - msg.msg_namelen = uap->tolen; - msg.msg_iov = &aiov; -Index: head/sys/kern/uipc_socket.c -=================================================================== ---- head/sys/kern/uipc_socket.c (revision 234237) -+++ head/sys/kern/uipc_socket.c (working copy) -@@ -853,6 +853,7 @@ - return (error); - } - -+extern int netmap_drop; // XXX - #ifdef ZERO_COPY_SOCKETS - struct so_zerocopy_stats{ - int size_ok; -@@ -995,6 +996,7 @@ - int atomic = sosendallatonce(so) || top; - #endif - -+ if (netmap_drop == 40) {error = 0; goto out;} // XXX - KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM")); - KASSERT(so->so_proto->pr_flags & PR_ATOMIC, - ("sodgram_send: !PR_ATOMIC")); -@@ -1071,6 +1073,7 @@ - error = EMSGSIZE; - goto out; - } -+ if (netmap_drop == 41) {error = 0; goto out;} // XXX - if (uio == NULL) { - resid = 0; - if (flags & MSG_EOR) -@@ -1096,6 +1099,7 @@ - #endif - resid = uio->uio_resid; - } -+ if (netmap_drop == 42) {error = 0; goto out;} // XXX - KASSERT(resid == 0, ("sosend_dgram: resid != 0")); - /* - * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock -@@ -1106,6 +1110,7 @@ - so->so_options |= SO_DONTROUTE; - SOCK_UNLOCK(so); - } -+ if (netmap_drop == 43) {error = 0; goto out;} // XXX - /* - * XXX all the SBS_CANTSENDMORE checks previously done could be out - * of date. We could have recieved a reset packet in an interrupt or -Index: head/sys/dev/netmap/netmap.c -=================================================================== ---- head/sys/dev/netmap/netmap.c (revision 234288) -+++ head/sys/dev/netmap/netmap.c (working copy) -@@ -115,7 +115,100 @@ - SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, - CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets."); - -+/* -+ * debugging support to analyse syscall behaviour -+ * netmap_drop is the point where to drop - -+ Path is: -+ -+ ./libthr/thread/thr_syscalls.c -+ lib/libc/i386/SYS.h -+ lib/libc/i386/sys/syscall.S -+ -+ head/sys/kern/syscall.master -+ ; Processed to created init_sysent.c, syscalls.c and syscall.h. -+ sys/kern/uipc_syscalls.c::sys_sendto() -+ sendit() -+ kern_sendit() -+ sosend() -+ sys/kern/uipc_socket.c::sosend() -+ so->so_proto->pr_usrreqs->pru_sosend(...) -+ sys/netinet/udp_usrreq.c::udp_usrreqs { } -+ .pru_sosend = sosend_dgram, -+ .pru_send = udp_send, -+ .pru_soreceive = soreceive_dgram, -+ sys/kern/uipc_socket.c::sosend_dgram() -+ m_uiotombuf() -+ (*so->so_proto->pr_usrreqs->pru_send) -+ sys/netinet/udp_usrreq.c::udp_send() -+ sotoinpcb(so); -+ udp_output() -+ INP_RLOCK(inp); -+ INP_HASH_RLOCK(&V_udbinfo); -+ fill udp and ip headers -+ ip_output() -+ -+ 30 udp_send() before udp_output -+ 31 udp_output before ip_output -+ 32 udp_output beginning -+ 33 before in_pcbbind_setup -+ 34 after in_pcbbind_setup -+ 35 before prison_remote_ip4 -+ 36 after prison_remote_ip4 -+ 37 before computing udp -+ -+ 20 beginning of sys_sendto -+ 21 beginning of sendit -+ 22 sendit after getsockaddr -+ 23 before kern_sendit -+ 24 kern_sendit before getsock_cap() -+ 25 kern_sendit before sosend() -+ -+ 40 sosend_dgram beginning -+ 41 sosend_dgram after sbspace -+ 42 sosend_dgram after m_uiotombuf -+ 43 sosend_dgram after SO_DONTROUTE -+ 44 sosend_dgram after pru_send (useless) -+ -+ 50 ip_output beginning -+ 51 ip_output after flowtable -+ 52 ip_output at sendit -+ 53 ip_output after pfil_hooked -+ 54 ip_output at passout -+ 55 ip_output before if_output -+ 56 ip_output after rtalloc etc. -+ -+ 60 uiomove print -+ -+ 70 pfil.c:: pfil_run_hooks beginning -+ 71 print number of pfil entries -+ -+ 80 ether_output start -+ 81 ether_output after first switch -+ 82 ether_output after M_PREPEND -+ 83 ether_output after simloop -+ 84 ether_output after carp and netgraph -+ 85 ether_output_frame before if_transmit() -+ -+ 90 ixgbe_mq_start (if_transmit) beginning -+ 91 ixgbe_mq_start_locked before ixgbe_xmit -+ -+FLAGS: -+ 1 disable ETHER_BPF_MTAP -+ 2 disable drbr stats update -+ 4 -+ 8 -+ 16 -+ 32 -+ 64 -+ 128 -+ */ -+int netmap_drop = 0; -+int netmap_flags = 0; /* debug flags */ -+ -+SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , ""); -+SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); -+ - /*------------- memory allocator -----------------*/ - #ifdef NETMAP_MEM2 - #include "netmap_mem2.c" -@@ -1067,7 +1160,8 @@ - kring->nr_hwcur + kring->nr_hwavail, len); - na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); - if (kring->nr_hwavail >= lim) { -- D("stack ring %s full\n", ifp->if_xname); -+ if (netmap_verbose) -+ D("stack ring %s full\n", ifp->if_xname); - goto done; /* no space */ - } - if (len > NETMAP_BUF_SIZE) { -@@ -1263,7 +1357,7 @@ - netmap_loader(__unused struct module *module, int event, __unused void *arg) - { - int error = 0; -- -+D("sizeof int %d", sizeof(int)); - switch (event) { - case MOD_LOAD: - error = netmap_init(); -Index: head/sys/dev/ixgbe/ixgbe.c -=================================================================== ---- head/sys/dev/ixgbe/ixgbe.c (revision 234237) -+++ head/sys/dev/ixgbe/ixgbe.c (working copy) -@@ -322,10 +322,11 @@ - * be a reference on how to implement netmap support in a driver. - * Additional comments are in ixgbe_netmap.h . - * -- * contains functions for netmap support -+ * contains functions for netmap support - * that extend the standard driver. - */ - #include -+extern int netmap_flags, netmap_drop; // XXX - #endif /* DEV_NETMAP */ - - /********************************************************************* -@@ -797,20 +798,28 @@ - struct tx_ring *txr; - int i = 0, err = 0; - -+ if (netmap_drop == 90) {m_freem(m); return 0; } // XXX - /* Which queue to use */ -+ if (netmap_flags & 4 && !(m->m_flags & M_FLOWID)) printf("%s %d no flowid curcpu %d\n", __func__, __LINE__, curcpu); - if ((m->m_flags & M_FLOWID) != 0) - i = m->m_pkthdr.flowid % adapter->num_queues; - else - i = curcpu % adapter->num_queues; -+ if (netmap_flags & 32) i = 0; // XXX - - txr = &adapter->tx_rings[i]; - que = &adapter->queues[i]; - -+ /* -+ * using IXGBE_TX_TRYLOCK() saves about 100ns/pkt: even if -+ * contentions are infrequent, when they happen we lose a lot. -+ */ - if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) && - IXGBE_TX_TRYLOCK(txr)) { - err = ixgbe_mq_start_locked(ifp, txr, m); - IXGBE_TX_UNLOCK(txr); - } else { -+ if (netmap_drop == 92) {m_freem(m); return 0; } // XXX - err = drbr_enqueue(ifp, txr->br, m); - taskqueue_enqueue(que->tq, &que->que_task); - } -@@ -845,6 +854,7 @@ - - /* Process the queue */ - while (next != NULL) { -+ if (netmap_drop == 91) {m_freem(next); err = 0; goto cont; } // XXX - if ((err = ixgbe_xmit(txr, &next)) != 0) { - if (next != NULL) - err = drbr_enqueue(ifp, txr->br, next); -@@ -862,6 +872,7 @@ - txr->queue_status |= IXGBE_QUEUE_DEPLETED; - break; - } -+cont: // XXX - next = drbr_dequeue(ifp, txr->br); - } - -@@ -1712,6 +1723,7 @@ - txbuf = &txr->tx_buffers[first]; - map = txbuf->map; - -+ if (netmap_drop == 93) {m_freem(m_head); return 0; } // XXX - /* - * Map the packet for DMA. - */ -@@ -1752,6 +1764,7 @@ - *m_headp = NULL; - return (error); - } -+ if (netmap_drop == 94) {m_freem(*m_headp); return 0; } // XXX - - /* Make certain there are enough descriptors */ - if (nsegs > txr->tx_avail - 2) { -@@ -1785,6 +1798,7 @@ - #endif - - #ifdef IXGBE_FDIR -+----- - /* Do the flow director magic */ - if ((txr->atr_sample) && (!adapter->fdir_reinit)) { - ++txr->atr_count; -Index: head/sys/net/pfil.c -=================================================================== ---- head/sys/net/pfil.c (revision 234237) -+++ head/sys/net/pfil.c (working copy) -@@ -62,6 +62,8 @@ - VNET_DEFINE(struct pfilheadhead, pfil_head_list); - #define V_pfil_head_list VNET(pfil_head_list) - -+extern int netmap_drop; -+ - /* - * pfil_run_hooks() runs the specified packet filter hooks. - */ -@@ -73,8 +75,18 @@ - struct packet_filter_hook *pfh; - struct mbuf *m = *mp; - int rv = 0; -- -+if (netmap_drop == 70) return 0; -+ - PFIL_RLOCK(ph, &rmpt); -+if (netmap_drop == 71) { -+ int num=0, act=0; -+ for (pfh = pfil_hook_get(dir, ph); pfh != NULL; -+ pfh = TAILQ_NEXT(pfh, pfil_link)) { -+ num++; -+ if (pfh->pfil_func != NULL) act++; -+ } -+ printf("dir %d total %d active %d\n", dir, num, act); -+} - KASSERT(ph->ph_nhooks >= 0, ("Pfil hook count dropped < 0")); - for (pfh = pfil_hook_get(dir, ph); pfh != NULL; - pfh = TAILQ_NEXT(pfh, pfil_link)) { -Index: head/sys/net/if_ethersubr.c -=================================================================== ---- head/sys/net/if_ethersubr.c (revision 234237) -+++ head/sys/net/if_ethersubr.c (working copy) -@@ -148,7 +148,7 @@ - #define V_ether_ipfw VNET(ether_ipfw) - #endif - -- -+extern int netmap_flags, netmap_drop; // XXX - /* - * Ethernet output routine. - * Encapsulate a packet of type family for the local net. -@@ -169,6 +169,7 @@ - int loop_copy = 1; - int hlen; /* link layer header length */ - -+ if (netmap_drop == 80) { error = 0; goto bad; } // XXX - if (ro != NULL) { - if (!(m->m_flags & (M_BCAST | M_MCAST))) - lle = ro->ro_lle; -@@ -191,6 +192,7 @@ - switch (dst->sa_family) { - #ifdef INET - case AF_INET: -+if (netmap_flags & 8 && lle == NULL) printf("%s %d ro %p rt0 %p no lle\n", __FUNCTION__, __LINE__, ro, rt0); - if (lle != NULL && (lle->la_flags & LLE_VALID)) - memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); - else -@@ -317,6 +319,7 @@ - return (if_simloop(ifp, m, dst->sa_family, 0)); - } - -+ if (netmap_drop == 81) { error = 0; goto bad; } // XXX - /* - * Add local net header. If no space in first mbuf, - * allocate another. -@@ -325,9 +328,12 @@ - if (m == NULL) - senderr(ENOBUFS); - eh = mtod(m, struct ether_header *); -+ if (netmap_drop == 87) { error = 0; goto bad; } // XXX - (void)memcpy(&eh->ether_type, &type, - sizeof(eh->ether_type)); -+ if (netmap_drop == 88) { error = 0; goto bad; } // XXX - (void)memcpy(eh->ether_dhost, edst, sizeof (edst)); -+ if (netmap_drop == 89) { error = 0; goto bad; } // XXX - if (hdrcmplt) - (void)memcpy(eh->ether_shost, esrc, - sizeof(eh->ether_shost)); -@@ -335,6 +341,7 @@ - (void)memcpy(eh->ether_shost, IF_LLADDR(ifp), - sizeof(eh->ether_shost)); - -+ if (netmap_drop == 82) { error = 0; goto bad; } // XXX - /* - * If a simplex interface, and the packet is being sent to our - * Ethernet address or a broadcast address, loopback a copy. -@@ -387,6 +394,7 @@ - } - } - -+ if (netmap_drop == 83) { error = 0; goto bad; } // XXX - /* - * Bridges require special output handling. - */ -@@ -414,6 +422,7 @@ - return (0); - } - -+ if (netmap_drop == 84) { error = 0; goto bad; } // XXX - /* Continue with link-layer output */ - return ether_output_frame(ifp, m); - } -@@ -440,6 +449,7 @@ - } - #endif - -+ if (netmap_drop == 85) { m_freem(m); return 0; } // XXX - /* - * Queue message on interface, update output statistics if - * successful, and start output if interface not yet active. diff --git a/private/test/test-nest b/private/test/test-nest deleted file mode 100644 index f8bb585c4..000000000 --- a/private/test/test-nest +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh -# -# test the nesting of calls in C -LIM=100 -build1() { # build one entry, - local i=0 j - local dst=nest_f1.c - rm -f $dst - while [ $i -lt $LIM ] ; do - j=$(( $i + 1 )) - echo "int f_$j(int x) { x = f_$i(x + 2); return x + 3; }" >> $dst - i=$j - done -} - -build100() { # build many entries - rm -rf test100 - mkdir test100 - local i=0 j - while [ $i -lt $LIM ] ; do - j=$(( $i + 1 )) - echo "int f_$i(int); int f_$j(int x) { x = f_$i(x + 2); return x + 3; }" > test100/f$j.c - i=$j - done -} -build1 -build100 diff --git a/private/test/test_device.c b/private/test/test_device.c deleted file mode 100644 index 268933da9..000000000 --- a/private/test/test_device.c +++ /dev/null @@ -1,191 +0,0 @@ -#include -#include -#include -#include /* strcmp */ -#include /* open */ -#include /* close */ - -#include /* PROT_* */ -#include /* ioctl */ -#include /* LIST_* */ -#include -#include -#include -#include /* sockaddr.. */ - -#include -#include /* ifreq */ -#include -#include - -#include "testnetmap.h" -#include "test_device.h" - - -#ifdef VERBOSE -#undef VERBOSE -#endif -#define VERBOSE 1 - - - -int -netmap_open(void) -{ - int fd; - - fd = open("/dev/netmap", O_RDWR); - ASSERT(fd != -1); - - return (fd); -} - - -void -netmap_close(int fd) -{ - int ret; - - ret = close(fd); - ASSERT(ret != -1); -} - - -void * -netmap_mmap(int fd, int l) -{ - void *tmp_addr; - - tmp_addr = mmap(0, l, PROT_WRITE | PROT_READ, - MAP_SHARED, fd, 0); - ASSERT(tmp_addr != MAP_FAILED); - - return (tmp_addr); -} - - -static void -test_netmap_open_close(void) -{ - int fd, fd1; - - fd = netmap_open(); - fd1 = netmap_open(); - - netmap_close(fd1); - netmap_close(fd); - - SUCCESS(); -} - - -static void -test_netmap_ioctl(const char *ifname) -{ - int fd, fd1; - struct nmreq ifreq; - - fd = netmap_open(); - fd1 = netmap_open(); - - strcpy(ifreq.nr_name, "fu0"); - /* unable to register unexistent interface */ - ASSERT(ioctl(fd, NIOCREGIF, &ifreq) == -1); - - strcpy(ifreq.nr_name, ifname); - ASSERT(ioctl(fd, NIOCREGIF, &ifreq) != -1); - /* unable to register multiple interfaces */ - ASSERT(ioctl(fd, NIOCREGIF, &ifreq) == -1); - /* register the same interface on different fds. */ - ASSERT(ioctl(fd1, NIOCREGIF, &ifreq) != -1); - - /* check if the driver support userspace synchronization. */ - ASSERT(ioctl(fd, NIOCTXSYNC, &ifreq) != -1); - ASSERT(ioctl(fd, NIOCRXSYNC, &ifreq) != -1); - - ASSERT(ioctl(fd1, NIOCUNREGIF, &ifreq) != -1); - ASSERT(ioctl(fd, NIOCUNREGIF, &ifreq) != -1); - /* unable to unregister an interface twice */ - ASSERT(ioctl(fd, NIOCUNREGIF, &ifreq) == -1); - - netmap_close(fd1); - netmap_close(fd); - - SUCCESS(); -} - - -static void -test_netmap_mmap(const char *ifname) -{ - int fd; - void *tmp_addr; - struct nmreq ifreq; - int l; - - fd = netmap_open(); - strcpy(ifreq.nr_name, ifname); - ASSERT(ioctl(fd, NIOCREGIF, &ifreq) != -1); - l = ifreq.nr_memsize; - - tmp_addr = netmap_mmap(fd, l); - ASSERT(munmap(tmp_addr, 1024) != -1); - - ASSERT(ioctl(fd, NIOCUNREGIF, &ifreq) != -1); - netmap_close(fd); - - SUCCESS(); -} - - -static void -test_netmap_poll(const char *ifname) -{ - int fd, ret; - struct ifreq ifreq; - struct pollfd fds[1]; - - fd = netmap_open(); - - memset(fds, 0, sizeof(fds)); - fds[0].fd = fd; - - /* no registered interface: POLLERR */ - ASSERT(poll(fds, 1, INFTIM) == 1); - ASSERT(fds[0].revents & POLLERR); - - strcpy(ifreq.ifr_name, ifname); - ASSERT(ioctl(fd, NIOCREGIF, &ifreq) != -1); - - /* noone is sending packets, so we cannot read: timeout */ - fds[0].events = (POLLIN | POLLRDNORM); - ASSERT((ret = poll(fds, 1, 1000)) != -1); - if (ret > 0) - ASSERT(fds[0].revents & POLLIN && - fds[0].revents & POLLRDNORM); - - /* the ring is empty, if we want to write we can do it. */ - fds[0].events = (POLLOUT | POLLWRNORM); - ASSERT((ret = poll(fds, 1, 1000)) != -1); - if (ret > 0) - ASSERT(fds[0].revents & POLLOUT && - fds[0].revents & POLLWRNORM); - - ASSERT(ioctl(fd, NIOCUNREGIF, &ifreq) != -1); - netmap_close(fd); - - SUCCESS(); -} - - -void -test_device(const char *ifname) -{ - test_netmap_open_close(); - - test_netmap_ioctl(ifname); - - test_netmap_mmap(ifname); - - test_netmap_poll(ifname); -} diff --git a/private/test/test_device.h b/private/test/test_device.h deleted file mode 100644 index 34538dc57..000000000 --- a/private/test/test_device.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _NETMAP_TEST_DEVICE_H -#define _NETMAP_TEST_DEVICE_H - -#include - - -int netmap_open(void); -void netmap_close(int fd); -void netmap_ioctl(int fd, u_long cmd, caddr_t data); -void *netmap_mmap(int fd, int l); - -#endif /* _NETMAP_TEST_DEVICE_H */ diff --git a/private/test/test_speed.c b/private/test/test_speed.c deleted file mode 100644 index cfd10b567..000000000 --- a/private/test/test_speed.c +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include - -#include /* ioctl */ -#include /* LIST_* */ -#include -#include /* sockaddr .. */ - -#include /* IFNAMSIZ */ -#include -#include - -#include "testnetmap.h" -#include "test_speed.h" -#include "test_device.h" - -#define ITERATIONS 100000 - - -static struct timing_method t_methods[] = { - { "gettimeofday()", TIMING_GTD, 0 }, - /*{ "clock_gettime(CLOCK_REALTIME)", TIMING_CGT, CLOCK_REALTIME },*/ - /*{ "clock_gettime(CLOCK_REALTIME_PRECISE)", TIMING_CGT, CLOCK_REALTIME_PRECISE },*/ - /*{ "clock_gettime(CLOCK_REALTIME_FAST)", TIMING_CGT, CLOCK_REALTIME_FAST },*/ - /*{ "clock_gettime(CLOCK_MONOTONIC)", TIMING_CGT, CLOCK_MONOTONIC },*/ - /*{ "clock_gettime(CLOCK_MONOTONIC_PRECISE)", TIMING_CGT, CLOCK_MONOTONIC_PRECISE },*/ - /*{ "clock_gettime(CLOCK_MONOTONIC_FAST)", TIMING_CGT, CLOCK_MONOTONIC_FAST },*/ - { "", 0, 0 } -}; - - -static void -test_ioctl_speed(const char *ifname) -{ - int fd, i; - double ravg = 0; - struct nmreq req; - struct netmap_if *nifp; - void *tmp_addr; - - fd = netmap_open(); - tmp_addr = netmap_mmap(fd, 1024 /* XXX */); - - strcpy(req.nr_name, ifname); - /* single queue sync. */ - req.nr_ringid = 0 | NETMAP_HW_RING; - ASSERT(ioctl(fd, NIOCREGIF, &req) != -1); - nifp = NETMAP_IF(tmp_addr, req.nr_offset); - - /* multi-queue sync: default configuration */ - i = 0; - while (strcmp("", t_methods[i].label) != 0) { - TIMEIT(t_methods[i].type, t_methods[i].clock_id, - ioctl(fd, NIOCRXSYNC, NULL), ravg, ITERATIONS); - SUCCESSF(": NIOCRXSYNC: multi: %0.6f usec.\n", ravg); - TIMEIT(t_methods[i].type, t_methods[i].clock_id, - ioctl(fd, NIOCTXSYNC, NULL), ravg, ITERATIONS); - SUCCESSF(": NIOCTXSYNC: multi: %0.6f usec.\n", ravg); - i++; - } - - i = 0; - while (strcmp("", t_methods[i].label) != 0) { - TIMEIT(t_methods[i].type, t_methods[i].clock_id, - ioctl(fd, NIOCRXSYNC, NULL), ravg, ITERATIONS); - SUCCESSF(": NIOCRXSYNC: single: %0.6f usec.\n", ravg); - TIMEIT(t_methods[i].type, t_methods[i].clock_id, - ioctl(fd, NIOCTXSYNC, NULL), ravg, ITERATIONS); - SUCCESSF(": NIOCTXSYNC: single: %0.6f usec.\n", ravg); - i++; - } - - ASSERT(ioctl(fd, NIOCUNREGIF, &req) != -1); - - netmap_close(fd); -} - -void -test_speed(const char *ifname) -{ - test_ioctl_speed(ifname); -} diff --git a/private/test/test_speed.h b/private/test/test_speed.h deleted file mode 100644 index e0a4870fb..000000000 --- a/private/test/test_speed.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _NETMAP_TEST_SPEED_H -#define _NETMAP_TEST_SPEED_H - -#include - -/* Enumerate describing the type of method to use for timimng */ -enum timing_type { - TIMING_GTD, /* gettimeofday(2) */ - TIMING_CGT /* clock_gettime(2) */ -}; - -/* Descriptor of timing methods */ -struct timing_method { - char label[128]; /* label/message associated with the method */ - enum timing_type type; /* type of timing method */ - clockid_t clock_id; /* clock identifier used with clock_gettime() */ -}; - - -#define RESULTS(label, value) \ - SUCCESSF(": %0.6f usec\t%s\n", (value), (label)); - -#define TIMEIT(type, clock, x, ravg, n) \ - do { \ - switch ((type)) { \ - case TIMING_GTD: \ - TIMEIT_GTD(x, ravg, n); \ - break; \ - \ - case TIMING_CGT: \ - TIMEIT_CGT(clock, x, ravg, n); \ - break; \ - \ - default: \ - break; \ - } \ - } while (0) - -#define TIMEIT_GTD(x, ravg, n) \ - do { \ - int _i; \ - double _tmp; \ - struct timeval _start, _end; \ - \ - gettimeofday(&_start, NULL); \ - for (_i = 0; _i < (n); _i++) \ - x; \ - gettimeofday(&_end, NULL); \ - _tmp = (_end.tv_usec - _start.tv_usec) + \ - 1000000 * (_end.tv_sec - _start.tv_sec);\ - (ravg) = _tmp / (double) (n); \ - } while (0) - -#define TIMEIT_CGT(clock, x, ravg, n) \ - do { \ - int _i; \ - double _tmp; \ - struct timespec _start, _end; \ - clock_gettime((clock), &_start); \ - for (_i = 0; _i < (n); _i++) \ - x; \ - clock_gettime((clock), &_end); \ - _tmp = (_end.tv_nsec - _start.tv_nsec) / (double) 1000 +\ - 1000000 * (_end.tv_sec - _start.tv_sec); \ - (ravg) = _tmp / (double) (n); \ - } while (0) - -#endif /* _NETMAP_TEST_SPEED_H */ diff --git a/private/test/test_userspace.c b/private/test/test_userspace.c deleted file mode 100644 index 46a636ce7..000000000 --- a/private/test/test_userspace.c +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include -#include -#include /* strcmp */ -#include /* open */ -#include /* close */ -#include /* sigsuspend */ - -#include /* PROT_* */ -#include /* ioctl */ -#include -#include -#include /* sockaddr.. */ -#include /* ntohs */ - -#include -#include -#include /* ifreq */ -#include -#include - -#include "testnetmap.h" -#include "test_device.h" -#include "test_userspace.h" - - -#ifdef VERBOSE -#undef VERBOSE -#endif -#define VERBOSE 1 - - -void -test_userspace(const char *ifname) -{ - int fd; - void *tmp_addr; - struct nmreq ifreq; - struct netmap_if *nifp; - int l; - - fd = netmap_open(); - - - strcpy(ifreq.nr_name, ifname); - ASSERT(ioctl(fd, NIOCREGIF, &ifreq) != -1); - l = ifreq.nr_memsize; - tmp_addr = netmap_mmap(fd, l); - nifp = NETMAP_IF(tmp_addr, ifreq.nr_offset); - - PRINT_NIF(nifp); - - ASSERT(ioctl(fd, NIOCUNREGIF, &ifreq) != -1); - ASSERT(munmap(tmp_addr, l) != -1); - netmap_close(fd); - - SUCCESS(); -} diff --git a/private/test/test_userspace.h b/private/test/test_userspace.h deleted file mode 100644 index 066967ddd..000000000 --- a/private/test/test_userspace.h +++ /dev/null @@ -1,175 +0,0 @@ -#ifndef _NETMAP_TEST_USERSPACE_H_ - - -/* - * Print the ring. - * - * @d netmap descriptor pointer. - * @n netmap_if descriptor pointer. - * @r netmap_ring descriptor pointer. - * @t name of the ring field ([tx,rx]_rings). - * @nd number of descriptors inside the ring. - * @ds size of each descriptor. - * - * Print only the slots containing data. - * Example: - * head: 10 - * tail: 8 - */ -#define PRINT_NIF_RING(d, n, r, t, nd, ds) \ -do { \ - printf("head: %d\n" \ - "tail: %d\n" \ - "", \ - (r)->head, \ - (r)->tail \ - ); \ -} while (0) - - -/* - * Print the i-th transmit ring. - * - * @d netmap descriptor pointer. - * @n netmap_if descriptor pointer. - * @i index of the ring. - */ -#define PRINT_NIF_TX_RING(d, n, i) \ - PRINT_NIF_RING(d, n, NETMAP_TXRING(n, i), \ - tx_rings, (n)->num_tx_descs, (n)->tx_desc_size) - - -/* - * Print the i-th receive ring. - * - * @d netmap descriptor pointer. - * @n netmap_if descriptor pointer. - * @i index of the ring. - */ -#define PRINT_NIF_RX_RING(d, n, i) \ - PRINT_NIF_RING(d, n, NETMAP_RX_RING(d, n, i), \ - rx_rings, (n)->num_rx_descs, (n)->rx_desc_size) - - -/* - * Print a netmap interface descriptor. - * - * @n netmap_if descriptor. - * - * Example: - * netmap-interface: - * ----------------- - * Name: em0 # queues: 1 - */ -#define PRINT_NIF(n) \ -do { \ - printf("netmap-interface:\n" \ - "-----------------\n" \ - "Name: %s #queues: %d #desc-per-que: %d\n" \ - "", \ - (n)->ni_name, \ - (n)->ni_num_queues, \ - NETMAP_TXRING(n, 0)->num_slots \ - ); \ -} while (0) - - - -/* - * Print an ethernet address. - * - * A colon symbol is added between each character, and a new-line - * character is put at the end. - * Example: - * 08:00:27:3d:43:fd - */ -#define PRINT_ETH_ADDR(addr) \ -do { \ - int _i; \ - u_char *_ptr; \ - \ - _ptr = (addr); \ - _i = ETHER_ADDR_LEN; \ - do { \ - printf("%s%02x", \ - (_i == ETHER_ADDR_LEN) ? "" : ":", \ - *_ptr++); \ - } while (--_i > 0); \ - printf("\n"); \ -} while (0) - -/* - * Print an Ethernet header. - * - * Example: - * Ethernet header: - * ---------------- - * Type: 0800 - * Source: 08:00:27:3d:43:fd - * Destination: ff:ff:ff:ff:ff:ff - */ -#define PRINT_ETH_PKT(eh) \ -do { \ - printf("Ethernet header:\n"); \ - printf("----------------\n"); \ - printf("Type: %04x\n", ntohs((eh)->ether_type)); \ - printf("Source Address: "); \ - PRINT_ETH_ADDR((eh)->ether_shost); \ - printf("Destination Address: "); \ - PRINT_ETH_ADDR((eh)->ether_dhost); \ -} while (0) - - -/* - * Print an Arp packet. - * - * Example: - * Arp header: - * ----------- - * htype: 0001 ptype: 0800 - * hlen: 6 plen: 4 - * oper: 1 - * sha: 52:54:00:12:34:56 - * spa: 10.0.2.222 - * tha: 00:00:00:00:00:00 - * tpa: 10.0.2.15 - */ -#define PRINT_ARP_PKT(arp) \ -do { \ - printf("Arp header:\n"); \ - printf("-----------\n"); \ - printf("htype: %04x\tptype: %04x\n", \ - ntohs(arp->htype), ntohs(arp->ptype)); \ - printf("hlen: %u\tplen: %u\n", arp->hlen, arp->plen); \ - printf("oper: %d\n", ntohs(arp->oper)); \ - printf("sha: "); \ - PRINT_ETH_ADDR(arp->sha); \ - printf("spa: %s\n", inet_ntoa(*(struct in_addr *) arp->spa)); \ - printf("tha: "); \ - PRINT_ETH_ADDR(arp->tha); \ - printf("tpa: %s\n", inet_ntoa(*(struct in_addr *) arp->tpa)); \ -} while (0) - - -/* - * Print an IP header. - * - * Example: - * Ip header: - * ---------- - * Length: 54 - * Source: 192.168.0.1 - * Destination: 192.168.0.22 - */ -#define PRINT_IP_PKT(ip) \ -do { \ - printf("Ip header:\n"); \ - printf("----------\n"); \ - printf("Length: %u\n", ntohs((ip)->ip_len)); \ - printf("Source Address: %s\n", inet_ntoa((ip)->ip_src)); \ - printf("Destination Address: %s\n", \ - inet_ntoa((ip)->ip_dst)); \ -} while (0) - - -#endif /* _NETMAP_TEST_USERSPACE_H_ */ diff --git a/private/test/testnetmap.c b/private/test/testnetmap.c deleted file mode 100644 index 4fec538aa..000000000 --- a/private/test/testnetmap.c +++ /dev/null @@ -1,22 +0,0 @@ -#include - -#include "testnetmap.h" - - -int -main(int argc, char **argv) -{ - if (argc != 2) { - printf("Usage: %s \n", argv[0]); - return (1); - } - - - test_device(argv[1]); - - test_userspace(argv[1]); - - test_speed(argv[1]); - - return (0); -} diff --git a/private/test/testnetmap.h b/private/test/testnetmap.h deleted file mode 100644 index 3f2aef1a1..000000000 --- a/private/test/testnetmap.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef _NETMAP_TEST_H_ -#define _NETMAP_TEST_H_ - -#include -#include /* exit */ - -#define VERBOSE 1 - -#define ASSERT(x) \ - do { \ - if (!(x)) { \ - printf("In function '%s':\n", __func__); \ - printf("%s:%d: fail: " #x ": %s\n", \ - __FILE__, __LINE__, strerror(errno)); \ - exit(1); \ - } \ - } while (0) - -#define SUCCESSF(...) \ - do { \ - if (VERBOSE) { \ - printf("Success: %s", __func__); \ - printf(__VA_ARGS__); \ - } \ - } while (0) - -#define SUCCESS() SUCCESSF("\n") - - -void test_device(const char *ifname); -void test_speed(const char *ifname); -void test_userspace(const char *ifname); - -#endif /* _NETMAP_TEST_H_ */ diff --git a/private/tools/luigi.sh b/private/tools/luigi.sh deleted file mode 100644 index c362aa2a2..000000000 --- a/private/tools/luigi.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/sh -# -# commands to sync the files in netmap -# sh ... --netmap netmap_tree --src bsd_tree [diff|patch|revert] - -# MYFILES is the list of kernel files modified -FREEBSD_TREE=${HOME}/FreeBSD/head -NETMAP_TREE=/usr/ports-luigi/netmap-release -MY_FILES="\ - conf/NOTES conf/files conf/options \ - dev/e1000/if_igb.c dev/e1000/if_lem.c dev/e1000/if_em.c \ - dev/re/if_re.c \ - dev/bge/if_bge.c \ - dev/ixgbe/ixgbe.c \ - " - -while [ true ] ; do - case $1 in - --netmap) # netmap tree - NETMAP_TREE=$2; - shift - ;; - --src) # FreeBSD tree - FREEBSD_TREE=$2 - shift - ;; - --dry) # dry run - DRY=-C - ;; - --h*) # help - echo "sh ... --netmap netmap_tree --src bsd_tree [diff|patch|revert] " - exit 0 - ;; - - diff) # compute diffs - (cd $FREEBSD_TREE/sys; svn diff $MY_FILES) - ;; - revert) # remove additional files - (cd $FREEBSD_TREE/sys; svn revert $MY_FILES; \ - rm dev/netmap; rm net/netmap*) - ;; - patch) # compute diffs - in=$2 - [ x"$in" = x ] && in=$NETMAP_TREE/head-netmap.diff - (cd $FREEBSD_TREE/sys; patch ${DRY} < $in ; \ - ln -s $NETMAP_TREE/sys/dev/netmap dev/netmap; \ - ln -s $NETMAP_TREE/sys/net/netmap.h net/netmap.h; \ - ln -s $NETMAP_TREE/sys/net/netmap_user.h net/netmap_user.h; \ - ) - ;; - *) - break; - esac - shift; -done diff --git a/private/tools/qemu/PICOBSD b/private/tools/qemu/PICOBSD deleted file mode 100644 index d57913e60..000000000 --- a/private/tools/qemu/PICOBSD +++ /dev/null @@ -1,154 +0,0 @@ -# -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/PICOBSD 201065 2009-12-27 22:34:31Z luigi $ -# A configuration file to run tests on qemu. -# We disable SMP because it does not work well with qemu, and set HZ=1000 -# to avoid it being overridden. -# -# Line starting with #PicoBSD contains PicoBSD build parameters -#marker def_sz init MFS_inodes floppy_inodes -#PicoBSD 26000 init 8192 32768 -options MD_ROOT_SIZE=26000 # same as def_sz - -hints "PICOBSD.hints" - -# values accessible through getenv() -# env "PICOBSD.env" - -#cpu I486_CPU -#cpu I586_CPU -cpu I686_CPU -ident PICOBSD - -options SMP -device acpi # more frequencies ? -device apic # kern_et ? -device cpufreq - -option INVARIANTS -option INVARIANT_SUPPORT -options SCHED_ULE # mandatory to have one scheduler -options PREEMPTION -#options MATH_EMULATE #Support for x87 emulation -options INET #InterNETworking -options INET6 -options FFS #Berkeley Fast Filesystem -#options BOOTP #Use BOOTP to obtain IP address/hostname -options MD_ROOT #MD is a potential root device - -#options NFS #Network Filesystem -#options NFS_ROOT #NFS usable as root device, NFS required - -#options MSDOSFS #MSDOS Filesystem -#options CD9660 #ISO 9660 Filesystem -#options CD9660_ROOT #CD-ROM usable as root, CD9660 required -#options DEVFS #Device Filesystem -#options PROCFS #Process filesystem -options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] - -options KDB -options DDB - -options IPFIREWALL -options IPFIREWALL_DEFAULT_TO_ACCEPT -options IPDIVERT # divert (for natd) - -# Support for bridging and bandwidth limiting -options DUMMYNET -options IPFIREWALL_NAT -options LIBALIAS -device if_bridge -# Running with less than 1000 seems to give poor timing on -# qemu, so we set HZ explicitly. -options HZ=1000 - -device random # used by ssh -device pci - -# Floppy drives -device fdc - -# ATA and ATAPI devices -#device ata -#device atadisk # ATA disk drives -#device atapicd # ATAPI CDROM drives -#options ATA_STATIC_ID #Static device numbering - -# atkbdc0 controls both the keyboard and the PS/2 mouse -device atkbdc # At keyboard controller -device atkbd -#device psm # do we need the mouse ?? - -device vga # VGA screen - -# syscons is the default console driver, resembling an SCO console -device sc - -# Serial (COM) ports -device uart - -# Audio support -#device pcm - -# PCCARD (PCMCIA) support -#device card # pccard bus -#device pcic # PCMCIA bridge - -# Parallel port -#device ppc -#device ppbus # Parallel port bus (required) -#device lpt # Printer -#device plip # TCP/IP over parallel -#device ppi # Parallel port interface device - -# -# The following Ethernet NICs are all PCI devices. -# -device miibus -device ixgbe -device cxgbe # chelsio -device firmware # needed for chelsio ? - -device em -device bge -#device fxp # Intel EtherExpress PRO/100B (82557, 82558) -device nfe # nVidia nForce MCP on-board Ethernet -#device xl # 3Com -device rl # RealTek 8129/8139 -device re # RealTek 8139C+/8169/8169S/8110S -device sis # National/SiS -device dc # DEC/Intel 21143 and various workalikes -device ed - -device loop # Network loopback -device ether # Ethernet support -device tun # Packet tunnel. -device pty # Pseudo-ttys (telnet etc) -device md # Memory "disks" -#device gif 4 # IPv6 and IPv4 tunneling -#device faith 1 # IPv6-to-IPv4 relaying (translation) -device tap - -#-- usb support -device uhci # UHCI PCI->USB interface -device ohci # OHCI PCI->USB interface -device ehci # EHCI PCI->USB interface (USB 2.0) -device usb -device uhid # "Human Interface Devices" -device ukbd # Keyboard -device scbus -device da -device umass # Disks/Mass storage - Requires scbus and da -device ums # Mouse - - -device kbdmux -options KBD_INSTALL_CDEV - -#options VIMAGE - -#options DEVICE_POLLING - -# The `bpf' device enables the Berkeley Packet Filter. -# Be aware of the administrative consequences of enabling this! -device bpf # Berkeley packet filter -device netmap diff --git a/private/tools/qemu/PICOBSD.hints b/private/tools/qemu/PICOBSD.hints deleted file mode 100644 index cdb038ba4..000000000 --- a/private/tools/qemu/PICOBSD.hints +++ /dev/null @@ -1,39 +0,0 @@ -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/PICOBSD.hints 201065 2009-12-27 22:34:31Z luigi $ -hint.fdc.0.at="isa" -hint.fdc.0.port="0x3F0" -hint.fdc.0.irq="6" -hint.fdc.0.drq="2" -hint.fd.0.at="fdc0" -hint.fd.0.drive="0" -hint.ata.0.at="isa" -hint.ata.0.port="0x1F0" -hint.ata.0.irq="14" -hint.ata.1.at="isa" -hint.ata.1.port="0x170" -hint.ata.1.irq="15" -hint.atkbdc.0.at="isa" -hint.atkbdc.0.port="0x060" -hint.atkbd.0.at="atkbdc" -hint.atkbd.0.irq="1" -hint.psm.0.at="atkbdc" -hint.psm.0.irq="12" -hint.vga.0.at="isa" -hint.sc.0.at="isa" -hint.npx.0.at="nexus" -hint.npx.0.port="0x0F0" -hint.npx.0.irq="13" -hint.uart.0.at="isa" -hint.uart.0.port="0x3F8" -hint.uart.0.flags="0x10" -hint.uart.0.irq="4" -hint.uart.1.at="isa" -hint.uart.1.port="0x2F8" -hint.uart.1.irq="3" -hint.ed.0.at="isa" -hint.ed.0.port="0x280" -hint.ed.0.irq="5" -hint.ed.0.maddr="0xd8000" -hint.ed.1.at="isa" -hint.ed.1.port="0x300" -hint.ed.1.irq="5" -hint.ed.1.maddr="0xd0000" diff --git a/private/tools/qemu/config b/private/tools/qemu/config deleted file mode 100644 index da5ac7538..000000000 --- a/private/tools/qemu/config +++ /dev/null @@ -1,68 +0,0 @@ -# configuration for picobsd build script. -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/config 201065 2009-12-27 22:34:31Z luigi $ -# it should only contain variable definitions -- it is sourced -# by the shell much like rc.conf* files - -fd_size="12000" - -# You can use it e.g. in a local configuration file by writing -# -# do_copyfiles_user() { -# local dst=$1 -# find_progs nvi sed less grep -# cp -p ${u_progs} ${dst}/bin -# cp -p ${u_libs} ${dst}/lib -# mkdir -p ${dst}/libexec -# find_progs ld-elf.so.1 -# cp -p ${u_progs} ${dst}/libexec -# } -#copy_files=" -#" -do_copyfiles_user() { - local dst=$1 - log "--- called do_copyfiles_user" - - mkdir -p ${dst}/usr/lib - - find_progs -L / -P /usr/ports/security/dropbear/work/dropbear-0.52 \ - dbclient dropbear - cp -p ${u_progs} ${dst}/bin - cp -p ${u_libs} ${dst}/usr/lib - - find_progs -L / /usr/bin/ssh /usr/bin/scp /usr/sbin/sshd - cp -p ${u_progs} ${dst}/bin - cp -p ${u_libs} ${dst}/usr/lib - - #find_progs -L / -P /usr/local/bin trafshow - #cp -p ${u_progs} ${dst}/bin - #cp -p ${u_libs} ${dst}/usr/lib - - local d=/home/matteo/workspace/netmap/v2/netmap-v2/examples - find_progs -L / -P $d pkt-gen pkt-gen-pcap bridge pingd - cp -p ${u_progs} ${dst}/bin - cp -p ${u_libs} ${dst}/usr/lib - - local d=/home/matteo/workspace/netmap/v2/netmap-v2/test - find_progs -L / -P $d interrupt_stats - cp -p ${u_progs} ${dst}/bin - cp -p ${u_libs} ${dst}/usr/lib - - #find_progs -L $d -P $d libnetmap.so - #cp -p ${u_progs} ${dst}/bin/libpcap.so.7 - - -# find_progs -L $d /tmp/click -# cp -p ${u_progs} ${dst}/bin -# cp -p ${u_libs} ${dst}/usr/lib - -# find_progs -L /usr/local/lib -P /usr/local/bin tcpreplay -# cp -p ${u_progs} ${dst}/bin -# cp -p ${u_libs} ${dst}/usr/lib - - cp -p /usr/bin/vi ${dst}/bin/vi - -# cp -p /tmp/tcpreplay ${dst}/root -# cp -p /tmp/netsend ${dst}/bin -# cp -p /tmp/a.pcap ${dst}/root -# cp -p /tmp/open_key/* ${dst}/root -} diff --git a/private/tools/qemu/crunch.conf b/private/tools/qemu/crunch.conf deleted file mode 100644 index 77179efb4..000000000 --- a/private/tools/qemu/crunch.conf +++ /dev/null @@ -1,209 +0,0 @@ -# -# $FreeBSD: user/luigi/ipfw3-head/release/picobsd/qemu/crunch.conf 201065 2009-12-27 22:34:31Z luigi $ -# -# Configuration file for "qemu" images.. -# -# Depending on your needs, you will almost surely need to -# add/remove/change programs according to your needs. -# Remember that some programs require matching kernel options to -# enable device drivers etc. -# -# To figure out how much space is used by each program, do -# -# size build_dir-bridge/crunch/*lo -# -# Remember that programs require libraries, which add up to the -# total size. The final binary is build_dir-bridge/mfs.tree/stand/crunch -# and you can check which libraries it uses with -# -# ldd build_dir-bridge/mfs.tree/stand/crunch - -# crunchgen configuration to build the crunched binary, see "man crunchgen" -# We need to specify generic build options, the places where to look -# for sources, and the list of program and libraries we want to put -# in the crunched binary. -# -# NOTE: the string "/usr/src" below will be automatically replaced with -# the path set in the 'build' script. - -# Default build options. Basically tell the Makefiles -# that to use the most compact possible version of the code. - -buildopts -DNO_PAM -DRELEASE_CRUNCH -DPPP_NO_NETGRAPH -buildopts -DTRACEROUTE_NO_IPSEC # -DNO_INET6 -buildopts -DWITHOUT_IPX - -# Directories where to look for sources of various binaries. -# @__CWD__@ is a magic keyword in the picobsd's (Makefile.conf) -# which is replaced with the directory with the picobsd configuration -# corresponding to your image. This way you can have custom sources -# in that directory overriding system programs. - -srcdirs @__CWD__@/src - -# Some programs are especially written for PicoBSD and reside in -# release/picobsd/tinyware. -# Put this entry near the head of the list to override standard binaries. - -srcdirs /usr/src/release/picobsd/tinyware - -# Other standard locations for sources. -# If a program uses its own source directory, add - -srcdirs /usr/src/bin -srcdirs /usr/src/sbin/i386 -srcdirs /usr/src/sbin -srcdirs /usr/src/usr.bin -srcdirs /usr/src/gnu/usr.bin -srcdirs /usr/src/usr.sbin -srcdirs /usr/src/libexec - -# For programs that reside in different places, the best option -# is to use the command "special XXX srcdir YYY" where XXX is the -# program name and YYY is the directory path. -# "special XXX ..." can be used to specify more options, see again -# the crunchgen manpage. - -#--- Basic configuraton -# init is always necessary (unless you have a replacement, oinit) -progs init - -# fsck is almost always necessary, unless you have everything on the -# image and use 'tar' or something similar to read/write raw blocks -# from the floppy. - -progs fsck - -# ifconfig is needed if you want to configure interfaces. -progs ifconfig - -# You will also need a shell and a bunch of utilities. -# The standard shell is not that large, but you need many -# external programs. In fact most of them do not take much space -# as they merely issue a system call, and print the result. -# For a more compact version of shell and utilities, you could -# try busybox, however most system management commands in busybox -# will not work as they use linux-specific interfaces. - -progs sh -ln sh -sh - -# the small utilities -progs echo -progs pwd mkdir rmdir -progs chmod chown -ln chown chgrp -progs mv ln cp rm ls -progs cat tail tee -progs test -ln test [ - -progs less -ln less more -progs mount -progs minigzip -ln minigzip gzip -progs kill -progs df -progs ps -progs ns # this is the picobsd version -ln ns netstat -progs vm -progs hostname -progs login -progs getty -progs stty -progs w -progs msg -ln msg dmesg -progs reboot - -progs sysctl -progs swapon -progs pwd_mkdb -progs umount -progs du -progs passwd - -progs route - -# If you want to run natd, remember the alias library -progs natd -libs_so -lalias # natd -progs tcpdump -special tcpdump srcdir /usr/src/usr.sbin/tcpdump/tcpdump -libs_so -lpcap # used by tcpdump -libs_so -lcrypto # used by tcpdump with inet6 - -# ppp is rather large. Note that as of Jan.01, RELEASE_CRUNCH -# makes ppp not use libalias, so you cannot have aliasing. -#progs ppp - -# You need an editor. ee is relatively small, though there are -# smaller ones. vi is much larger. -# The editor also usually need a curses library. -progs ee - -progs arp - -# these require libgeom -# progs bsdlabel fdisk mdconfig - -progs kldload kldunload kldstat -progs kldxref -progs grep -libs_so -lgnuregex -lbz2 -# dhclient-script requires 'sed' -progs dhclient -progs sed -progs date -progs time -progs ping -progs ping6 -progs tar - -progs top -progs pciconf - -#progs routed -progs ipfw -progs traceroute -progs mdmfs -ln mdmfs mount_mfs -# Various filesystem support -- remember to enable the kernel parts -# progs mount_msdosfs -progs mount_nfs -# progs mount_cd9660 -ln mount_nfs nfs -ln mount_cd9660 cd9660 -#progs newfs -#ln newfs mount_mfs -# ln mount_msdosfs msdos - -progs jail jexec jls - -# For a small ssh client/server use dropbear - -# Now the libraries -libs_so -lc # the C library -libs_so -ll # used by sh (really ?) -libs_so -lufs # used by mount -### ee uses ncurses but as a dependency -#libs_so -lncurses -libs_so -lm -libs_so -ledit -lutil -libs_so -lcrypt -libs_so -lkvm -libs_so -lz -libs_so -lbsdxml -libs_so -lsbuf -libs_so -ljail # used by ifconfig -libs_so -lulog -libs_so -lipsec -lmd -libs_so -larchive -lbz2 -libs_so -llzma # added after 207840 - -progs vmstat -libs_so -lmemstat -libs_so -ldevstat -progs cpuset diff --git a/private/tools/qemu/floppy.tree.exclude b/private/tools/qemu/floppy.tree.exclude deleted file mode 100644 index adfc6cc75..000000000 --- a/private/tools/qemu/floppy.tree.exclude +++ /dev/null @@ -1,2 +0,0 @@ -etc/snmpd.conf -etc/ppp diff --git a/private/tools/qemu/floppy.tree/etc/motd b/private/tools/qemu/floppy.tree/etc/motd deleted file mode 100644 index 91d66f5e0..000000000 --- a/private/tools/qemu/floppy.tree/etc/motd +++ /dev/null @@ -1,9 +0,0 @@ -============================================================== - - )\_)\ Welcome to PicoBSD, netmap demo image - (o,o) - __ \~/ Root password is "setup" - -->====\ - ~~ d d see http://info.iet.unipi.it/~luigi/netmap/ - -============================================================== diff --git a/private/tools/qemu/floppy.tree/etc/rc.conf.defaults b/private/tools/qemu/floppy.tree/etc/rc.conf.defaults deleted file mode 100644 index 70e3767ad..000000000 --- a/private/tools/qemu/floppy.tree/etc/rc.conf.defaults +++ /dev/null @@ -1,188 +0,0 @@ -#!/bin/sh -# $FreeBSD: head/release/picobsd/floppy.tree/etc/rc.conf.defaults 91949 2002-03-09 18:27:02Z luigi $ -# -# rc.conf for picobsd. This is sourced from /etc/rc1, and is supposed to -# contain only shell functions that are used later in /etc/rc1. - -# set default values for variables. Boolean values should be either -# NO or YES -- other values are not guaranteed to work. - -rc_conf_set_defaults() { -hostname="" # Should not need to set it -syslogd_enable="NO" -pccard_enable="NO" -swapfile="" # name of swapfile if aux swapfile desired. - -# Network interface configurations: ifconfig_${interface}[_aliasNN] -ifconfig_lo0="inet 127.0.0.1" # default loopback device configuration. -#ifconfig_lo0_alias0="inet 127.0.0.254 netmask 0xffffffff" # Sample alias entry. - -### Network daemons options: they are only run if present. -sshd_enable="YES" # if present... -inetd_enable="YES" # Run the network daemon dispatcher (or NO) -inetd_flags="" # Optional flags to inetd -snmpd_enable="NO" # Run the SNMP daemon (or NO) -snmpd_flags="-C -c /etc/snmpd.conf" # Optional flags to snmpd - -### Network routing options: ### -defaultrouter="NO" # Set to default gateway (or NO). -static_routes="" # Set to static route list (or leave empty). -gateway_enable="NO" # Set to YES if this host will be a gateway. -arpproxy_all="" # replaces obsolete kernel option ARP_PROXYALL. -default_mask="0xffffff00" - -### Other network features -firewall_enable="NO" -firewall_quiet="NO" # be quiet if set. -firewall_type="" # Standard types or absolute pathname. -tcp_extensions="NO" # Allow RFC1323 & RFC1644 extensions (or NO). - -### Overrides for some files in /etc. Leave empty if no override, -### set variable (remember to use multiple lines) to override content. - -host_conf="hosts -bind" -resolv_conf="" -} - -# Try to identify the system by using the MAC address and name of the -# first ethernet interface, made available as $main_eth $main_if -find_system_id() { - main_ether="" - for main_if in `ifconfig -l` ; do - set `ifconfig $main_if` - while [ "$1" != "" ] ; do - if [ $1 = "ether" ] ; then - main_ether=$2 - break 2 - else - shift - fi - done - done -} - -# the following lets the user specify a name and ip for his system -read_address() { - ## XXX disabled - hostname=default - return # - - echo "Please enter a hostname and IP address for your system $main_ether" - read hostname the_ip - if [ "${hostname}" != "" ] ; then - echo "# $main_ether $hostname" >> /etc/hosts - echo "$the_ip $hostname" >> /etc/hosts - else - hostname=default - fi -} - -# set "ether" using $1 (interface name) as search key -get_ether() { - local key - key=$1 - ether="" - set `ifconfig ${key}` - while [ "$1" != "" ] ; do - if [ "$1" = "ether" ] ; then - ether=$2 - break - else - shift - fi - done -} - -# read content from /etc/hosts into a couple of arrays -# (needed later in fetch_hostname) -read_hosts() { - local i a b c key junk - i="" - while read a b c junk ; do - if [ "$a" = "#ethertable" ] ; then - i=0 - elif [ "$i" != "" -a "$a" = "#" -a "$b" != "" ] ; then - eval eth_${i}=$b - eval eth_host_${i}=$c - i=$(($i+1)) - fi - done < /etc/hosts -} - -# set ${hostname} using $1 (MAC address) as search key in /etc/hosts -# Returns empty value if $1 is empty -fetch_hostname() { - local i b key - hostname="" - [ "$1" = "" ] && return - key=$1 - i=0 - b="x" - [ "${eth_0}" = "" ] && read_hosts # fill cache. - while [ "$b" != "" -a "${hostname}" = "" ] ; do - eval b=\${eth_${i}} - case X${key} in - X${b} ) # so we can use wildcards - eval hostname=\${eth_host_${i}} - break - ;; - esac - i=$(($i+1)) - done - echo "fetch_hostname for <${key}> returns <${hostname}>" -} - -# sets "mask" using $1 (netmask name) as the search key in /etc/networks -fetch_mask() { - local a b key junk - key=$1 # search key, typically hostname-netmask - mask="" - while read a b junk; do # key mask otherstuff - case X${key} in - X${a} ) # The X is so we can use wildcards in ${a} - mask=$b - break - ;; - esac - done < /etc/networks - if [ "${mask}" = "" ] ; then - mask=${default_mask} - fi - echo "fetch_mask for <${key}> returns <${mask}>" -} - -# set hostname, and ifconfig_${main_if} (whose MAC is ${main_ether}) -# if not found, read from console -set_main_interface() { - if [ -z "${hostname}" ] ; then - if [ -z "${main_ether}" ] ; then - echo "No ethernets found, using localhost" - hostname=localhost - return - fi - fetch_hostname ${main_ether} - fi - - [ -z "${hostname}" -o "${hostname}" = "." ] && read_address - - fetch_mask ${hostname}-netmask - - eval ifconfig_${main_if}=\" \${hostname} netmask \${mask}\" - network_interfaces=`ifconfig -l` -} - -# set ifconfig_${interface} for all other interfaces -set_all_interfaces() { - local i ether hostname mask - - for i in `ifconfig -l` ; do - if [ "$i" != "${main_if}" ] ; then - get_ether $i - fetch_hostname ${ether} - fetch_mask ${hostname}-netmask - [ -n "${ether}" -a -n "${hostname}" ] && \ - eval ifconfig_${i}=\" \${hostname} netmask \${mask}\" - fi - done -} diff --git a/private/tools/qemu/floppy.tree/etc/sysctl.conf b/private/tools/qemu/floppy.tree/etc/sysctl.conf deleted file mode 100644 index 295d78558..000000000 --- a/private/tools/qemu/floppy.tree/etc/sysctl.conf +++ /dev/null @@ -1 +0,0 @@ -net.inet.icmp.icmplim=0 diff --git a/private/tools/qemu/floppy.tree/root/.profile b/private/tools/qemu/floppy.tree/root/.profile deleted file mode 100644 index eea8e2439..000000000 --- a/private/tools/qemu/floppy.tree/root/.profile +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -./test f1 diff --git a/private/tools/qemu/floppy.tree/root/bri.click b/private/tools/qemu/floppy.tree/root/bri.click deleted file mode 100644 index 00f51fe9e..000000000 --- a/private/tools/qemu/floppy.tree/root/bri.click +++ /dev/null @@ -1,19 +0,0 @@ -// -// $Id$ -// -// A sample test configuration for click -// -// -// create a switch - -sw :: EtherSwitch; - -// two input devices - -c0 :: FromDevice(ix0, BURST 30, PROMISC true); -c1 :: FromDevice(ix1, BURST 30, PROMISC true); - -// and now pass packets around - -c0[0] -> [0]sw[0] -> Queue(10000) -> ToDevice(ix0); -c1[0] -> [1]sw[1] -> Queue(10000) -> ToDevice(ix1); diff --git a/private/tools/qemu/floppy.tree/root/rates b/private/tools/qemu/floppy.tree/root/rates deleted file mode 100644 index 3158e38eb..000000000 --- a/private/tools/qemu/floppy.tree/root/rates +++ /dev/null @@ -1,2 +0,0 @@ -I80211b_11M - diff --git a/private/tools/qemu/floppy.tree/root/start_test b/private/tools/qemu/floppy.tree/root/start_test deleted file mode 100644 index b4f267f27..000000000 --- a/private/tools/qemu/floppy.tree/root/start_test +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -sysctl dev.cpu.0.freq=1200 -sysctl dev.cpu.0.freq=2934 -sysctl dev.ix.0.flow_control=0 -sysctl dev.ix.1.flow_control=0 -ifconfig ix0 up -ifconfig ix1 up diff --git a/private/tools/qemu/floppy.tree/root/t1.ck b/private/tools/qemu/floppy.tree/root/t1.ck deleted file mode 100644 index ac143f0ff..000000000 --- a/private/tools/qemu/floppy.tree/root/t1.ck +++ /dev/null @@ -1,11 +0,0 @@ -// test1.ck -s :: InfiniteSource(LENGTH 64, BURST 1, NOTS true) -// -> q :: Queue -// -> c :: Counter - -> d :: Discard(BURST 1); - -DriverManager( - wait 1s, write s.active false, - //print "done $(d.count) packets $(q.drops) drops in 1s" - print "done $(d.count) packets drops in 1s" -); diff --git a/private/tools/qemu/floppy.tree/root/t2.ck b/private/tools/qemu/floppy.tree/root/t2.ck deleted file mode 100644 index c39325ef3..000000000 --- a/private/tools/qemu/floppy.tree/root/t2.ck +++ /dev/null @@ -1,14 +0,0 @@ -// test1.ck -FromDevice(ix0, BURST 100) -> Discard; - -s :: FromDevice(ix1, BURST 100) -> Queue -> ToDevice(ix0, BURST 100); - -DriverManager( - set a 0, - label x, - wait 1s, - set b $(s.count), - print "done $(sub $b $a) packets in 1s", - set a $b, - goto x 1 -); diff --git a/private/tools/qemu/floppy.tree/root/test b/private/tools/qemu/floppy.tree/root/test deleted file mode 100755 index 146fac520..000000000 --- a/private/tools/qemu/floppy.tree/root/test +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/sh - -f1() { # default setting - sysctl kern.timecounter.hardware=PIIX - return - ifconfig ed2 delete - dhclient ed2 - sysctl net.inet.ip.fw.verbose=1 -} - -# test tables -f2() { - ipfw table 2 add 22 2000 - ipfw table 2 add 53 3000 - ipfw table 2 add 80 4000 - ipfw table 2 add 127.0.0.1 5000 - ipfw table 2 list -} - -f3() { - ipfw -q flush - ipfw add 100 count log out - ipfw add 200 skipto tablearg lookup dst-port 2 - ipfw add 300 skipto tablearg lookup dst-ip 2 - ipfw add 1000 allow ip from any to any - ipfw add 2000 allow ip from any to any - ipfw add 3000 allow ip from any to any - ipfw add 4000 allow ip from any to any - ipfw add 5000 allow ip from any to any -} - -f4() { - ipfw -q flush - echo > /etc/libalias.conf - sysctl net.inet.ip.fw.verbose=1 - ipfw add 100 divert natd log ip from any to any - ipfw add 100 count ip from any to any - ipfw add 200 count ip from any to any - natd -v -interface ed2 & -} - -f5() { - ipfw pipe 10 config bw 80kbit/s - ipfw add 100 pipe 10 ip from any to any - ipfw pipe show -} - -# test queues -f6() { - ipfw pipe 1 config bw 400kbit/s queue 30 - #ipfw pipe 2 config bw 180kbit/s - #ipfw pipe 3 config delay 30ms queue 100kbytes - ipfw queue 11 config sched 1 weight 1 - ipfw queue 12 config sched 1 weight 2 - ipfw queue 14 config sched 1 weight 4 - ipfw queue 18 config sched 1 weight 8 - ipfw -q flush - ipfw add 100 queue 11 src-ip 0&3 // low bits 00 - ipfw add 100 queue 12 src-ip 1&3 // low bits 01 - ipfw add 100 queue 14 src-ip 2&3 // low bits 10 - ipfw add 100 queue 18 src-ip 3&3 // low bits 11 -} - -# jail test -f7() { -jail -c -nXX vnet path=/ host.hostname=test.me persist=true command=/bin/sh -} - -f8() { - ipfw add 100 queue tablearg lookup dscp 1 - ipfw queue 10 config sched 5 mask queue - ipfw queue 20 config sched 5 mask queue - ipfw queue 30 config sched 5 mask queue - ipfw pipe 5 config bw 80Kbit/s - # ipfw table 1 add 0 10 - ipfw table 1 add 1 20 - ipfw table 1 add 2 30 - ipfw table 1 list -} - -case $1 in - f[0-9]*) $* ;; -esac diff --git a/private/tools/qemu/floppy.tree/root/test_coarse.sh b/private/tools/qemu/floppy.tree/root/test_coarse.sh deleted file mode 100644 index 21d4dce7c..000000000 --- a/private/tools/qemu/floppy.tree/root/test_coarse.sh +++ /dev/null @@ -1,35 +0,0 @@ -#info -INFO=131.114.58.84 -OL5=131.114.59.241 - -# -# test per vedere l'andamento degli ack su download da info -# la wmem non influisce, ma ne tengo nota -#echo "4096 16384 65536" > /proc/sys/net/ipv4/tcp_wmem -#./ipfw/ipfw pipe 1 config coarse rates queue 800KBytes -#./ipfw/ipfw add 1 pipe 1 all from $INFO to $OL5 src-port 80 -#./ipfw/ipfw add 2 pipe 1 all from $OL5 to $INFO dst-port 80 - -# -# test per vedere l'andamento degli ack su download da info -# la wmem non influisce, ma ne tengo nota -# PRIORITA' AGLI ACK -#echo "4096 16384 65536" > /proc/sys/net/ipv4/tcp_wmem -#./ipfw/ipfw pipe 1 config coarse rates queue 800KBytes -#./ipfw/ipfw queue 1 config pipe 1 weight 1 -#./ipfw/ipfw queue 2 config pipe 1 weight 10 -#./ipfw/ipfw add 1 queue 1 all from $INFO to $OL5 src-port 80 -#./ipfw/ipfw add 2 queue 2 all from $OL5 to $INFO dst-port 80 - -#ifconfig lo mtu 1500 -#./ipfw/ipfw pipe 1 config coarse rates -#./ipfw/ipfw add 1 pipe 1 all from 131.114.59.241 to 131.114.59.241 dst-port 80 in -#./ipfw/ipfw add 2 pipe 1 all from 131.114.59.241 to 131.114.59.241 src-port 80 out - -# the default mtu for loopback is 16k, use 1500 instead -ifconfig lo mtu 1500 -# configure the pipe and the rules -./ipfw/ipfw pipe 1 config coarse rates -./ipfw/ipfw add 1 pipe 1 all from any to 127.0.0.1 dst-port 80 in -./ipfw/ipfw add 2 pipe 1 all from any to 127.0.0.1 src-port 80 out - diff --git a/private/tools/qemu/floppy.tree/test b/private/tools/qemu/floppy.tree/test deleted file mode 100755 index d1a3fcc05..000000000 --- a/private/tools/qemu/floppy.tree/test +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/sh - -f1() { - sysctl kern.timecounter.hardware=i8254 - ifconfig ed2 delete - dhclient ed2 - sysctl net.inet.ip.fw.verbose=1 -} - -f2() { - ipfw table 2 add 22 2000 - ipfw table 2 add 53 3000 - ipfw table 2 add 80 4000 - ipfw table 2 add 127.0.0.1 5000 - ipfw table 2 list -} - -f3() { - ipfw -q flush - ipfw add 100 count log out - ipfw add 200 skipto tablearg lookup dst-port 2 - ipfw add 300 skipto tablearg lookup dst-ip 2 - ipfw add 1000 allow ip from any to any - ipfw add 2000 allow ip from any to any - ipfw add 3000 allow ip from any to any - ipfw add 4000 allow ip from any to any - ipfw add 5000 allow ip from any to any -} - -f4() { - ipfw -q flush - echo > /etc/libalias.conf - sysctl net.inet.ip.fw.verbose=1 - ipfw add 100 divert natd log ip from any to any - ipfw add 100 count ip from any to any - ipfw add 200 count ip from any to any - natd -v -interface ed2 & -} - -case $1 in - f[0-9]*) $* ;; -esac diff --git a/private/vale.4 b/private/vale.4 deleted file mode 100644 index c254ba57f..000000000 --- a/private/vale.4 +++ /dev/null @@ -1,253 +0,0 @@ -.\" Copyright (c) 2012-2014 Luigi Rizzo, Universita` di Pisa -.\" All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" This document is derived in part from the enet man page (enet.4) -.\" distributed with 4.3BSD Unix. -.\" -.\" $FreeBSD: head/share/man/man4/vale.4 228017 2011-11-27 06:55:57Z gjb $ -.\" -.Dd July 27, 2012 -.Dt VALE 4 -.Os -.Sh NAME -.Nm vale -.Nd a very fast Virtual Local Ethernet using the netmap API -.Sh SYNOPSIS -.Cd device netmap -.Sh DESCRIPTION -.Nm -is a feature of the -.Nm netmap -module that implements multiple Virtual switches that can -be used to interconnect netmap clients, including traffic -sources and sinks, packet forwarders, userspace firewalls, -and so on. -.Pp -.Nm -is implemented completely in software, and is extremely fast. -On a modern machine it can move almost 20 Million packets per -second (Mpps) per core with small frames, and about 70 Gbit/s -with 1500 byte frames. -.Pp -.Sh OPERATION -.Nm -dynamically creates switches and ports as client connect -to it using the -.Xr netmap 4 -API. -.Pp -.Nm -ports are named -.Pa vale[bdg:][port] -where -.Pa vale -is the prefix indicating a VALE switch rather than a standard interface, -.Pa bdg -indicates a specific switch (the colon is a separator), -and -.Pa port -indicates a port within the switch. -Bridge and ports names are arbitrary strings, the only -constraint being that the full name must fit within 16 -characters. -.Pp -.Nm -ports can be physical network interfaces that support -.Xr netmap 4 -API -by specifying the interface name for -.Pa [port]. -See -.Nm OPERATION -section in -.Xr netmap 4 -for details of the naming rule. -.Pp -Physical interfaces are attached using -.Pa NIOCGREGIF -command of -.Pa ioctl(), -and -.Pa NETMAP_BDG_ATTACH -at -.Em nr_cmd -field in -.Em struct nmreq . -The corresponding host stack can also be attached to the bridge, specifying -.Pa NETMAP_BDG_HOST -in -.Em nr_arg1 . -To detach the interface from the bridge, -.Pa NETMAP_BDG_DETACH -is used instead of NETMAP_BDG_ATTACH. -The host stack is also detached from the bridge at the same -time if it has been attached. -.Pp -Physical interfaces are treated as system configuration; -they are kept being attached even after the configuring process dies, -and detached by any process. -.Pp -Once a physical interface is attached, this interface is no longer -available to be directly accessed by netmap clients (user processes) or to be -attached by another bridge. -On the other hand, when any netmap client holds the physical interface, -this interface cannot be attached to a bridge. -.Pp -.Pa NETMAP_BDG_LIST -subcommand in nr_cmd of -.Em struct nmreq -is used to obtain bridge and port -information. There are two modes of how it works; -If any -.Em nr_name -starting from non '\\0' is provided, -.Pa ioctl() -returning -indicates the position of -the named interface. -This position is represented by an index of the bridge and the port, and -put in -.Em nr_arg1 -and -.Em nr_arg2 -fields, respectively. If the named interface does not exist, -.Pa ioctl() -returns -.Pa EINVAL . -.Pp -If -.Em nr_name -starting from '\\0' is provided, -.Pa ioctl() -returning indicates the -first existing interface on and after the position specified in -.Em nr_arg1 -and -.Em nr_arg2. -If the caller specified a port index greater than the highest -index of the ports, it is recognized as port index 0 of the -next bridge -( -.Em nr_arg1 -+ 1, -.Em nr_arg2 -= 0). -.Pa ioctl() -returns -.Pa EINVAL -if the given position is higher than that of -any existing interface. -On successful return of -.Pa ioctl() , -the interface name is also stored in -.Em nr_name . -.Pa NETMAP_BDG_LIST -is always used with -.Pa NIOCGINFO -command of -.Pa ioctl() -.Pp -Below is an example of printing all the existing ports walking through -all the bridges. - -.Bd -literal -compact -struct nmreq nmr; -int fd = open("/dev/netmap", O_RDWR); - -bzero(&nmr, sizeof(nmr)); -nmr.nr_version = NETMAP_API; -nmr.nr_cmd = NETMAP_BDG_LIST; -nmr.nr_arg1 = nmr.nr_arg2 = 0; /* start from bridge:0 port:0 */ -for (; !ioctl(fd, NIOCGINFO, &nmr); nmr.nr_arg2++) { - D("bridge:%d port:%d %s", nmr.nr_arg1, nmr.nr_arg2, - nmr.nr_name); - nmr.nr_name[0] = '\\0'; -} -.Ed -.Pp -See -.Xr netmap 4 -for details on the API. -.Ss LIMITS -.Nm -currently supports up to 8 switches, 254 ports per switch, -1024 buffers per port. These hard limits will be -changed to sysctl variables in future releases. -.Pp -Attaching the host stack to the bridge imposes significant performance -degradation when many packets are forwarded to the host stack by either -unicast or broadcast. -This is because every single packet going to the host stack causes mbuf -allocation in the same thread context as one forwarding packets. -.Pp -.Sh SYSCTL VARIABLES -.Nm -uses the following sysctl variables to control operation: -.Bl -tag -width 12 -.It dev.netmap.bridge -The maximum number of packets processed internally -in each iteration. -Defaults to 1024, use lower values to trade latency -with throughput. -.Pp -.It dev.netmap.verbose -Set to non-zero values to enable in-kernel diagnostics. -.El -.Pp -.Sh EXAMPLES -Create one switch, with a traffic generator connected to one -port, and a netmap-enabled tcpdump instance on another port: -.Bd -literal -offset indent -tcpdump -ni vale-a:1 & -pkt-gen -i vale-a:0 -f tx & -.Ed -.Pp -Create two switches, -each connected to two qemu machines on different ports. -.Bd -literal -offset indent -qemu -net nic -net netmap,ifname=vale-1:a ... & -qemu -net nic -net netmap,ifname=vale-1:b ... & -qemu -net nic -net netmap,ifname=vale-2:c ... & -qemu -net nic -net netmap,ifname=vale-2:d ... & -.Ed -.Sh SEE ALSO -.Xr netmap 4 -.Pp -.Xr http://info.iet.unipi.it/~luigi/vale/ -.Pp -Luigi Rizzo, Giuseppe Lettieri: VALE, a switched ethernet for virtual machines, -June 2012, http://info.iet.unipi.it/~luigi/vale/ -.Sh AUTHORS -.An -nosplit -The -.Nm -switch has been designed and implemented in 2012 by -.An Luigi Rizzo -and -.An Giuseppe Lettieri -at the Universita` di Pisa. -.Pp -.Nm -has been funded by the European Commission within FP7 Projects -CHANGE (257422) and OPENLAB (287581). diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index aa531a5c4..8f96fd080 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -1173,10 +1173,8 @@ netmap_txsync_to_host(struct netmap_adapter *na) * We protect access to the kring using kring->rx_queue.lock * * This routine also does the selrecord if called from the poll handler - * (we know because td != NULL). + * (we know because sr != NULL). * - * NOTE: on linux, selrecord() is defined as a macro and uses pwait - * as an additional hidden argument. * returns the number of packets delivered to tx queues in * transparent mode, or a negative value if error */ @@ -1268,7 +1266,7 @@ netmap_rxsync_from_host(struct netmap_adapter *na, NM_SELRECORD_T *sr) */ int -netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na) +netmap_get_hw_na(struct nmreq *nmr, struct ifnet *ifp, struct netmap_adapter **na) { /* generic support */ int i = netmap_admode; /* Take a snapshot. */ @@ -1342,6 +1340,16 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na) } ND("Created generic NA %p (prev %p)", gna, gna->prev); + if( nmr->nr_arg2 ){ + int poolno = nmr->nr_arg2 - 1; + struct netmap_mem_d *old = (*na)->nm_mem; + struct netmap_mem_d *new = netmap_mem_get_allocator( poolno ); + netmap_mem_put(old); + netmap_mem_get(new); + (*na)->nm_mem = new; + ND("force mempool #%d for %s: %p -> %p", poolno, ifp->if_xname, old, new); + } + return 0; #else /* !WITH_GENERIC */ return EOPNOTSUPP; @@ -1422,7 +1430,7 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create) return ENXIO; } - error = netmap_get_hw_na(ifp, &ret); + error = netmap_get_hw_na(nmr, ifp, &ret); if (error) goto out; @@ -2112,7 +2120,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread NMG_LOCK(); do { /* memsize is always valid */ - struct netmap_mem_d *nmd = &nm_mem; + struct netmap_mem_d *nmd = netmap_mem_get_allocator(0); u_int memflags; if (nmr->nr_name[0] != '\0') { @@ -2661,7 +2669,7 @@ netmap_attach_common(struct netmap_adapter *na) if (na->nm_mem == NULL) /* use the global allocator */ - na->nm_mem = &nm_mem; + na->nm_mem = netmap_mem_get_allocator(0); netmap_mem_get(na->nm_mem); #ifdef WITH_VALE if (na->nm_bdg_attach == NULL) diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index ded59e67b..94294e456 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -1199,7 +1199,7 @@ int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg); int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create); -int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na); +int netmap_get_hw_na(struct nmreq *nmr, struct ifnet *ifp, struct netmap_adapter **na); #ifdef WITH_VALE diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index 0beeec281..7025b8b2b 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -191,6 +191,7 @@ struct netmap_mem_d { nm_memid_t nm_id; /* allocator identifier */ int nm_grp; /* iommu groupd id */ + int nm_numa; /* numa node obj_pool is bound to */ /* list of all existing allocators, sorted by nm_id */ struct netmap_mem_d *prev, *next; @@ -365,24 +366,24 @@ static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = { * Virtual (VALE) ports will have each its own allocator. */ extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */ -struct netmap_mem_d nm_mem = { /* Our memory allocator. */ +struct netmap_mem_d nm_mem_blueprint = { .pools = { [NETMAP_IF_POOL] = { - .name = "netmap_if", + .name = "netmap_%d_if", .objminsize = sizeof(struct netmap_if), .objmaxsize = 4096, .nummin = 10, /* don't be stingy */ .nummax = 10000, /* XXX very large */ }, [NETMAP_RING_POOL] = { - .name = "netmap_ring", + .name = "netmap_%d_ring", .objminsize = sizeof(struct netmap_ring), .objmaxsize = 32*PAGE_SIZE, .nummin = 2, .nummax = 1024, }, [NETMAP_BUF_POOL] = { - .name = "netmap_buf", + .name = "netmap_%d_buf", .objminsize = 64, .objmaxsize = 65536, .nummin = 4, @@ -390,17 +391,23 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */ }, }, - .nm_id = 1, + .nm_id = -1, .nm_grp = -1, + .nm_numa = -1, - .prev = &nm_mem, - .next = &nm_mem, + .prev = NULL, + .next = NULL, .ops = &netmap_mem_global_ops }; -static struct netmap_mem_d *netmap_last_mem_d = &nm_mem; +/* number of global memory pools, should be dynamic */ +#define NGPOOLS 2 + +struct netmap_mem_d nm_mems[NGPOOLS]; /* Our global memory allocator. */ + +static struct netmap_mem_d *netmap_last_mem_d = &nm_mems[NGPOOLS - 1]; /* blueprint for the private memory allocators */ extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */ @@ -432,6 +439,8 @@ static const struct netmap_mem_d nm_blueprint = { .flags = NETMAP_MEM_PRIVATE, + .nm_grp = -1, + .ops = &netmap_mem_private_ops }; @@ -440,17 +449,18 @@ static const struct netmap_mem_d nm_blueprint = { #define STRINGIFY(x) #x +/* TODO: not much sense for nm_mem_blueprint */ #define DECLARE_SYSCTLS(id, name) \ SYSBEGIN(mem2_ ## name); \ SYSCTL_DECL(_dev_netmap); /* leave it here, easier for porting */ \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \ CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \ - CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ + CTLFLAG_RD, &nm_mem_blueprint.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \ CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \ - CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \ + CTLFLAG_RD, &nm_mem_blueprint.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \ CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \ "Default size of private netmap " STRINGIFY(name) "s"); \ @@ -463,7 +473,7 @@ DECLARE_SYSCTLS(NETMAP_IF_POOL, if); DECLARE_SYSCTLS(NETMAP_RING_POOL, ring); DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf); -/* call with NMA_LOCK(&nm_mem) held */ +/* call with NMA_LOCK(&nm_mems[0]) held */ static int nm_mem_assign_id_locked(struct netmap_mem_d *nmd) { @@ -475,7 +485,7 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd) /* we rely on unsigned wrap around */ id = scan->nm_id + 1; if (id == 0) /* reserve 0 as error value */ - id = 1; + id = NGPOOLS; scan = scan->next; if (id != scan->nm_id) { nmd->nm_id = id; @@ -492,15 +502,15 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd) return error; } -/* call with NMA_LOCK(&nm_mem) *not* held */ +/* call with NMA_LOCK(&nm_mems[0]) *not* held */ static int nm_mem_assign_id(struct netmap_mem_d *nmd) { int ret; - NMA_LOCK(&nm_mem); + NMA_LOCK(&nm_mems[0]); ret = nm_mem_assign_id_locked(nmd); - NMA_UNLOCK(&nm_mem); + NMA_UNLOCK(&nm_mems[0]); return ret; } @@ -508,7 +518,7 @@ nm_mem_assign_id(struct netmap_mem_d *nmd) static void nm_mem_release_id(struct netmap_mem_d *nmd) { - NMA_LOCK(&nm_mem); + NMA_LOCK(&nm_mems[0]); nmd->prev->next = nmd->next; nmd->next->prev = nmd->prev; @@ -518,7 +528,7 @@ nm_mem_release_id(struct netmap_mem_d *nmd) nmd->prev = nmd->next = NULL; - NMA_UNLOCK(&nm_mem); + NMA_UNLOCK(&nm_mems[0]); } static int @@ -1163,7 +1173,7 @@ nm_alloc_lut(u_int nobj) /* call with NMA_LOCK held */ static int -netmap_finalize_obj_allocator(struct netmap_obj_pool *p) +netmap_finalize_obj_allocator(struct netmap_obj_pool *p, int numanode) { int i; /* must be signed */ size_t n; @@ -1370,7 +1380,7 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd) nmd->lasterr = 0; nmd->nm_totalsize = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { - nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]); + nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i], nmd->nm_numa); if (nmd->lasterr) goto error; nmd->nm_totalsize += nmd->pools[i].memtotal; @@ -1401,6 +1411,12 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd) return nmd->lasterr; } +/* always return something sensible, even if nm_id is out of range */ +struct netmap_mem_d * +netmap_mem_get_allocator(int nm_id){ + return &nm_mems[nm_id % NGPOOLS]; +} + static void @@ -1612,27 +1628,50 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd) static void netmap_mem_global_delete(struct netmap_mem_d *nmd) { - int i; + unsigned int i, j; - for (i = 0; i < NETMAP_POOLS_NR; i++) { - netmap_destroy_obj_allocator(&nm_mem.pools[i]); + for (i = 0; i < NGPOOLS; i++) { + struct netmap_mem_d *nmd = &nm_mems[i]; + for (j = 0; j < NETMAP_POOLS_NR; j++) { + netmap_destroy_obj_allocator(&nmd->pools[j]); + } + NMA_LOCK_DESTROY(nmd); } - - NMA_LOCK_DESTROY(&nm_mem); } int netmap_mem_init(void) { - NMA_LOCK_INIT(&nm_mem); - netmap_mem_get(&nm_mem); + unsigned int i, j; + + for(i = 0; i < NGPOOLS; i++){ + struct netmap_mem_d *nmd = &nm_mems[i]; + *nmd = nm_mem_blueprint; + NMA_LOCK_INIT(nmd); + nmd->nm_id = i + 1; + nmd->nm_numa = i; + nmd->prev = nm_mems + ((i - 1) % NGPOOLS); + nmd->next = nm_mems + ((i + 1) % NGPOOLS); + for (j = 0; j < NETMAP_POOLS_NR; j++){ + struct netmap_obj_pool *ref = &nm_mem_blueprint.pools[j]; + struct netmap_obj_pool *p = &nmd->pools[j]; + snprintf(p->name, NETMAP_POOL_MAX_NAMSZ, ref->name, nmd->nm_id); + } + netmap_mem_get(nmd); + } + return (0); } void netmap_mem_fini(void) { - netmap_mem_put(&nm_mem); + unsigned int i; + + for(i = 0; i < NGPOOLS; i++){ + struct netmap_mem_d *nmd = &nm_mems[i]; + netmap_mem_put(nmd); + } } static void diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h index a89b3f728..ecb1b2607 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -113,10 +113,9 @@ * reconfiguration. The private allocators release all their memory when there * are no active users. By 'active user' we mean an existing netmap_priv * structure holding a reference to the allocator. - */ - -extern struct netmap_mem_d nm_mem; + */ +struct netmap_mem_d * netmap_mem_get_allocator(int nm_id); int netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *); vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t); #ifdef _WIN32 diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index b48e925ab..4043e84fd 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -688,7 +688,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) } else { struct netmap_adapter *hw; - error = netmap_get_hw_na(ifp, &hw); + error = netmap_get_hw_na(nmr, ifp, &hw); if (error || hw == NULL) goto out;