From 9f9028a779140e86cc4a754223d45d1085ac1595 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Tue, 6 Jan 2015 10:08:46 -0800 Subject: [PATCH 01/18] implement make_dev_credf() as in FreeBSD --- LINUX/bsd_glue.h | 4 ++-- sys/dev/netmap/netmap.c | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/LINUX/bsd_glue.h b/LINUX/bsd_glue.h index cbec5b480..aa4da2c95 100644 --- a/LINUX/bsd_glue.h +++ b/LINUX/bsd_glue.h @@ -373,13 +373,13 @@ typedef int (d_mmap_t)(struct file *f, struct vm_area_struct *vma); typedef unsigned int (d_poll_t)(struct file * file, struct poll_table_struct *pwait); /* - * make_dev will set an error and return the first argument. + * make_dev_credf() will set an error and return the first argument. * This relies on the availability of the 'error' local variable. * For old linux systems that do not have devfs, generate a * message in syslog so the sysadmin knows which command to run * in order to create the /dev/netmap entry */ -#define make_dev(_cdev, _zero, _uid, _gid, _perm, _name) \ +#define make_dev_credf(_flags, _cdev, _zero, _cred, _uid, _gid, _perm, _name) \ ({error = misc_register(_cdev); \ D("run mknod /dev/%s c %d %d # error %d", \ (_cdev)->name, MISC_MAJOR, (_cdev)->minor, error); \ diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index e66492d4a..ed5083d05 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -3073,8 +3073,13 @@ netmap_init(void) error = netmap_mem_init(); if (error != 0) goto fail; - /* XXX could use make_dev_credv() to get error number */ - netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660, + /* + * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls + * when the module is compiled in. 
+ * XXX could use make_dev_credv() to get error number + */ + netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, + &netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0660, "netmap"); if (!netmap_dev) goto fail; From 32e06f9d18bf82e40a7c5b6e769c0ca7607913fc Mon Sep 17 00:00:00 2001 From: User Luigi Date: Sun, 25 Jan 2015 20:23:09 -0800 Subject: [PATCH 02/18] set the mode to 0600 so only root can access netmap devices (imported from FreeBSD) --- sys/dev/netmap/netmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index ed5083d05..e69382fe4 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -3079,7 +3079,7 @@ netmap_init(void) * XXX could use make_dev_credv() to get error number */ netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD, - &netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0660, + &netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600, "netmap"); if (!netmap_dev) goto fail; From 031f050e9e38a0df67780780ee79511dbffca702 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Thu, 6 Aug 2015 07:37:42 -0700 Subject: [PATCH 03/18] comment on the use of contigmalloc (merge candidate) --- sys/dev/netmap/netmap_mem2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index 2233357d1..0beeec281 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -1197,6 +1197,13 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) int lim = i + p->_clustentries; char *clust; + /* + * XXX Note, we only need contigmalloc() for buffers attached + * to native interfaces. In all other cases (nifp, netmap rings + * and even buffers for VALE ports or emulated interfaces) we + * can live with standard malloc, because the hardware will not + * access the pages directly. 
+ */ clust = contigmalloc(n, M_NETMAP, M_NOWAIT | M_ZERO, (size_t)0, -1UL, PAGE_SIZE, 0); if (clust == NULL) { From 9b08ab07522ff20c2ed49b64f7621f28114b5655 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Thu, 6 Aug 2015 07:38:48 -0700 Subject: [PATCH 04/18] whitespace fix and removal of unused functions --- WINDOWS/win_glue.c | 141 ++++++++-------------------- WINDOWS/win_glue.h | 228 ++++++++++++++++++++++----------------------- 2 files changed, 146 insertions(+), 223 deletions(-) diff --git a/WINDOWS/win_glue.c b/WINDOWS/win_glue.c index f250ce4f5..d2c937e74 100644 --- a/WINDOWS/win_glue.c +++ b/WINDOWS/win_glue.c @@ -1,33 +1,37 @@ /* -* Copyright (C) 2015 Universita` di Pisa. All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions -* are met: -* 1. Redistributions of source code must retain the above copyright -* notice, this list of conditions and the following disclaimer. -* 2. Redistributions in binary form must reproduce the above copyright -* notice, this list of conditions and the following disclaimer in the -* documentation and/or other materials provided with the distribution. -* -* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -* SUCH DAMAGE. 
-*/ + * Copyright (C) 2015 Universita` di Pisa. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ #include -/********************************************************* -* TIME FUNCTIONS (COPIED FROM DUMMYNET) * -**********************************************************/ +/* + * reimplementation of some FreeBSD/Linux kernel functions used in netmap. 
+ */ + +/* + * TIME FUNCTIONS (COPIED FROM DUMMYNET) + */ void do_gettimeofday(struct timeval *tv) { @@ -67,85 +71,14 @@ do_gettimeofday(struct timeval *tv) } -/********************************************************* -* KERNEL MEMORY ALLOCATION * -**********************************************************/ -extern struct dictionary_box dict_box; - - -static inline int -ilog2(uint64_t n) -{ - uint64_t k = 1ULL << 63; - int i; - for (i = 63; i >= 0 && !(n &k); i--, k >>= 1) - ; - return i; -} - -#define roundup_pow_of_two(n) (n == 1) ? 1 : (1UL << (ilog2((n) - 1) + 1)) - -char * -win_contigMalloc(int sz, int page_size) -{ - PHYSICAL_ADDRESS LowAddress; - PHYSICAL_ADDRESS HighestAcceptable; - PHYSICAL_ADDRESS SkipAddress; - PVOID p_; - - (void)page_size; // XXX - - LowAddress.QuadPart = 0; - HighestAcceptable.QuadPart = -1; - SkipAddress.QuadPart = 0; - //If needed lowest address can be used MmAllocateContiguousMemorySpecifyCache - //p_ = MmAllocateContiguousMemory(sz, HighestAcceptable); - p_ = ExAllocatePoolWithTag(NonPagedPool, sz, M_NETMAP); - - /*PVOID p_ = MmAllocateContiguousMemorySpecifyCache(sz, - LowAddress, - HighestAcceptable, - SkipAddress, - MmNonCached);*/ -#if 0 - PMDL pMdl = MmAllocatePagesForMdlEx(LowAddress, - HighestAcceptable, - SkipAddress, - sz, - MmNonCached, - MM_ALLOCATE_REQUIRE_CONTIGUOUS_CHUNKS - ); - PVOID p_ = MmMapLockedPagesSpecifyCache(pMdl, - KernelMode, - MmNonCached, - NULL, - FALSE, - NormalPagePriority); -#endif - if (p_ != NULL) { - RtlZeroMemory(p_, sz); - } -#if 0 - IoFreeMdl(pMdl); -#endif - return (p_ != NULL ? 
(char*)p_ : NULL); -} -void -win_ContigFree(void *virtualAddress) -{ - //MmFreeContiguousMemory(virtualAddress); - ExFreePoolWithTag(virtualAddress, M_NETMAP); -} - -/********************************************************* -* Hardware/generic functions * -**********************************************************/ - - -/********************************************************* -* SYSCTL emulation (copied from dummynet\glue.h) * -**********************************************************/ +/* + * SYSCTL emulation (copied from dummynet/glue.h) + * + * This was a mechanism used in dummynet (and early netmap versions) + * to configure parameters. It is being replaced by other mechansism + * so the following block of code will likely go away. + */ static struct sysctltable GST; void sysctl_addgroup_main_init(); diff --git a/WINDOWS/win_glue.h b/WINDOWS/win_glue.h index 1a1a3355f..12586b95c 100644 --- a/WINDOWS/win_glue.h +++ b/WINDOWS/win_glue.h @@ -26,14 +26,16 @@ #ifndef _WIN_GLUE_H_ #define _WIN_GLUE_H_ +/* + * This header is used to compile the kernel components of netmap for Windows. + * Its purpose is to remap common FreeBSD/Linux kernel data structures and + * functions into compatible Windows ones. + */ + #ifdef __CYGWIN__ -/* we have __attribute__ */ #define _WIN32 /* we use _WIN32 throughout the code */ #else /* some MSC pragmas etc. */ -// #define __attribute__(x) __declspec(align(64)) -// #define __aligned__ align - //Disabling unuseful warnings #pragma warning(disable:4018) // expression: signed/unsigned mismatch #pragma warning(disable:4047) // operator: different levels of indirection @@ -56,25 +58,22 @@ #pragma warning(disable:4267) //conversion from 'size_t' to . 
possible loss of data -#endif //__CYGWIN__ +#endif /* !__CYGWIN__ */ -/*#define FILTER_MAJOR_NDIS_VERSION NDIS_SUPPORT_NDIS6 -#define FILTER_MINOR_NDIS_VERSION NDIS_SUPPORT_NDIS6*/ -#define NDIS_SUPPORT_NDIS6 1 //gives support for NDIS NET_BUFFERs +#define NDIS_SUPPORT_NDIS6 1 //gives support for NDIS NET_BUFFERs -#define WIN32_LEAN_AND_MEAN 1 +#define WIN32_LEAN_AND_MEAN 1 -#ifndef IS_USERSPACE #include #include #include //#include // definition of IP_ADAPTER_INFO #include // definition of IF_NAMESIZE -#include // various NT definitions +#include // various NT definitions #include -#include //machine specific code (for example le64toh) +#include //machine specific code (for example le64toh) #include -#endif //IS_USERSPACE + #define M_DEVBUF 'nmDb' /* netmap pool for memory allocation */ #define M_NETMAP 'nmBm' /* bitmap pool for netmap_mem2.c */ @@ -86,72 +85,76 @@ #define IFNAMSIZ 44//IF_NAMESIZE //defined in netioapi.h, is 256 //XXX_ale must set the same here and in userspace somehow -/********************************************************* -* REDEFINITION OF UNCOMMON STRUCTURES FOR WINDOWS * -**********************************************************/ +/* + * C types and structs missing on Windows + */ // From inttypes.h -typedef __int8 int8_t; -typedef unsigned __int8 uint8_t; -typedef __int16 int16_t; -typedef unsigned __int16 uint16_t; -typedef __int32 int32_t; -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -typedef uint32_t u_int; -typedef ULONG u_long; -typedef SSIZE_T ssize_t; - - -#ifndef IS_USERSPACE -typedef struct timeval { - LONGLONG tv_sec; - LONGLONG tv_usec; -} timeval; -typedef char* caddr_t; +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +typedef uint32_t u_int; +typedef ULONG u_long; +typedef 
SSIZE_T ssize_t; + + +struct timeval { + LONGLONG tv_sec; + LONGLONG tv_usec; +}; + +typedef char * caddr_t; typedef PHYSICAL_ADDRESS vm_paddr_t; -typedef uint32_t vm_offset_t; -typedef ULONG vm_ooffset_t; +typedef uint32_t vm_offset_t; +typedef ULONG vm_ooffset_t; #define thread PIO_STACK_LOCATION -#endif //IS_USERSPACE + + //-------------------------------------------------------- -/********************************************************* -* ERRNO->NTSTATUS TRANSLATION * -**********************************************************/ +/* + * ERRNO -> NTSTATUS TRANSLATION + */ #define ENOBUFS STATUS_DEVICE_INSUFFICIENT_RESOURCES #define EOPNOTSUPP STATUS_INVALID_DEVICE_REQUEST -/********************************************************* -* NO USE IN WINDOWS CODE * -**********************************************************/ + +/* + * NO USE IN WINDOWS CODE + */ #define destroy_dev(a) #define __user #define nm_iommu_group_id(dev) 0 -#define if_printf DbgPrint -/********************************************************* -* TRANSLATION OF GCC COMPILER ATTRIBUTES TO MSVC COMPILER* -**********************************************************/ + + +/* + * TRANSLATION OF GCC COMPILER ATTRIBUTES TO MSVC COMPILER + */ #ifdef _MSC_VER -#define inline __inline -#define __builtin_prefetch(x) _mm_prefetch(x,_MM_HINT_T2) +#define inline __inline +#define __builtin_prefetch(x) _mm_prefetch(x, _MM_HINT_T2) #endif //_MSC_VER -#ifndef IS_USERSPACE static void panic(const char *fmt, ...) 
{ DbgPrint(fmt); NT_ASSERT(1); } + +#define if_printf DbgPrint #define __assert NT_ASSERT -#define assert NT_ASSERT +#define assert NT_ASSERT -/********************************************************* -* SPINLOCKS DEFINITION * -**********************************************************/ +/* + * SPINLOCKS DEFINITION + */ typedef struct { KSPIN_LOCK sl; KIRQL irql; @@ -162,16 +165,16 @@ static inline void spin_lock_init(win_spinlock_t *m) KeInitializeSpinLock(&(m->sl)); } -//Acquires the spinlock and saves the current IRQL level +/* Acquire the spinlock and saves the current IRQL level */ static inline void mtx_lock(win_spinlock_t *m) { - KeAcquireSpinLock(&(m->sl), &(m->irql)); + KeAcquireSpinLock(&(m->sl), &(m->irql)); } -//Release the spinlock and restore the old IRQL level +/* Release the spinlock and restore the old IRQL level */ static inline void mtx_unlock(win_spinlock_t *m) { - KeReleaseSpinLock(&(m->sl), (m->irql)); + KeReleaseSpinLock(&(m->sl), (m->irql)); } #define mtx_init(a, b, c, d) spin_lock_init(a) @@ -179,11 +182,11 @@ static inline void mtx_unlock(win_spinlock_t *m) #define mtx_lock_spin(a) mtx_lock(a) #define mtx_unlock_spin(a) mtx_unlock(a) -//-------------------------------------------------------- -/********************************************************* -* READ/WRITE LOCKS DEFINITION * -**********************************************************/ + +/* + * READ/WRITE LOCKS DEFINITION + */ #define BDG_RWLOCK_T ERESOURCE #define BDG_RWINIT(b) ExInitializeResourceLite(b.bdg_lock) @@ -196,24 +199,24 @@ static inline void mtx_unlock(win_spinlock_t *m) #define BDG_SET_VAR(lval, p) ((lval) = (p)) #define BDG_GET_VAR(lval) (lval) #define BDG_FREE(p) free(p) -//-------------------------------------------------------- -/********************************************************* -* SLEEP/WAKEUP THREADS * -**********************************************************/ + +/* + * SLEEP/WAKEUP THREADS + */ #define PI_NET 16 #define init_waitqueue_head(x) 
KeInitializeEvent(x, NotificationEvent, TRUE); #define netmap_knlist_destroy(x) #define tsleep(ident, priority, wmesg, time) KeDelayExecutionThread(KernelMode, FALSE, (PLARGE_INTEGER)time) -//-------------------------------------------------------- + #define mb KeMemoryBarrier #define rmb KeMemoryBarrier //XXX_ale: doesn't seems to exist just a read barrier -/********************************************************* -* TIME FUNCTIONS * -**********************************************************/ +/* + * TIME FUNCTIONS + */ void do_gettimeofday(struct timeval *tv); static int time_uptime_w32() { @@ -226,51 +229,23 @@ static int time_uptime_w32() #define microtime do_gettimeofday #define time_second time_uptime_w32 + //-------------------------------------------------------- -#define snprintf _snprintf -#define printf DbgPrint +#define snprintf _snprintf +#define printf DbgPrint /* XXX copyin used in vale (indirect bufs) and netmap_offloadings.c * Is it ok to use RtlCopyMemory for user buffers ? 
*/ #define copyin(src, dst, copy_len) RtlCopyMemory(dst, src, copy_len) -#if 0 -static NTSTATUS SafeAllocateString(OUT PUNICODE_STRING result, IN USHORT size) -{ - ASSERT(result != NULL); - if (result == NULL || size == 0) - return STATUS_INVALID_PARAMETER; - - result->Buffer = ExAllocatePoolWithTag(NonPagedPool, size, 'rtsM'); - result->Length = 0; - result->MaximumLength = size; - - if (result->Buffer) - RtlZeroMemory(result->Buffer, size); - else - return STATUS_NO_MEMORY; - - return STATUS_SUCCESS; -} -#endif /* UNUSED */ - -/********************************************************* -* GENERIC/HW SPECIFIC STRUCTURES * -**********************************************************/ +/* + * GENERIC/HW SPECIFIC STRUCTURES + */ struct netmap_adapter; -#if 0 -struct ifnet { - char if_xname[IFNAMSIZ]; /* external name (name + unit) */ - // struct ifaltq if_snd; /* output queue (includes altq) */ - struct netmap_adapter* na; - - int* netmap_generic_rx_handler; -}; -#endif struct net_device { char if_xname[IFNAMSIZ]; // external name (name + unit) @@ -288,12 +263,13 @@ struct net_device { }; #define ifnet net_device + struct mbuf { - struct mbuf *m_next; - struct mbuf *m_nextpkt; - uint32_t m_len; + struct mbuf *m_next; + struct mbuf *m_nextpkt; + uint32_t m_len; struct net_device *dev; - PVOID pkt; + PVOID pkt; void*(*netmap_default_mbuf_destructor)(struct mbuf *m); }; @@ -390,8 +366,6 @@ static void nm_os_mitigation_cleanup(struct nm_generic_mit *mit) } -static inline int ilog2(uint64_t n); -static inline int roundup_pow_of_two(int sz); /*------------------------------------------- * KERNEL MEMORY ALLOCATION and management @@ -524,13 +498,22 @@ PVOID send_up_to_stack(struct ifnet *ifp, struct mbuf *m, PVOID head); #define make_dev_credf(_a, _b, ...) ((void *)1) // non-null -#define contigmalloc(sz, ty, flags, a, b, pgsz, c) win_contigMalloc(sz,pgsz) -#define contigfree(va, sz, ty) win_ContigFree(va) +/* + * At the moment we can just do regular malloc on Windows. 
+ * The only use for contigmalloc would be for netmap buffers + * for NICs using native netmap support. + * + * MmAllocatePagesForMdlEx() and MmMapLockedPagesSpecifyCache() + * would work for that, but they are incredibly slow. + */ +#define contigmalloc(sz, ty, flags, a, b, pgsz, c) \ + ExAllocatePoolWithTag(NonPagedPool, sz, M_NETMAP) +#define contigfree(va, sz, ty) ExFreePoolWithTag(va, M_NETMAP) -#define vtophys MmGetPhysicalAddress +#define vtophys MmGetPhysicalAddress #define MALLOC_DEFINE(a,b,c) + //-------------------------------------------------------- -#endif //IS_USERSPACE /********************************************************* * SYSCTL emulation (copied from dummynet/glue.h) * @@ -664,22 +647,25 @@ int kesysctl_emu_get(struct sockopt* sopt); void sysctl_pushback(char* name, int flags, int datalen, void* data); int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int do_cmd(int optname, void *optval, uintptr_t optlen); +// int do_cmd(int optname, void *optval, uintptr_t optlen); //-------------------------------------------------------- -/********************************************************* -* POLL VALUES DEFINITIONS * -**********************************************************/ +/* + * POLL VALUES DEFINITIONS + */ #ifndef POLLRDNORM #define POLLRDNORM 0x0040 #endif + #ifndef POLLRDBAND #define POLLRDBAND 0x0080 #endif + #ifndef POLLIN #define POLLIN 0x0001 #endif + #ifndef POLLPRI #define POLLPRI 0x0002 #endif @@ -687,9 +673,11 @@ int do_cmd(int optname, void *optval, uintptr_t optlen); #ifndef POLLWRNORM #define POLLWRNORM 0x0100 #endif + #ifndef POLLOUT #define POLLOUT 0x0004 #endif + #ifndef POLLWRBAND #define POLLWRBAND 0x0200 #endif @@ -697,9 +685,11 @@ int do_cmd(int optname, void *optval, uintptr_t optlen); #ifndef POLLERR #define POLLERR 0x0008 #endif + #ifndef POLLHUP #define POLLHUP 0x0010 #endif + #ifndef POLLNVAL #define POLLNVAL 0x0020 #endif From 28a00d91e28b7f8497f08b67acad112ee7b32746 Mon 
Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Fri, 14 Aug 2015 19:31:59 -0700 Subject: [PATCH 05/18] report windows availability --- README | 81 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 29 deletions(-) diff --git a/README b/README index d833bd2bc..4086f9b01 100644 --- a/README +++ b/README @@ -4,18 +4,17 @@ NETMAP is a framework for very fast packet I/O from userspace. VALE is an equally fast in-kernel software switch using the netmap API. -Both are implemented as a single kernel module for FreeBSD and Linux, -and can deal with line rate on real or emulated 10 Gbit ports. +Both are implemented as a single kernel module for FreeBSD, Linux and +since summer 2015, also for Windows. +Netmap/VALE can handle tens of millions of packets per second, matching +the speed of 10G and 40G ports even with minimum sized frames. See details at http://info.iet.unipi.it/~luigi/netmap/ -In this directory you can find source code (BSD-Copyright) for FreeBSD -and Linux. Note that recent FreeBSD distributions already include both -NETMAP and VALE. - -For more details please look at the manpage (netmap.4) and -netmap home page above. +This repository, hosted at https://github.com/luigirizzo/netmap , contains +source code (BSD-Copyright) for FreeBSD, Linux and Windows. +Note that recent FreeBSD distributions already include both NETMAP and VALE. What is this good for @@ -24,11 +23,27 @@ Netmap is mostly useful for userspace applications that must deal with raw packets: traffic generators, sinks, monitors, loggers, software switches and routers, generic middleboxes, interconnection of virtual machines. -In this distribution you will find some example userspace code to build -a generator, a sink, and a simple bridge. The kernel module implements a -learning ethernet bridge. We also include patches for some applications -(noticeably libpcap) so you can run any libpcap client on top of netmap -hopefully at a higher speed. 
+The example/ directory includes pkt-gen.c (a fast traffic generator/receiver) +and bridge.c, a simple bidirectional interconnect between two ports. +The kernel module itself implements a learning ethernet bridge. + +More resources are hosted on other repositories. For example + + https://github.com/luigirizzo/netmap-libpcap + + contains a netmap-enabled version of libpcap (which is also + included in FreeBSD distribution) so you can run any libpcap client + on top of netmap at much higher speeds than using bpf. + + https://github.com/luigirizzo/netmap-ipfw + + is a userspace version of ipfw and dummynet which can handle several + million packets per second in a single thread + +Qemu/kvm has native netmap support, so it can interconnect VMs at high speed +through netmap ports. There is experimental netmap support in the FreeBSD's +bhyve hypervisor. + Netmap alone DOES NOT accelerate your TCP. For that you need to implement your own tcp/ip stack probably using some of the techniques indicated @@ -55,21 +70,19 @@ descriptors. Device drivers are completely in the kernel, and the system does not rely on IOMMU or other special mechanisms. - Installation instructions ------------------------- -A kernel module (netmap.ko or netmap_lin.ko) implements the core -NETMAP routines and the VALE switch. -Netmap-aware device drivers are needed to use netmap on ethernet ports. -To date, we have support for Intel ixgbe (10G), e1000/e1000e/igb (1G), -Realtek 8169 (1G) and Nvidia (1G). - -If you do not have a supported device, you can still try out netmap -(with reduced performance) because the main kernel module emulates -the netmap API on top of standard device drivers. - - FreeBSD instructions: - --------------------- +A single kernel module implements the core NETMAP functions, including +the VALE switch and access to physical NICS using unmodified device drivers +(at the price of much lower performance than netmap-aware drivers). 
+ +Netmap-aware device drivers are needed to use netmap at high speed +on ethernet ports. To date, we have support for Intel ixgbe (10G), +ixl (10/40G), e1000/e1000e/igb (1G), Realtek 8169 (1G) and Nvidia (1G). +FreeBSD has also native netmap support in the Chelsio 10/40G cards. + + FreeBSD + ------- Since recent FreeBSD distributions already include netmap, you only need build the new kernel or modules as below: @@ -80,8 +93,8 @@ the netmap API on top of standard device drivers. + sample applications are in the examples/ directory in this archive, or in src/tools/tools/netmap/ in FreeBSD distributions - Linux instructions: - ------------------- + Linux + ----- On Linux, netmap is an out-of-tree module, so you need to compile it from these sources. The Makefile in the LINUX/ directory will also let you patch device driver sources and build some netmap-enabled @@ -106,6 +119,10 @@ the netmap API on top of standard device drivers. make SRC=/a/b/c/linux-sources-A.B/ KSRC=/a/b/c/linux-headers-A.B/ + WINDOWS + ------- + Netmap has been ported to Windows in summer 2015 by Alessio Faina as part of + his Master thesis. Please look at WINDOWS/README.txt for details. Applications ------------ @@ -209,7 +226,7 @@ Credits NETMAP and VALE are projects of the Universita` di Pisa, partially supported by various entities including: Intel Research Berkeley, EU FP7 projects CHANGE and OPENLAB, -Netapp/Silicon Valley Community Foundation, ICSI +Netapp/Silicon Valley Community Foundation, ICSI, Author: Luigi Rizzo Contributors: @@ -219,6 +236,8 @@ Contributors: Gaetano Catalli Matteo Landi Vincenzo Maffione + Stefano Garzarella + Alessio Faina References ---------- @@ -244,3 +263,7 @@ You can find the papers at http://info.iet.unipi.it/~luigi/research.html + Luigi Rizzo, Giuseppe Lettieri, Vincenzo Maffione, Speeding up packet I/O in virtual machines, IEEE/ACM ANCS 2013, San Jose, Oct. 
2013 + ++ Stefano Garzarella, Giuseppe Lettieri, Luigi Rizzo, + Virtual device passthrough for high speed VM networking + IEEE/ACM ANCS 2015, Oakland, May 2015 From 75955a7bc8ce70f4c9be9c7d3d7c2307ff8665a1 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Fri, 14 Aug 2015 20:08:00 -0700 Subject: [PATCH 06/18] revise documentation --- WINDOWS/README.txt | 173 ++++++++++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 73 deletions(-) diff --git a/WINDOWS/README.txt b/WINDOWS/README.txt index 8997ea26d..65492b560 100644 --- a/WINDOWS/README.txt +++ b/WINDOWS/README.txt @@ -1,58 +1,88 @@ -This directory contains a port of netmap to Windows. +This directory contains a port of netmap to Windows, developed by Alessio +Faina as part of his MS thesis at the Universita` di Pisa. -For installation instructions see below +This port is made of two kernel modules, that you should build as indicated +in the "BUILD NETMAP MODULES" section below. -When using netmap with Windows NICs, the name to use for the interface is + netmap.sys + implements the netmap API, VALE switch, netmap pipes, monitor ports. + Performance is similar to that on FreeBSD and Linux: 20Mpps on switch + ports, over 100 Mpps on pipes. 
- netmap:ethXX + To load the module, do the following (as administrator) -where XX is the 'interface index' that you can show with the command + (cd Output-Win8.1Release/netmap-pkg; ./nm-loader l) - netsh int ipv4 show interfaces + To test the functionality you can run the following programs + in two terminals -(or many other ways) + ../examples/pkt-gen -i vale1:a -f tx # sender ------------- BRIEF DESCRIPTION ---------- + ../examples/pkt-gen -i vale1:b -f rx # receiver -The solution Netmap.sln contains the following projects: + The module can be unloaded with -netmap - the core of the netmap kernel module, can be used by itself to - create VALE ports and netmap pipes + (cd WINDOWS/Output-Win8.1Release/netmap-pkg; ./nm-loader u) -netmap-pkg - a Test signed version of netmap module to be used on a 64 bit version - of Windows with Test Sign mode activated + nm-ndis.sys + implements access to the NIC and the host stack using native Windows + NIC drivers. Though faster than sockets, speed is limited by the device + drivers, in the 1-2Mpps range. -nm-ndis - a kernel module to attach the netmap core to a - physical network devices through NDIS hooks + The module must be installed as a network service (see "INSTALL NM-NDIS" below) + after which you can send or receive packets using any netmap application and -nm-ndis-pkg - a Test signed version of the above, again for use on 64 bit versions - of Windows with Test Sign mode activated + netmap:ethXX -loader - a userspace program to dynamically load and unload the Netmap kernel - module without the need to install it and load it at OS startup. + as the port name. XX is the Windows "interface index" that can be + shown with the following command (or many other ways): -sysctl - (not complete) a tool to manipulate the sysctl variables in the netmap module. + netsh int ipv4 show interfaces -Projects are visual studio files, .vcxprj . The format is defined in +Note that on 64-bit platforms kernel modules should be signed. 
+The build script we supply do a Test-sign of the modules, +and to load them you should run the system in Test-mode. -http://blog.bfitz.us/?p=922 +------------ BUILD NETMAP MODULES ---------- -The directory ../examples contains pkt-gen, a userspace program -used to test the speed of a link between two netmap interfaces. +To build the module you need to install (in this order): + - Visual Studio 2013 (express is enough) + https://www.visualstudio.com/downloads/download-visual-studio-vs + Note, you can reach the 2013 version from the + 'Download older versions' link + - Windows Driver Kit (WDK) version 8.1 update + https://msdn.microsoft.com/en-us/windows/hardware/gg454513.aspx ------------- INSTALL INSTRUCTIONS (netmap core)------------ -The netmap core module implements the basic netmap API, the VALE -software switch and pipes. -It can be loaded dynamically, or installed as a service and loaded at boot time. + - Cygwin, + http://www.cygwin.com/ + with base packages, make, C compiler, possibly git and other utilities + used to build the userspace program pkt-gen + +a) Build with command line tools and MsBuild.exe + We have a makefile which builds everything, just run + make # will build all projects and pkt-gen + make clean # will clean output directories + + The output will be found in the directory ./Output- + + Please look at the makefile to select different configurations + +b) Build with the Visual studio GUI + - Open the \WINDOWS\VsSolution\Netmap.sln solution + - Select on the build type combobox the type of Operating System (Win7/8/8.1) + and the type of build (Debug/Release) + - Click on "Compile", then "Compile solution" + - The output will be found under + \WINDOWS\Output-\ + + + +------------ INSTALL NETMAP.SYS ------------ + +The easiest way to install the netmap core is manually, using the nm-loader +program that we build. 
-a) DYNAMIC LOAD: - Open a "cmd" window with administrative privileges - Change into the directory containing netmap.sys< typically WINDOWS/Output-Win8.1Release/netmap-pkg @@ -64,14 +94,15 @@ a) DYNAMIC LOAD: - To unload the module run ../nm-loader u -b) PERSISTENT INSTALL MODULE +You can also install the module persistently, as follows: + - Open the folder containing netmap.{sys|inf|cat} , same as above - Right click on the .inf file and select -INSTALL- from the context menu; after a reboot the module will be correctly loaded ------------- INSTALL INSTRUCTIONS (nm-ndis module)------------ -The nm-ndis module implement communication with NDIS adapters and the host stack. -It implements a lightweight filter that runs as a service on an adapter +------------ INSTALL NM-NDIS ------------ +The nm-ndis module implements a lightweight filter that runs as a service +on an adapter, and is used to access the host stack and the NIC from netmap. netcfg - open the configuration panel for the network card in use @@ -88,40 +119,6 @@ If the netmap core module has been installed in a dynamic way, remember to deinstall the nm-ndis module before a shutdown/reboot and before unloading the netmap core module. 
------------- NETMAP BUILD INSTRUCTIONS ---------- -Requirements: -To build the module you need to install (in this order): - - Visual Studio 2013 (express is enough) - https://www.visualstudio.com/downloads/download-visual-studio-vs - Note, you can reach the 2013 version from the - 'Download older versions' link - - - Windows Driver Kit (WDK) version 8.1 update - https://msdn.microsoft.com/en-us/windows/hardware/gg454513.aspx - - - Cygwin, - http://www.cygwin.com/ - with base packages, make, c compiler, eventually gdb - used to build the userspace program pkt-gen - - -a) Build with the Visual studio GUI - - Open the \WINDOWS\VsSolution\Netmap.sln solution - - Select on the build type combobox the type of Operating System (Win7/8/8.1) - and the type of build (Debug/Release) - - Click on "Compile", then "Compile solution" - - The output will be found under - \WINDOWS\Output-\ - -b) Build with command line tools and MsBuild.exe - We have a makefile which builds everything, just run - make # will build all projects and pkt-gen - make clean # will clean output directories - - The output will be found in the directory ./Output- - - Please look at the makefile to select different configurations - ------------ GENERAL TIPS ----------------------------------- @@ -173,3 +170,33 @@ NETMAP to NIC ring about 1Mpps VM-to-VM pkt-gen-b -i netmap:1 -f tx # on one vm pkt-gen-b -i netmap:1 -f rx # on another vm +------------ BRIEF DESCRIPTION ---------- + +The solution Netmap.sln contains the following projects: + +netmap + the core of the netmap kernel module, can be used by itself to + create VALE ports and netmap pipes + +netmap-pkg + a Test signed version of netmap module to be used on a 64 bit version + of Windows with Test Sign mode activated + +nm-ndis + a kernel module to attach the netmap core to a + physical network devices through NDIS hooks + +nm-ndis-pkg + a Test signed version of the above, again for use on 64 bit versions + of Windows with Test Sign mode activated + 
+loader + a userspace program to dynamically load and unload the Netmap kernel + module without the need to install it and load it at OS startup. + +sysctl + (not complete) a tool to manipulate the sysctl variables in the netmap module. + +Projects are visual studio files, .vcxprj . The format is defined in + +http://blog.bfitz.us/?p=922 From aee6fad03b87fa3ce58b5f207f23029764216972 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Fri, 14 Aug 2015 21:18:04 +0200 Subject: [PATCH 07/18] update documentation --- WINDOWS/README.txt | 221 +++++++++++++++++++++++++++++---------------- 1 file changed, 141 insertions(+), 80 deletions(-) diff --git a/WINDOWS/README.txt b/WINDOWS/README.txt index 65492b560..fd7097d7f 100644 --- a/WINDOWS/README.txt +++ b/WINDOWS/README.txt @@ -1,67 +1,80 @@ -This directory contains a port of netmap to Windows, developed by Alessio -Faina as part of his MS thesis at the Universita` di Pisa. +This directory contains Windows version of netmap, developed by +Alessio Faina as part of his MS thesis at the Universita` di Pisa. -This port is made of two kernel modules, that you should build as indicated -in the "BUILD NETMAP MODULES" section below. +This version is made of two kernel modules, that you should build +as indicated in the "BUILD INSTRUCTIONS" section below. +A test program, pkt-gen, is also supplied. netmap.sys - implements the netmap API, VALE switch, netmap pipes, monitor ports. - Performance is similar to that on FreeBSD and Linux: 20Mpps on switch - ports, over 100 Mpps on pipes. + implements the netmap API, VALE switch, netmap pipes, monitor + ports. Performance is similar to that on FreeBSD and Linux: + 20Mpps on switch ports, over 100 Mpps on pipes. 
To load the module, do the following (as adminstrator) - (cd Output-Win8.1Release/netmap-pkg; ./nm-loader l) + (cd Output-Win8.1Release/netmap-pkg; ./nm-loader l) To test the functionality you can run the following programs in two terminals - ../examples/pkt-gen -i vale1:a -f tx # sender + ../examples/pkt-gen -i vale1:a -f tx # sender - ../examples/pkt-gen -i vale1:b -f rx # receiver + ../examples/pkt-gen -i vale1:b -f rx # receiver The module can be unloaded with - (cd WINDOWS/Output-Win8.1Release/netmap-pkg; ./nm-loader u) + (cd WINDOWS/Output-Win8.1Release/netmap-pkg; ./nm-loader u) nm-ndis.sys - implements access to the NIC and the host stack using native Windows - NIC drivers. Though faster than sockets, speed is limited by the device - drivers, in the 1-2Mpps range. + implements access to the NIC and the host stack using native + Windows NIC drivers. Though faster than sockets, speed is + limited by the device drivers, in the 1-2Mpps range. - The module must be installed as a network service (see "INSTALL NM-NDIS" below) - after which you can send or receive packets using any netmap application and + The module must be installed as a network service (see + "INSTALL NM-NDIS" below) after which you can send or receive + packets using any netmap application and netmap:ethXX - as the port name. XX is the Windows "interface index" that can be - shown with the followin command (or many other ways): + as the port name. XX is the Windows "interface index" that + can be shown with the followin command (or many other ways): netsh int ipv4 show interfaces Note that on 64-bit platforms kernel modules should be signed. -The build script we supply do a Test-sign of the modules, -and to load them you should run the system in Test-mode. +The build script we supply do a Test-sign of the modules, and to +load them you should run the Operating System in Test-mode. 
------------- BUILD NETMAP MODULES ---------- +-------------------- BUILD INSTRUCTIONS -------------------- + +To build the kernel modules and the sample programs you need to +install the following tools (in this order): + + - Visual Studio 2013 (the "express" version suffices) -To build the module you need to install (in this order): - - Visual Studio 2013 (express is enough) https://www.visualstudio.com/downloads/download-visual-studio-vs - Note, you can reach the 2013 version from the - 'Download older versions' link + + Note, the 2013 version is reachable from the 'Download older versions' link - Windows Driver Kit (WDK) version 8.1 update + https://msdn.microsoft.com/en-us/windows/hardware/gg454513.aspx - - Cygwin, + - Cygwin + http://www.cygwin.com/ + with base packages, make, C compiler, possibly git and other utilities - used to build the userspace program pkt-gen + used to build the sample programs pkt-gen + +There are two build methods: a) Build with command line tools and MsBuild.exe + We have a makefile which builds everything, just run + make # will build all projects and pkt-gen + make clean # will clean output directories The output will be found in the directory ./Output- @@ -69,6 +82,7 @@ a) Build with command line tools and MsBuild.exe Please look at the makefile to select different configurations b) Build with the Visual studio GUI + - Open the \WINDOWS\VsSolution\Netmap.sln solution - Select on the build type combobox the type of Operating System (Win7/8/8.1) and the type of build (Debug/Release) @@ -77,11 +91,10 @@ b) Build with the Visual studio GUI \WINDOWS\Output-\ +------------------- INSTALL NETMAP.SYS ------------------- ------------- INSTALL NETMAP.SYS ------------ - -The easiest way to install the netmap core is manually, using the nm-loader -program that we build. 
+The easiest way to install the netmap core module NETMAP.SYS is to +use the nm-loader program that we build together with the programs: - Open a "cmd" window with administrative privileges - Change into the directory containing netmap.sys< typically @@ -89,22 +102,23 @@ program that we build. - To load the module, run ../nm-loader l - a message on the console window will report the success or failure of the operation + a message will report the success or failure of the operation - To unload the module run ../nm-loader u -You can also install the module persistently, as follows: +You can also install the module permanently, as follows: - Open the folder containing netmap.{sys|inf|cat} , same as above - Right click on the .inf file and select -INSTALL- from the context menu; after a reboot the module will be correctly loaded ------------- INSTALL NM-NDIS ------------ -The nm-ndis module implements a lightweight filter that runs as a service -on an adapter, and is used access the host stack and the NIC from netmap. +------------------- INSTALL NM-NDIS ------------------- + +The nm-ndis.sys module implements a lightweight filter that runs +as a service on an adapter, and is used to access the host stack and +the NIC from netmap. It can be installed as follows: -netcfg - open the configuration panel for the network card in use " Control panel -> network and sharing center -> Change adapter settings " - right click on an adapter then click on " Properties " @@ -115,64 +129,115 @@ netcfg - click accept on the warnings for the installation of an unsigned or test-signed driver -If the netmap core module has been installed in a dynamic way, -remember to deinstall the nm-ndis module before a shutdown/reboot -and before unloading the netmap core module.
+If the netmap.sys module is not installed permanently, remember to +deinstall the nm-ndis.sys module before a shutdown/reboot and before +unloading netmap.sys - - ------------- GENERAL TIPS ----------------------------------- -Performance testing requires a bit of attention to make sure -that processes do not move between different cores, that the -CPU clock speed does not change, and that the switch does not -drop packets because of unrecognised MAC addresses. - ---- Configuration of the software switch (Hyper-V) --- -- Always specify the MAC address of the sender -- By default, Hyper-V drops packets with MAC addresses not associated - to the given port. To allow any traffic, - Go into the configuration of the VM - under "setting->NIC->Advanced Features" - enable "Mac Spoofing" - ---- Pinning cores (Hyper-V) --- - Go into the configuration of the VM - under settings->Processor - Virtual machine reserve (percentage) 100 - (not clear if there is a way to pin the thread) - ---- PERFORMANCE --- +------------------- PERFORMANCE ------------------- The typical experiment involve one netmap sender and one netmap receiver pkt-gen-b -i -f tx ... pkt-gen-b -i -f rx ... -this version of pkt-gen uses busy-wait. The interface name can be a VALE port -or a NIC or host port using the nm-ndis module (emulating the netmap API, -so not as fast as the drivers using native netmap mode available on FreeBSD -and Linux). +The above version of pkt-gen uses busy-wait. The interface name can +be a VALE port or a NIC or host port using the nm-ndis module +(emulating the netmap API, so not as fast as the drivers using +native netmap mode available on FreeBSD and Linux). 
-VALE port: 30Mpps with broadcast frames + VALE port: 20-30Mpps with broadcast frames pkt-gen-b -i vale0:a -f tx pkt-gen-b -i vale0:b -f rx -NETMAP pipe up to 180 Mpps + NETMAP pipe up to 180 Mpps pkt-gen-b -i vale0:a}1 -f tx pkt-gen-b -i vale0:a{1 -f rx -NETMAP to HOST ring about 2.3 Mpps if dropped, 1.8Mpps to windump - pkt-gen-b -i netmap:1^ -f tx # on one vm + NETMAP to HOST ring about 2.3 Mpps if dropped, 1.8Mpps to windump + (replace the '5' with the interface index from + netsh int ipv4 show interfaces) + + pkt-gen-b -i netmap:eth5^ -f tx # on one vm + + NETMAP to NIC ring about 1Mpps VM-to-VM -NETMAP to NIC ring about 1Mpps VM-to-VM - pkt-gen-b -i netmap:1 -f tx # on one vm - pkt-gen-b -i netmap:1 -f rx # on another vm + pkt-gen-b -i netmap:eth5 -f tx # on one vm + pkt-gen-b -i netmap:eth5 -f rx # on another vm ------------- BRIEF DESCRIPTION ---------- -The solution Netmap.sln contains the following projects: +------------------- GENERAL TIPS ------------------- + +Performance testing requires a bit of attention to make sure that +processes do not move between different cores, that the CPU clock +speed does not change, and that the switch does not drop packets +because of unrecognised MAC addresses. + +--- Configuration of the software switch (Hyper-V) --- +- Always specify the MAC address of the sender +- By default, Hyper-V drops packets with MAC addresses not associated + to the given port. To allow any traffic, + Go into the configuration of the VM + under "setting->NIC->Advanced Features" + enable "Mac Spoofing" + +--- Pinning cores (Hyper-V) --- + Go into the configuration of the VM + under settings->Processor + Virtual machine reserve (percentage) 100 + (not clear if there is a way to pin the thread) + +------------------- CODE OVERVIEW ------------------- + +KERNEL CODE: +The core netmap kernel code in the sys/ directory is the same one +used also on FreeBSD and Linux.
The WINDOWS/ directory contains +windows specific code, mostly to implement the I/O system calls and +mmap support, and remap FreeBSD or Linux kernel data structures and +functions into Windows equivalents. + +Access to the host stack and to NICs is implemented through an ndis +filter module, which is in the WINDOWS/nm-ndis directory. +The code comes from the examples contained in the Windows DDK (see +the license.rtf file) with small modifications -- about 500 lines +of code overall -- to interface with the main netmap module. + +We have an additional utility, nm-loader, to load and unload the +netmap kernel module from the command line (eventually we may +provide a similar one for the filter). + +To build the kernel modules we use the compiler from Visual Studio. + +For convenience, we have constructed the "solution" file and the various +project files with VSC, and then manually cleaned up the .vcxprj files +to remove the infinite copies of the same set of options generated +by the GUI. The configurations include instructions to sign the drivers +in Test-sign mode. +We then have a simple Makefile that calls MSbuild with the correct +configuration options. + +Eventually we plan to invoke the compiler and signing tools directly +from the makefile avoiding the vcxprj and .sln files. + +USER APPLICATIONS: +Netmap user applications only use ioctl(), mmap() and poll() and do not +need any new system call. We compile them with gcc under Cygwin, +and eventually we expect the same source code to be compilable under +all platforms that support netmap. + +Unfortunately, on Windows, mmap and poll are not +supported so we emulate them through special ioctl's. The mmap() +emulation is relatively straightforward -- netmap_user.h redefines +it to the wrapper function. For poll() things are a bit more +difficult, as we have not yet modified the Cygwin wrappers that +support poll. As a consequence, at the moment our poll() emulation +only handles a single file descriptor.
This is enough for our pkt-gen +program, but netmap programs using multiple file descriptors need +manual changes. + + +The file netmap.sln contains the following projects: netmap the core of the netmap kernel module, can be used by itself to @@ -194,9 +259,5 @@ loader a userspace program to dynamically load and unload the Netmap kernel module without the need to install it and load it at OS startup. -sysctl - (not complete) a tool to manipulate the sysctl variables in the netmap module. - Projects are visual studio files, .vcxprj . The format is defined in - http://blog.bfitz.us/?p=922 From e7cab14d0375a3de6fef0237673b3e92d70bdb9c Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Fri, 14 Aug 2015 21:31:31 -0700 Subject: [PATCH 08/18] remove stale code, clean up formatting --- WINDOWS/nm-ndis/filter.c | 27 +-------------------- WINDOWS/win_glue.h | 52 ++++++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 52 deletions(-) diff --git a/WINDOWS/nm-ndis/filter.c b/WINDOWS/nm-ndis/filter.c index a4d35dacf..cbdcbf1b9 100644 --- a/WINDOWS/nm-ndis/filter.c +++ b/WINDOWS/nm-ndis/filter.c @@ -1687,9 +1687,6 @@ N.B.: It is important to check the ReceiveFlags in NDIS_TEST_RECEIVE_CANNOT_PEND ULONG ReturnFlags; #endif - static int maxBatch = 0; - int qBatch = 0; - DEBUGP(DL_TRACE, "===>ReceiveNetBufferList: NetBufferLists = %p.\n", NetBufferLists); do { @@ -1772,24 +1769,12 @@ N.B.: It is important to check the ReceiveFlags in NDIS_TEST_RECEIVE_CANNOT_PEND */ if (netmap_hooks.handle_rx != NULL && (pFilter->intercept & NM_WIN_CATCH_RX)) { -#if 0 - static int packets = 0; /* debugging */ - //DbgPrint("Dropping packets... 
size: %i\n", (NET_BUFFER_LIST_FIRST_NB(NetBufferLists))->DataLength); - packets += 1; - if (packets == 100000) - { - DbgPrint("Recv 100k (%i)\n", (NET_BUFFER_LIST_FIRST_NB(NetBufferLists))->DataLength); - packets = 0; - } -#endif - { int result = -1; PNET_BUFFER pkt = NULL; PNET_BUFFER_LIST current_list = NetBufferLists; while (current_list && (pkt || NULL != (pkt = NET_BUFFER_LIST_FIRST_NB(current_list)) ) ) { - qBatch++; PVOID buffer = NdisGetDataBuffer(pkt, pkt->DataLength, NULL, 1, 0); if (buffer != NULL) { @@ -1798,8 +1783,6 @@ N.B.: It is important to check the ReceiveFlags in NDIS_TEST_RECEIVE_CANNOT_PEND * enqueues on an mbq and notifies */ } - //DbgPrint("Called: result= %i", result); - //DbgPrint("Data->pRxPointer: 0x%p &0x%p", netmap_hooks.pRxPointer, &netmap_hooks.pRxPointer); pkt = pkt->Next; if (pkt == NULL) @@ -1807,15 +1790,7 @@ N.B.: It is important to check the ReceiveFlags in NDIS_TEST_RECEIVE_CANNOT_PEND current_list = NET_BUFFER_LIST_NEXT_NBL(current_list); } } - } -#if 0 - if (qBatch > maxBatch) - { - maxBatch = qBatch; - DbgPrint("MaxBatch: %i", maxBatch); - } -#endif - NdisFReturnNetBufferLists(pFilter->FilterHandle, NetBufferLists, ReceiveFlags); + NdisFReturnNetBufferLists(pFilter->FilterHandle, NetBufferLists, ReceiveFlags); } else { diff --git a/WINDOWS/win_glue.h b/WINDOWS/win_glue.h index 12586b95c..6c41e24c8 100644 --- a/WINDOWS/win_glue.h +++ b/WINDOWS/win_glue.h @@ -36,7 +36,7 @@ #define _WIN32 /* we use _WIN32 throughout the code */ #else /* some MSC pragmas etc. 
*/ -//Disabling unuseful warnings +/* Disabling some warnings */ #pragma warning(disable:4018) // expression: signed/unsigned mismatch #pragma warning(disable:4047) // operator: different levels of indirection #pragma warning(disable:4098) // void function returning a value - netmap_mem2.c @@ -81,7 +81,7 @@ #define M_ZERO 2 /* flags for malloc etc */ -//Originally defined in LINUX\IF.H +/* Originally defined in linux/if.h */ #define IFNAMSIZ 44//IF_NAMESIZE //defined in netioapi.h, is 256 //XXX_ale must set the same here and in userspace somehow @@ -140,7 +140,7 @@ typedef ULONG vm_ooffset_t; #ifdef _MSC_VER #define inline __inline #define __builtin_prefetch(x) _mm_prefetch(x, _MM_HINT_T2) -#endif //_MSC_VER +#endif /* _MSC_VER */ static void panic(const char *fmt, ...) { @@ -196,9 +196,9 @@ static inline void mtx_unlock(win_spinlock_t *m) #define BDG_RLOCK(b) ExAcquireResourceSharedLite(&b->bdg_lock,TRUE) #define BDG_RUNLOCK(b) ExReleaseResourceLite(&b->bdg_lock) #define BDG_RTRYLOCK(b) ExAcquireResourceExclusiveLite(&b->bdg_lock, FALSE) -#define BDG_SET_VAR(lval, p) ((lval) = (p)) +#define BDG_SET_VAR(lval, p) ((lval) = (p)) #define BDG_GET_VAR(lval) (lval) -#define BDG_FREE(p) free(p) +#define BDG_FREE(p) free(p) /* @@ -250,12 +250,12 @@ struct netmap_adapter; struct net_device { char if_xname[IFNAMSIZ]; // external name (name + unit) // struct ifaltq if_snd; /* output queue (includes altq) */ - struct netmap_adapter* na; - void * pfilter; - int * intercept; // bit 0: enable rx, bit 1 enable tx + struct netmap_adapter *na; + void *pfilter; + int *intercept; // bit 0: enable rx, bit 1 enable tx #define NM_WIN_CATCH_RX 1 #define NM_WIN_CATCH_TX 2 - int ifIndex; + int ifIndex; NPAGED_LOOKASIDE_LIST mbuf_pool; NPAGED_LOOKASIDE_LIST mbuf_packets_pool; @@ -481,15 +481,15 @@ PVOID send_up_to_stack(struct ifnet *ifp, struct mbuf *m, PVOID head); #define NM_BNS_GET(b) do { (void)(b); } while (0) #define NM_BNS_PUT(b) do { (void)(b); } while (0) 
-/********************************************************* -* ATOMIC OPERATIONS * -**********************************************************/ -#define NM_ATOMIC_T volatile long -#define atomic_t NM_ATOMIC_T +/* + * ATOMIC OPERATIONS + */ +#define NM_ATOMIC_T volatile long +#define atomic_t NM_ATOMIC_T #define NM_ATOMIC_TEST_AND_SET(p) (!InterlockedBitTestAndSet(p,0)) #define NM_ATOMIC_CLEAR(p) InterlockedBitTestAndReset(p,0) -#define refcount_acquire(_a) InterlockedExchangeAdd((atomic_t *)_a, 1) -#define refcount_release(_a) (InterlockedDecrement((atomic_t *)_a) <= 0) +#define refcount_acquire(_a) InterlockedExchangeAdd((atomic_t *)_a, 1) +#define refcount_release(_a) (InterlockedDecrement((atomic_t *)_a) <= 0) #define NM_ATOMIC_SET(p, v) InterlockedExchange(p, v) #define NM_ATOMIC_INC(p) InterlockedIncrement(p) #define NM_ATOMIC_READ_AND_CLEAR(p) InterlockedExchange(p, 0) @@ -515,9 +515,9 @@ PVOID send_up_to_stack(struct ifnet *ifp, struct mbuf *m, PVOID head); //-------------------------------------------------------- -/********************************************************* -* SYSCTL emulation (copied from dummynet/glue.h) * -**********************************************************/ +/* + * SYSCTL emulation (from dummynet/glue.h) + */ struct sock; // XXX unused int do_netmap_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len); @@ -578,12 +578,12 @@ struct sysctlhead { #define GST_HARD_LIMIT 100 /* In the module, GST is implemented as an array of -* sysctlentry, but while passing data to the userland -* pointers are useless, the buffer is actually made of: -* - sysctlhead (fixed size, containing lengths) -* - data (typically 32 bit) -* - name (zero-terminated and padded to mod4) -*/ + * sysctlentry, but while passing data to the userland + * pointers are useless, the buffer is actually made of: + * - sysctlhead (fixed size, containing lengths) + * - data (typically 32 bit) + * - name (zero-terminated and padded to mod4) + */ struct 
sysctlentry { struct sysctlhead head; @@ -695,4 +695,4 @@ int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size #endif -#endif //_WIN_GLUE_H +#endif /* _WIN_GLUE_H */ From 2011b3bbe1c40d9a73e5825ca94745e51114988b Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Fri, 14 Aug 2015 22:15:11 -0700 Subject: [PATCH 09/18] small tweaks to the text --- WINDOWS/README.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/WINDOWS/README.txt b/WINDOWS/README.txt index fd7097d7f..e6651a5ef 100644 --- a/WINDOWS/README.txt +++ b/WINDOWS/README.txt @@ -1,9 +1,9 @@ -This directory contains Windows version of netmap, developed by +This directory contains the Windows version of netmap, developed by Alessio Faina as part of his MS thesis at the Universita` di Pisa. -This version is made of two kernel modules, that you should build -as indicated in the "BUILD INSTRUCTIONS" section below. -A test program, pkt-gen, is also supplied. +This version is made of two kernel modules, that should be built +as indicated in the "BUILD INSTRUCTIONS" section below, +which also build the standard netmap test program, pkt-gen. netmap.sys implements the netmap API, VALE switch, netmap pipes, monitor @@ -27,8 +27,8 @@ A test program, pkt-gen, is also supplied. nm-ndis.sys implements access to the NIC and the host stack using native - Windows NIC drivers. Though faster than sockets, speed is - limited by the device drivers, in the 1-2Mpps range. + Windows NIC drivers. Though faster than sockets, speed is only + 1-2 Mpps, limited by the standard device drivers. The module must be installed as a network service (see "INSTALL NM-NDIS" below) after which you can send or receive @@ -42,8 +42,8 @@ A test program, pkt-gen, is also supplied. netsh int ipv4 show interfaces Note that on 64-bit platforms kernel modules should be signed. 
-The build script we supply do a Test-sign of the modules, and to -load them you should run the Operating System in Test-mode. +The build script does a Test-sign of the modules, and to load them one +should run the Operating System in Test-mode. -------------------- BUILD INSTRUCTIONS -------------------- From b2ee383d90e9d920a4d271f4f583831bdf55fabd Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Sat, 15 Aug 2015 18:11:53 -0700 Subject: [PATCH 10/18] we need the contigmalloc'ed memory to be zeroed --- WINDOWS/win_glue.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/WINDOWS/win_glue.h b/WINDOWS/win_glue.h index 6c41e24c8..8fc0ad9d3 100644 --- a/WINDOWS/win_glue.h +++ b/WINDOWS/win_glue.h @@ -371,9 +371,6 @@ static void nm_os_mitigation_cleanup(struct nm_generic_mit *mit) * KERNEL MEMORY ALLOCATION and management */ -char* win_contigMalloc(int sz, int page_size); -void win_ContigFree(void* virtualAddress); - #define bcopy(_s, _d, _l) RtlCopyMemory(_d, _s, _l) #define bzero(addr, size) RtlZeroMemory(addr, size) #define malloc(size, _ty, flags) win_kernel_malloc(size, _ty, flags) @@ -498,6 +495,17 @@ PVOID send_up_to_stack(struct ifnet *ifp, struct mbuf *m, PVOID head); #define make_dev_credf(_a, _b, ...) ((void *)1) // non-null +static char * +win_contigmalloc(int sz, int page_size) +{ + char* p = ExAllocatePoolWithTag(NonPagedPool, sz, M_NETMAP); + + if (p != NULL) { /* we rely on this memory to be zero-ed */ + RtlZeroMemory(p, sz); + } + return p; +} + /* * At the moment we can just do regular malloc on Windows. * The only use for contigmalloc would be for netmap buffers @@ -507,7 +515,7 @@ PVOID send_up_to_stack(struct ifnet *ifp, struct mbuf *m, PVOID head); * would work for that, but they are incredibly slow. 
*/ #define contigmalloc(sz, ty, flags, a, b, pgsz, c) \ - ExAllocatePoolWithTag(NonPagedPool, sz, M_NETMAP) + win_contigmalloc(sz, M_NETMAP) #define contigfree(va, sz, ty) ExFreePoolWithTag(va, M_NETMAP) #define vtophys MmGetPhysicalAddress From d4bf89c0804fcd222f37cfebf1ce920bf08e44f9 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Sat, 15 Aug 2015 18:12:32 -0700 Subject: [PATCH 11/18] actually catch packets from the host stack --- WINDOWS/nm-ndis/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WINDOWS/nm-ndis/filter.c b/WINDOWS/nm-ndis/filter.c index cbdcbf1b9..904749452 100644 --- a/WINDOWS/nm-ndis/filter.c +++ b/WINDOWS/nm-ndis/filter.c @@ -1500,7 +1500,7 @@ Routine Description: * can write the handle_tx as a function that queues the packets in an mbq * XXX at the moment, however, just make a deep copy */ - if (netmap_hooks.handle_tx != NULL && (pFilter->intercept & NM_WIN_CATCH_TX) && 0) { + if (netmap_hooks.handle_tx != NULL && (pFilter->intercept & NM_WIN_CATCH_TX)) { int result = -1; PNET_BUFFER pkt = NULL; PNET_BUFFER_LIST current_list = NetBufferLists; From 5b074802899d515557560c0cd0c8f1e53eb2ae44 Mon Sep 17 00:00:00 2001 From: Luigi Rizzo Date: Tue, 25 Aug 2015 11:51:15 +0200 Subject: [PATCH 12/18] remove stale comment --- sys/dev/netmap/netmap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index aa531a5c4..c1c6393e3 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -1173,10 +1173,8 @@ netmap_txsync_to_host(struct netmap_adapter *na) * We protect access to the kring using kring->rx_queue.lock * * This routine also does the selrecord if called from the poll handler - * (we know because td != NULL). + * (we know because sr != NULL). * - * NOTE: on linux, selrecord() is defined as a macro and uses pwait - * as an additional hidden argument. 
* returns the number of packets delivered to tx queues in * transparent mode, or a negative value if error */ From 82747fa88b1d3267da477dd1119e5fdb2fa4e9b0 Mon Sep 17 00:00:00 2001 From: jozsef imrek Date: Wed, 12 Aug 2015 23:04:48 +0200 Subject: [PATCH 13/18] use accessor (with provision for multiple mempools) to get allocator --- sys/dev/netmap/netmap.c | 4 ++-- sys/dev/netmap/netmap_mem2.c | 5 +++++ sys/dev/netmap/netmap_mem2.h | 5 ++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index c1c6393e3..096879ca0 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -2110,7 +2110,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread NMG_LOCK(); do { /* memsize is always valid */ - struct netmap_mem_d *nmd = &nm_mem; + struct netmap_mem_d *nmd = netmap_mem_get_allocator(0); u_int memflags; if (nmr->nr_name[0] != '\0') { @@ -2659,7 +2659,7 @@ netmap_attach_common(struct netmap_adapter *na) if (na->nm_mem == NULL) /* use the global allocator */ - na->nm_mem = &nm_mem; + na->nm_mem = netmap_mem_get_allocator(0); netmap_mem_get(na->nm_mem); #ifdef WITH_VALE if (na->nm_bdg_attach == NULL) diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index 0beeec281..7e53246d6 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -1401,6 +1401,11 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd) return nmd->lasterr; } +struct netmap_mem_d * +netmap_mem_get_allocator(int nm_id){ + return &nm_mem; +} + static void diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h index a89b3f728..ecb1b2607 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -113,10 +113,9 @@ * reconfiguration. The private allocators release all their memory when there * are no active users. By 'active user' we mean an existing netmap_priv * structure holding a reference to the allocator. 
- */ - -extern struct netmap_mem_d nm_mem; + */ +struct netmap_mem_d * netmap_mem_get_allocator(int nm_id); int netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *); vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t); #ifdef _WIN32 From af3724e0bd76621ddd52aef1736fc22f9a3fcd4e Mon Sep 17 00:00:00 2001 From: jozsef imrek Date: Wed, 19 Aug 2015 11:22:46 +0200 Subject: [PATCH 14/18] initial support for multiple global memory pools --- sys/dev/netmap/netmap_mem2.c | 81 ++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index 7e53246d6..b95b59780 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -365,24 +365,24 @@ static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = { * Virtual (VALE) ports will have each its own allocator. */ extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */ -struct netmap_mem_d nm_mem = { /* Our memory allocator. */ +struct netmap_mem_d nm_mem_blueprint = { .pools = { [NETMAP_IF_POOL] = { - .name = "netmap_if", + .name = "netmap_%d_if", .objminsize = sizeof(struct netmap_if), .objmaxsize = 4096, .nummin = 10, /* don't be stingy */ .nummax = 10000, /* XXX very large */ }, [NETMAP_RING_POOL] = { - .name = "netmap_ring", + .name = "netmap_%d_ring", .objminsize = sizeof(struct netmap_ring), .objmaxsize = 32*PAGE_SIZE, .nummin = 2, .nummax = 1024, }, [NETMAP_BUF_POOL] = { - .name = "netmap_buf", + .name = "netmap_%d_buf", .objminsize = 64, .objmaxsize = 65536, .nummin = 4, @@ -390,17 +390,22 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. 
*/ }, }, - .nm_id = 1, + .nm_id = -1, .nm_grp = -1, - .prev = &nm_mem, - .next = &nm_mem, + .prev = NULL, + .next = NULL, .ops = &netmap_mem_global_ops }; -static struct netmap_mem_d *netmap_last_mem_d = &nm_mem; +/* number of global memory pools, should be dynamic */ +#define NGPOOLS 2 + +struct netmap_mem_d nm_mems[NGPOOLS]; /* Our global memory allocator. */ + +static struct netmap_mem_d *netmap_last_mem_d = &nm_mems[NGPOOLS - 1]; /* blueprint for the private memory allocators */ extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */ @@ -440,17 +445,18 @@ static const struct netmap_mem_d nm_blueprint = { #define STRINGIFY(x) #x +/* TODO: not much sense for nm_mem_blueprint */ #define DECLARE_SYSCTLS(id, name) \ SYSBEGIN(mem2_ ## name); \ SYSCTL_DECL(_dev_netmap); /* leave it here, easier for porting */ \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \ CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \ - CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ + CTLFLAG_RD, &nm_mem_blueprint.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \ CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \ - CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \ + CTLFLAG_RD, &nm_mem_blueprint.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \ CTLFLAG_RW, &netmap_min_priv_params[id].size, 0, \ "Default size of private netmap " STRINGIFY(name) "s"); \ @@ -463,7 +469,7 @@ DECLARE_SYSCTLS(NETMAP_IF_POOL, if); DECLARE_SYSCTLS(NETMAP_RING_POOL, ring); DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf); -/* call with NMA_LOCK(&nm_mem) held */ +/* call with 
NMA_LOCK(&nm_mems[0]) held */ static int nm_mem_assign_id_locked(struct netmap_mem_d *nmd) { @@ -475,7 +481,7 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd) /* we rely on unsigned wrap around */ id = scan->nm_id + 1; if (id == 0) /* reserve 0 as error value */ - id = 1; + id = NGPOOLS; scan = scan->next; if (id != scan->nm_id) { nmd->nm_id = id; @@ -492,15 +498,15 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd) return error; } -/* call with NMA_LOCK(&nm_mem) *not* held */ +/* call with NMA_LOCK(&nm_mems[0]) *not* held */ static int nm_mem_assign_id(struct netmap_mem_d *nmd) { int ret; - NMA_LOCK(&nm_mem); + NMA_LOCK(&nm_mems[0]); ret = nm_mem_assign_id_locked(nmd); - NMA_UNLOCK(&nm_mem); + NMA_UNLOCK(&nm_mems[0]); return ret; } @@ -508,7 +514,7 @@ nm_mem_assign_id(struct netmap_mem_d *nmd) static void nm_mem_release_id(struct netmap_mem_d *nmd) { - NMA_LOCK(&nm_mem); + NMA_LOCK(&nm_mems[0]); nmd->prev->next = nmd->next; nmd->next->prev = nmd->prev; @@ -518,7 +524,7 @@ nm_mem_release_id(struct netmap_mem_d *nmd) nmd->prev = nmd->next = NULL; - NMA_UNLOCK(&nm_mem); + NMA_UNLOCK(&nm_mems[0]); } static int @@ -1401,9 +1407,10 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd) return nmd->lasterr; } +/* always return something sensible, even if nm_id is out of range */ struct netmap_mem_d * netmap_mem_get_allocator(int nm_id){ - return &nm_mem; + return &nm_mems[nm_id % NGPOOLS]; } @@ -1617,27 +1624,49 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd) static void netmap_mem_global_delete(struct netmap_mem_d *nmd) { - int i; + unsigned int i, j; - for (i = 0; i < NETMAP_POOLS_NR; i++) { - netmap_destroy_obj_allocator(&nm_mem.pools[i]); + for (i = 0; i < NGPOOLS; i++) { + struct netmap_mem_d *nmd = &nm_mems[i]; + for (j = 0; j < NETMAP_POOLS_NR; j++) { + netmap_destroy_obj_allocator(&nmd->pools[j]); + } + NMA_LOCK_DESTROY(nmd); } - - NMA_LOCK_DESTROY(&nm_mem); } int netmap_mem_init(void) { - NMA_LOCK_INIT(&nm_mem); - netmap_mem_get(&nm_mem); + unsigned 
int i, j; + + for(i = 0; i < NGPOOLS; i++){ + struct netmap_mem_d *nmd = &nm_mems[i]; + *nmd = nm_mem_blueprint; + NMA_LOCK_INIT(nmd); + nmd->nm_id = i + 1; + nmd->prev = nm_mems + ((i - 1) % NGPOOLS); + nmd->next = nm_mems + ((i + 1) % NGPOOLS); + for (j = 0; j < NETMAP_POOLS_NR; j++){ + struct netmap_obj_pool *ref = &nm_mem_blueprint.pools[j]; + struct netmap_obj_pool *p = &nmd->pools[j]; + snprintf(p->name, NETMAP_POOL_MAX_NAMSZ, ref->name, nmd->nm_id); + } + netmap_mem_get(nmd); + } + return (0); } void netmap_mem_fini(void) { - netmap_mem_put(&nm_mem); + unsigned int i; + + for(i = 0; i < NGPOOLS; i++){ + struct netmap_mem_d *nmd = &nm_mems[i]; + netmap_mem_put(nmd); + } } static void From 042b9f85dcf694e8c11534d5bda8fb1333988c87 Mon Sep 17 00:00:00 2001 From: jozsef imrek Date: Wed, 19 Aug 2015 11:28:30 +0200 Subject: [PATCH 15/18] pass request details to netmap_get_hw_na(), too --- sys/dev/netmap/netmap.c | 4 ++-- sys/dev/netmap/netmap_kern.h | 2 +- sys/dev/netmap/netmap_vale.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 096879ca0..408c7b76f 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -1266,7 +1266,7 @@ netmap_rxsync_from_host(struct netmap_adapter *na, NM_SELRECORD_T *sr) */ int -netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na) +netmap_get_hw_na(struct nmreq *nmr, struct ifnet *ifp, struct netmap_adapter **na) { /* generic support */ int i = netmap_admode; /* Take a snapshot. 
*/ @@ -1420,7 +1420,7 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create) return ENXIO; } - error = netmap_get_hw_na(ifp, &ret); + error = netmap_get_hw_na(nmr, ifp, &ret); if (error) goto out; diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index ded59e67b..94294e456 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -1199,7 +1199,7 @@ int netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg); int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create); -int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na); +int netmap_get_hw_na(struct nmreq *nmr, struct ifnet *ifp, struct netmap_adapter **na); #ifdef WITH_VALE diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index b48e925ab..4043e84fd 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -688,7 +688,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create) } else { struct netmap_adapter *hw; - error = netmap_get_hw_na(ifp, &hw); + error = netmap_get_hw_na(nmr, ifp, &hw); if (error || hw == NULL) goto out; From ec705a761df3bc429d5e179f1b13fc2677e47037 Mon Sep 17 00:00:00 2001 From: jozsef imrek Date: Wed, 26 Aug 2015 15:53:08 +0000 Subject: [PATCH 16/18] use nr_arg2 from ioctl's argument to select a memory pool --- sys/dev/netmap/netmap.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 408c7b76f..8f96fd080 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -1340,6 +1340,16 @@ netmap_get_hw_na(struct nmreq *nmr, struct ifnet *ifp, struct netmap_adapter **n } ND("Created generic NA %p (prev %p)", gna, gna->prev); + if( nmr->nr_arg2 ){ + int poolno = nmr->nr_arg2 - 1; + struct netmap_mem_d *old = (*na)->nm_mem; + struct netmap_mem_d *new = 
netmap_mem_get_allocator( poolno ); + netmap_mem_put(old); + netmap_mem_get(new); + (*na)->nm_mem = new; + ND("force mempool #%d for %s: %p -> %p", poolno, ifp->if_xname, old, new); + } + return 0; #else /* !WITH_GENERIC */ return EOPNOTSUPP; From 5845acf38aa2485b6de26d392146c0d94f1c70e0 Mon Sep 17 00:00:00 2001 From: jozsef imrek Date: Tue, 25 Aug 2015 09:45:57 +0200 Subject: [PATCH 17/18] map global memory pool to numa node (1:1) --- sys/dev/netmap/netmap_mem2.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index b95b59780..7025b8b2b 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -191,6 +191,7 @@ struct netmap_mem_d { nm_memid_t nm_id; /* allocator identifier */ int nm_grp; /* iommu groupd id */ + int nm_numa; /* numa node obj_pool is bound to */ /* list of all existing allocators, sorted by nm_id */ struct netmap_mem_d *prev, *next; @@ -392,6 +393,7 @@ struct netmap_mem_d nm_mem_blueprint = { .nm_id = -1, .nm_grp = -1, + .nm_numa = -1, .prev = NULL, .next = NULL, @@ -437,6 +439,8 @@ static const struct netmap_mem_d nm_blueprint = { .flags = NETMAP_MEM_PRIVATE, + .nm_grp = -1, + .ops = &netmap_mem_private_ops }; @@ -1169,7 +1173,7 @@ nm_alloc_lut(u_int nobj) /* call with NMA_LOCK held */ static int -netmap_finalize_obj_allocator(struct netmap_obj_pool *p) +netmap_finalize_obj_allocator(struct netmap_obj_pool *p, int numanode) { int i; /* must be signed */ size_t n; @@ -1376,7 +1380,7 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd) nmd->lasterr = 0; nmd->nm_totalsize = 0; for (i = 0; i < NETMAP_POOLS_NR; i++) { - nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i]); + nmd->lasterr = netmap_finalize_obj_allocator(&nmd->pools[i], nmd->nm_numa); if (nmd->lasterr) goto error; nmd->nm_totalsize += nmd->pools[i].memtotal; @@ -1645,6 +1649,7 @@ netmap_mem_init(void) *nmd = nm_mem_blueprint; NMA_LOCK_INIT(nmd); nmd->nm_id = i + 1; + 
nmd->nm_numa = i; nmd->prev = nm_mems + ((i - 1) % NGPOOLS); nmd->next = nm_mems + ((i + 1) % NGPOOLS); for (j = 0; j < NETMAP_POOLS_NR; j++){ From cc6ec5b3c1ab82a8c2c4ea94cdb4c9c5cc9e99b2 Mon Sep 17 00:00:00 2001 From: jozsef imrek Date: Tue, 25 Aug 2015 09:50:48 +0200 Subject: [PATCH 18/18] make contigmalloc() numa aware (with ugly abuse of preprocessor macros) --- LINUX/bsd_glue.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/LINUX/bsd_glue.h b/LINUX/bsd_glue.h index 4d5192eb2..341346444 100644 --- a/LINUX/bsd_glue.h +++ b/LINUX/bsd_glue.h @@ -325,10 +325,15 @@ static inline int ilog2(uint64_t n) } #endif /* ilog2 */ +/* + * TODO: ugly hack: abuse the fact that contigmalloc() is only called from + * netmap_finalize_obj_allocator(), where a variable named numanode is in + * scope. + */ #define contigmalloc(sz, ty, flags, a, b, pgsz, c) ({ \ unsigned int order_ = \ ilog2(roundup_pow_of_two(sz)/PAGE_SIZE); \ struct page *p_ = alloc_pages_node(numanode, GFP_ATOMIC | __GFP_ZERO, \ order_); \ if (p_ != NULL) \ split_page(p_, order_); \