diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..6c6ab3f --- /dev/null +++ b/.clang-format @@ -0,0 +1,26 @@ +# clang-format 10.0 + +# general +BasedOnStyle: WebKit +ColumnLimit: 120 +SortIncludes: false +AllowShortFunctionsOnASingleLine: None +IndentGotoLabels: false +BreakBeforeBinaryOperators: None + +# indentation +IndentWidth: 8 +TabWidth: 8 +UseTab: ForIndentation +IndentCaseLabels: true + +# alignment +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: true +AlignConsecutiveMacros: true +PointerAlignment: Right +AlignAfterOpenBracket: Align + +# whitespace management +SpacesInParentheses: true +SpacesInCStyleCastParentheses: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c731f5a --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +Makefile.in +aclocal.m4 +autom4te.cache/ +compile +config.guess +config.h.in +config.sub +configure +depcomp +examples/Makefile.in +include/Makefile.in +install-sh +ltmain.sh +m4/ +missing +src/Makefile.in diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..450d161 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,8 @@ +Mihai Donțu +Adalbert Lazăr +Marian Rotariu +Nicușor Cîțu +Mircea Cîrjaliu +Yang Weijiang +Ștefan Șicleru +Cătălin Marian Pichiu diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..65c5ca8 --- /dev/null +++ b/COPYING @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. 
If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..e69de29 diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..8865734 --- /dev/null +++ b/INSTALL @@ -0,0 +1,368 @@ +Installation Instructions +************************* + + Copyright (C) 1994-1996, 1999-2002, 2004-2016 Free Software +Foundation, Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. + +Basic Installation +================== + + Briefly, the shell command './configure && make && make install' +should configure, build, and install this package. The following +more-detailed instructions are generic; see the 'README' file for +instructions specific to this package. Some packages provide this +'INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. + + The 'configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a 'Makefile' in each directory of the package. +It may also create one or more '.h' files containing system-dependent +definitions. 
Finally, it creates a shell script 'config.status' that +you can run in the future to recreate the current configuration, and a +file 'config.log' containing compiler output (useful mainly for +debugging 'configure'). + + It can also use an optional file (typically called 'config.cache' and +enabled with '--cache-file=config.cache' or simply '-C') that saves the +results of its tests to speed up reconfiguring. Caching is disabled by +default to prevent problems with accidental use of stale cache files. + + If you need to do unusual things to compile the package, please try +to figure out how 'configure' could check whether to do them, and mail +diffs or instructions to the address given in the 'README' so they can +be considered for the next release. If you are using the cache, and at +some point 'config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file 'configure.ac' (or 'configure.in') is used to create +'configure' by a program called 'autoconf'. You need 'configure.ac' if +you want to change it or regenerate 'configure' using a newer version of +'autoconf'. + + The simplest way to compile this package is: + + 1. 'cd' to the directory containing the package's source code and type + './configure' to configure the package for your system. + + Running 'configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type 'make' to compile the package. + + 3. Optionally, type 'make check' to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 4. Type 'make install' to install the programs and any data files and + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the 'make install' phase executed with root + privileges. + + 5. Optionally, type 'make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior 'make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the + source code directory by typing 'make clean'. To also remove the + files that 'configure' created (so you can compile the package for + a different kind of computer), type 'make distclean'. There is + also a 'make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 7. Often, you can also type 'make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. Some packages, particularly those that use Automake, provide 'make + distcheck', which can be used by developers to test that all other + targets like 'make install' and 'make uninstall' work correctly. + This target is generally not run by end users. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the 'configure' script does not know about. Run './configure --help' +for details on some of the pertinent environment variables.
+ + You can give 'configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here is +an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU 'make'. 'cd' to the +directory where you want the object files and executables to go and run +the 'configure' script. 'configure' automatically checks for the source +code in the directory that 'configure' is in and in '..'. This is known +as a "VPATH" build. + + With a non-GNU 'make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use 'make distclean' before +reconfiguring for another architecture. + + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple '-arch' options to the +compiler but only a single '-arch' option to the preprocessor. Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the 'lipo' tool if you have problems. + +Installation Names +================== + + By default, 'make install' installs the package's commands under +'/usr/local/bin', include files under '/usr/local/include', etc. You +can specify an installation prefix other than '/usr/local' by giving +'configure' the option '--prefix=PREFIX', where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option '--exec-prefix=PREFIX' to 'configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like '--bindir=DIR' to specify different values for particular +kinds of files. Run 'configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the default +for these options is expressed in terms of '${prefix}', so that +specifying just '--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to 'configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +'make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. For example, 'make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +'${prefix}'. 
Any directories that were specified during 'configure', +but not in terms of '${prefix}', must each be overridden at install time +for the entire installation to be relocated. The approach of makefile +variable overrides for each directory variable is required by the GNU +Coding Standards, and ideally causes no recompilation. However, some +platforms have known limitations with the semantics of shared libraries +that end up requiring recompilation when using this method, particularly +noticeable in packages that use GNU Libtool. + + The second method involves providing the 'DESTDIR' variable. For +example, 'make install DESTDIR=/alternate/directory' will prepend +'/alternate/directory' before all installation names. The approach of +'DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of '${prefix}' +at 'configure' time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving 'configure' the +option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'. + + Some packages pay attention to '--enable-FEATURE' options to +'configure', where FEATURE indicates an optional part of the package. +They may also pay attention to '--with-PACKAGE' options, where PACKAGE +is something like 'gnu-as' or 'x' (for the X Window System). The +'README' should mention any '--enable-' and '--with-' options that the +package recognizes. + + For packages that use the X Window System, 'configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the 'configure' options '--x-includes=DIR' and +'--x-libraries=DIR' to specify their locations. + + Some packages offer the ability to configure how verbose the +execution of 'make' will be. For these packages, running './configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with 'make V=1'; while running './configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with 'make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC +is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. + + HP-UX 'make' updates targets which have the same time stamps as their +prerequisites, which makes it generally unusable when shipped generated +files such as 'configure' are involved. Use GNU 'make' instead. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its '<wchar.h>' header file. The option '-nodtk' can be used as a +workaround. If GNU CC is not installed, it is therefore recommended to +try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put '/usr/ucb' early in your 'PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in '/usr/bin'. So, if you need '/usr/ucb' +in your 'PATH', put it _after_ '/usr/bin'. + + On Haiku, software installed for all users goes in '/boot/common', +not '/usr/local'. 
It is recommended to use the following options: + + ./configure --prefix=/boot/common + +Specifying the System Type +========================== + + There may be some features 'configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, 'configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +'--build=TYPE' option. TYPE can either be a short name for the system +type, such as 'sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS + KERNEL-OS + + See the file 'config.sub' for the possible values of each field. If +'config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option '--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with '--host=TYPE'. + +Sharing Defaults +================ + + If you want to set default values for 'configure' scripts to share, +you can create a site shell script called 'config.site' that gives +default values for variables like 'CC', 'cache_file', and 'prefix'. +'configure' looks for 'PREFIX/share/config.site' if it exists, then +'PREFIX/etc/config.site' if it exists. Or, you can set the +'CONFIG_SITE' environment variable to the location of the site script. +A warning: not all 'configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to 'configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the 'configure' command line, using 'VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified 'gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an +Autoconf limitation. Until the limitation is lifted, you can use this +workaround: + + CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash + +'configure' Invocation +====================== + + 'configure' recognizes the following options to control how it +operates. + +'--help' +'-h' + Print a summary of all of the options to 'configure', and exit. + +'--help=short' +'--help=recursive' + Print a summary of the options unique to this package's + 'configure', and exit. The 'short' variant lists options used only + in the top level, while the 'recursive' variant lists options also + present in any nested packages. + +'--version' +'-V' + Print the version of Autoconf used to generate the 'configure' + script, and exit. + +'--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally 'config.cache'. FILE defaults to '/dev/null' to + disable caching. + +'--config-cache' +'-C' + Alias for '--cache-file=config.cache'. + +'--quiet' +'--silent' +'-q' + Do not print messages saying which checks are being made. 
To + suppress all normal output, redirect it to '/dev/null' (any error + messages will still be shown). + +'--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + 'configure' can determine that directory automatically. + +'--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: for + more details, including other options available for fine-tuning the + installation locations. + +'--no-create' +'-n' + Run the configure checks, but stop before creating any output + files. + +'configure' also accepts some other, not widely useful, options. Run +'configure --help' for more details. diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..25d6098 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,6 @@ +SUBDIRS = src include examples +EXTRA_DIST = bootstrap +ACLOCAL_AMFLAGS = -I m4 + +pkgconfigdir = ${libdir}/pkgconfig +pkgconfig_DATA = libkvmi.pc diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..e69de29 diff --git a/README b/README new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md new file mode 100644 index 0000000..7e5a18e --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +# Libkvmi + +(c) 2017-2020 Bitdefender SRL + +## Usage + +To test the library, issue: +``` +$ ./bootstrap +$ ./configure +$ make +``` +This will build the library and the test under `examples/`. + +To see the test in action, ensure you have a Linux kernel built with +CONFIG_KVM_INTROSPECTION and already installed +([kvmi-v7 branch](https://github.com/KVM-VMI/kvm/tree/kvmi-v7)). + + Virtualization + Kernel-based Virtual Machine (KVM) support + KVM Introspection + +You also need QEMU built with VM introspection support +([kvmi-v7 branch](https://github.com/KVM-VMI/qemu/tree/kvmi-v7)). + +In the `examples/` subdirectory run: +``` +# ./hookguest-libkvmi /tmp/introspector +``` +then simply start a KVM domain up with: +``` + qemu-system-x86_64 ... \ + -enable-kvm \ + -chardev socket,path=/tmp/introspector,id=chardev0,reconnect=10 \ + -object introspection,id=kvmi,chardev=chardev0 +``` + +The application can be shut down at any time via `^C`. 
diff --git a/bootstrap b/bootstrap new file mode 100755 index 0000000..c5a7472 --- /dev/null +++ b/bootstrap @@ -0,0 +1,3 @@ +#!/bin/sh + +autoreconf --install diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..23e90f7 --- /dev/null +++ b/configure.ac @@ -0,0 +1,31 @@ +AC_INIT([libkvmi], [0.1]) + +LT_INIT + +AC_CONFIG_SRCDIR(src/kvmi.c) +AC_CONFIG_MACRO_DIR([m4]) +AC_CONFIG_HEADERS(config.h) + +CFLAGS="$CFLAGS -W -Wall" + +AC_ARG_ENABLE(debug, + [ --enable-debug compile with gdb debug information], + CFLAGS="$CFLAGS -g") + +AC_ARG_ENABLE(optimize, + [ --enable-optimize optimize compiled code (-O2)], + CFLAGS="$CFLAGS -O2") + +AM_INIT_AUTOMAKE +AC_PROG_CC +AC_PROG_INSTALL +AC_PROG_LIBTOOL + +AC_CANONICAL_HOST +AS_CASE([$host_cpu], + [x86_64], [ARCH=x86_64], + [AC_MSG_ERROR([No support for CPU $host_cpu])] +) +AC_SUBST([ARCH]) + +AC_OUTPUT(Makefile src/Makefile include/Makefile examples/Makefile libkvmi.pc) diff --git a/examples/Makefile.am b/examples/Makefile.am new file mode 100644 index 0000000..70b8621 --- /dev/null +++ b/examples/Makefile.am @@ -0,0 +1,6 @@ +AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/include/linux/$(ARCH) + +bin_PROGRAMS = hookguest-libkvmi + +hookguest_libkvmi_SOURCES = hookguest.c +hookguest_libkvmi_LDADD = $(top_builddir)/src/libkvmi.la -ldl diff --git a/examples/hookguest.c b/examples/hookguest.c new file mode 100644 index 0000000..797adca --- /dev/null +++ b/examples/hookguest.c @@ -0,0 +1,424 @@ +/* + * Copyright (C) 2017-2020 Bitdefender S.R.L. + * + * The program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * The program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * http://www.gnu.org/licenses + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> + +#include <libkvmi.h> + +#define MAX_VCPU 256 + +#define EPT_TEST_PAGES 20 +#define PAGE_SIZE 4096 +#define WAIT_30s ( 30 * 1000 ) + +#define CR3 3 +#define CR4 4 + +#define MSR_STAR 0xc0000081 + +static void *Dom; + +static const char *access_str[] = { + "---", "r--", "-w-", "rw-", "--x", "r-x", "-wx", "rwx", +}; + +static void die( const char *msg ) +{ + perror( msg ); + exit( 1 ); +} + +static void setup_vcpu_reply( struct kvmi_dom_event *ev, struct kvmi_vcpu_hdr *rpl, int action ) +{ + struct kvmi_event_reply *common = (struct kvmi_event_reply *)( rpl + 1 ); + + memset( rpl, 0, sizeof( *rpl ) ); + rpl->vcpu = ev->event.common.vcpu; + + memset( common, 0, sizeof( *common ) ); + common->action = action; + common->event = ev->event.common.event; +} + +static void reply_continue( void *dom, struct kvmi_dom_event *ev, struct kvmi_vcpu_hdr *rpl, size_t rpl_size ) +{ + setup_vcpu_reply( ev, rpl, KVMI_EVENT_ACTION_CONTINUE ); + + printf( "Reply with CONTINUE (vcpu%u)\n", ev->event.common.vcpu ); + + if ( kvmi_reply_event( dom, ev->seq, rpl, rpl_size ) ) + die( "kvmi_reply_event" ); +} + +static void reply_retry( void *dom, struct kvmi_dom_event *ev, struct kvmi_vcpu_hdr *rpl, size_t rpl_size ) +{ + setup_vcpu_reply( ev, rpl, KVMI_EVENT_ACTION_RETRY ); + + printf( "Reply with RETRY (vcpu%u)\n", ev->event.common.vcpu ); + + if ( kvmi_reply_event( dom, ev->seq, rpl, rpl_size ) ) + die( "kvmi_reply_event" ); +} + +static void handle_cr_event( void *dom, struct kvmi_dom_event *ev ) +{ + struct kvmi_event_cr *cr = &ev->event.cr; + struct { + struct kvmi_vcpu_hdr hdr; + struct kvmi_event_reply common; + struct kvmi_event_cr_reply cr; + } rpl = { 0 }; + + printf( "CR%d 0x%llx -> 0x%llx (vcpu%u)\n", cr->cr, cr->old_value, cr->new_value, ev->event.common.vcpu ); + + rpl.cr.new_val = cr->new_value; + reply_continue( dom, ev, &rpl.hdr, sizeof( rpl ) ); +} + +static void handle_msr_event( void *dom, struct kvmi_dom_event *ev ) +{ + struct kvmi_event_msr *msr = &ev->event.msr; + struct { + struct kvmi_vcpu_hdr hdr; + struct kvmi_event_reply common; + struct kvmi_event_msr_reply msr; + } rpl = { 0 }; + + printf( "MSR 0x%x 0x%llx -> 0x%llx (vcpu%u)\n", msr->msr, msr->old_value, msr->new_value, + ev->event.common.vcpu ); + + rpl.msr.new_val = msr->new_value; + reply_continue( dom, ev, &rpl.hdr, sizeof( rpl ) ); +} + +static void enable_vcpu_events( void *dom, unsigned int vcpu ) +{ + bool enable = true; + + printf( "Enabling CR, MSR and PF events (vcpu%u)\n", vcpu ); + + if ( kvmi_control_events( dom, vcpu, KVMI_EVENT_CR, enable ) + || kvmi_control_events( dom, vcpu, KVMI_EVENT_MSR, enable ) + || kvmi_control_events( dom, vcpu, KVMI_EVENT_PF, enable ) ) + die( "kvmi_control_events" ); + + if ( vcpu == 0 ) { + printf( "Enabling CR3 events...\n" ); + + if ( kvmi_control_cr( dom, vcpu, CR3, enable ) ) + die( "kvmi_control_cr(3)" ); + } + + printf( "Enabling CR4 events...\n" ); + + if ( kvmi_control_cr( dom, vcpu, CR4, enable ) ) + die( "kvmi_control_cr(4)" ); + + printf( "Enabling MSR_STAR events...\n" ); + + if ( kvmi_control_msr( dom, vcpu, MSR_STAR, enable ) ) + die( "kvmi_control_msr(STAR)" ); +} + +static void handle_pause_vcpu_event( void *dom, struct kvmi_dom_event *ev ) +{ + struct { + struct kvmi_vcpu_hdr hdr; + struct kvmi_event_reply common; + } rpl = { 0 }; + unsigned int vcpu = ev->event.common.vcpu; + static bool events_enabled[MAX_VCPU]; + + printf( "PAUSE (vcpu%u)\n", vcpu ); + + if ( vcpu < MAX_VCPU && 
!events_enabled[vcpu] ) { + enable_vcpu_events( dom, vcpu ); + events_enabled[vcpu] = true; + } + + reply_continue( dom, ev, &rpl.hdr, sizeof( rpl ) ); +} + +static void set_page_access( void *dom, __u64 gpa, __u8 access ) +{ + printf( "Set page access gpa 0x%llx access %s [0x%x]\n", gpa, access_str[access & 7], access ); + + if ( kvmi_set_page_access( dom, &gpa, &access, 1, 0 ) ) + die( "kvmi_set_page_access" ); +} + +static void write_protect_page( void *dom, __u64 gpa ) +{ + set_page_access( dom, gpa, KVMI_PAGE_ACCESS_R | KVMI_PAGE_ACCESS_X ); +} + +static void maybe_start_pf_test( void *dom, struct kvmi_dom_event *ev ) +{ + static bool started; + __u64 cr3 = ev->event.common.arch.sregs.cr3; + __u16 vcpu = ev->event.common.vcpu; + __u64 pt = cr3 & ~0xfff; + + if ( started || !pt ) + return; + + printf( "Starting #PF test with CR3 0x%llx (vcpu%u)\n", cr3, vcpu ); + + for ( __u64 end = pt + EPT_TEST_PAGES * PAGE_SIZE; pt < end; pt += PAGE_SIZE ) + write_protect_page( dom, pt ); + + started = true; + + if ( ev->event.common.event == KVMI_EVENT_CR ) { + bool enable = false; + + printf( "Disabling CR3 events (vcpu=%d)...\n", vcpu ); + + if ( kvmi_control_cr( dom, vcpu, CR3, enable ) ) + die( "kvmi_control_cr(3)" ); + } +} + +static void handle_pf_event( void *dom, struct kvmi_dom_event *ev ) +{ + struct kvmi_event_pf *pf = &ev->event.page_fault; + __u16 vcpu = ev->event.common.vcpu; + struct { + struct kvmi_vcpu_hdr hdr; + struct kvmi_event_reply common; + struct kvmi_event_pf_reply pf; + } rpl = {}; + __u8 access = KVMI_PAGE_ACCESS_R | KVMI_PAGE_ACCESS_W | KVMI_PAGE_ACCESS_X; + + printf( "PF gva 0x%llx gpa 0x%llx access %s [0x%x] (vcpu%u)\n", pf->gva, pf->gpa, access_str[pf->access & 7], + pf->access, vcpu ); + + set_page_access( dom, pf->gpa, access ); + + reply_retry( dom, ev, &rpl.hdr, sizeof( rpl ) ); +} + +static void handle_event( void *dom, struct kvmi_dom_event *ev ) +{ + unsigned int id = ev->event.common.event; + + switch ( id ) { + case KVMI_EVENT_CR: + maybe_start_pf_test( dom, ev ); + handle_cr_event( dom, ev ); + break; + case KVMI_EVENT_MSR: + handle_msr_event( dom, ev ); + break; + case KVMI_EVENT_PAUSE_VCPU: + maybe_start_pf_test( dom, ev ); + handle_pause_vcpu_event( dom, ev ); + break; + case KVMI_EVENT_PF: + handle_pf_event( dom, ev ); + break; + default: + fprintf( stderr, "Unknown event %d\n", id ); + exit( 1 ); + } +} + +static void pause_vm( void *dom ) +{ + unsigned int count = 0; + + if ( kvmi_get_vcpu_count( dom, &count ) ) + die( "kvmi_get_vcpu_count" ); + + printf( "Sending the pause command...\n" ); + + if ( kvmi_pause_all_vcpus( dom, count ) ) + die( "kvmi_pause_all_vcpus" ); + + printf( "We should receive %u pause events\n", count ); +} + +static int new_guest( void *dom, unsigned char ( *uuid )[16], void *ctx ) +{ + unsigned long long max_gfn; + int k; + + printf( "New guest: " ); + + for ( k = 0; k < 16; k++ ) + printf( "%.2x ", ( *uuid )[k] ); + + printf( "fd %d ctx %p\n", kvmi_connection_fd( dom ), ctx ); + + pause_vm( dom ); + + if ( kvmi_get_maximum_gfn( dom, &max_gfn ) ) + die( "kvmi_get_maximum_gfn" ); + + printf( "Max gfn: 0x%llx\n", max_gfn ); + + Dom = dom; + + return 0; +} + +static int new_handshake( const struct kvmi_qemu2introspector *qemu, struct kvmi_introspector2qemu *intro, void *ctx ) +{ + (void)intro; (void)ctx; + printf( "New handshake: name '%s' start_time %ld\n", qemu->name, qemu->start_time ); + return 0; +} + +static void log_cb( kvmi_log_level level, const char *s, void *ctx ) +{ + (void)ctx; + printf( "[level=%d]: %s\n", level, s 
); +} + +static int spp_bypass = 0; + +static void spp_bitmap_test( void *dom ) +{ + int ret = 0; + __u64 cmd = 777; + __u64 gfn; + __u64 gpa; + __u32 bitmap = 0; + __u32 origin_bitmap; + + char buff[64] = { 0 }; + + printf( "please input gfn:\n" + "    777 for bypass spp test,\n" + "    888 to skip this round.\n" ); + + if ( fgets( buff, 63, stdin ) ) + cmd = atoll( buff ); + + if ( cmd == 777 ) { + spp_bypass = 1; + return; + } + + if ( cmd == 888 ) + return; + + printf( "input gfn: 0x%llx(%llu)\n", cmd, cmd ); + + gfn = cmd; + + memset( buff, 0, sizeof( buff ) ); + + printf( "please input spp bitmap:\n" ); + + if ( fgets( buff, 63, stdin ) ) + bitmap = atoll( buff ); + printf( "input spp bitmap: 0x%x(%u)\n", bitmap, bitmap ); + + /* build the gpa expected by the kvmi calls from the gfn */ + gpa = gfn << 12; + + ret = kvmi_set_page_write_bitmap( dom, &gpa, &bitmap, 1 ); + + if ( ret < 0 ) + printf( "failed to set spp bitmap.\n" ); + else + printf( "set spp bitmap successfully.\n" ); + + origin_bitmap = bitmap; + bitmap = 0; + + ret = kvmi_get_page_write_bitmap( dom, gpa, &bitmap ); + + if ( ret < 0 ) + printf( "failed to get spp bitmap. error = %d\n", ret ); + else + printf( "bitmap for gfn(0x%llx) is 0x%x\n", gfn, bitmap ); + + if ( bitmap == origin_bitmap ) + printf( "spp test passed!\n" ); + else + printf( "spp test failed.\n" ); +} + +int main( int argc, char **argv ) +{ + void *ctx; + + if ( argc != 2 ) { + printf( "Usage:\n" + " %s PathToSocket\n" + " %s VSockPortNumber\n", + argv[0], argv[0] ); + return 1; + } + + kvmi_set_log_cb( log_cb, NULL ); + + if ( atoi( argv[1] ) > 0 ) { + ctx = kvmi_init_vsock( atoi( argv[1] ), new_guest, new_handshake, NULL ); + } else { + ctx = kvmi_init_unix_socket( argv[1], new_guest, new_handshake, NULL ); + } + + if ( !ctx ) { + perror( "kvmi_init" ); + exit( 1 ); + } + + printf( "Waiting...\n" ); + + while ( !Dom ) + sleep( 1 ); + + while ( 1 ) { + struct kvmi_dom_event *ev; + + printf( "Waiting...\n" ); + + if ( kvmi_wait_event( Dom, WAIT_30s ) ) { + if ( errno == ETIMEDOUT ) { + printf( "No event.\n" ); + + if ( !spp_bypass ) + spp_bitmap_test( Dom ); + + continue; + } + die( "kvmi_wait_event" ); + } + + printf( "Pop event\n" ); + + if ( kvmi_pop_event( Dom, &ev ) ) + die( "kvmi_pop_event" ); + + handle_event( Dom, ev ); + + free( ev ); + } + + kvmi_uninit( ctx ); + + return 0; +} diff --git a/include/Makefile.am b/include/Makefile.am new file mode 100644 index 0000000..586f49f --- /dev/null +++ b/include/Makefile.am @@ -0,0 +1,7 @@ +libkvmidir = $(includedir)/@PACKAGE_NAME@ +nobase_libkvmi_HEADERS = libkvmi.h + +kerneldir = $(libkvmidir) +nobase_kernel_HEADERS = linux/kvmi.h linux/$(ARCH)/asm/kvmi.h + +noinst_HEADERS = kvm_compat.h diff --git a/include/kvm_compat.h b/include/kvm_compat.h new file mode 100644 index 0000000..d649e26 --- /dev/null +++ b/include/kvm_compat.h @@ -0,0 +1,24 @@ +#ifndef __LIBKVMI_KVM_COMPAT_H__ +#define __LIBKVMI_KVM_COMPAT_H__ + +/* + * kvm_para.h + */ + +#ifndef KVM_EAGAIN +# define KVM_EAGAIN 11 +#endif + +#ifndef KVM_EBUSY +# define KVM_EBUSY EBUSY +#endif + +#ifndef KVM_ENOENT +# define KVM_ENOENT ENOENT +#endif + +#ifndef KVM_ENOMEM +# define KVM_ENOMEM ENOMEM +#endif + +#endif diff --git a/include/libkvmi.h b/include/libkvmi.h new file mode 100644 index 0000000..ffb73f0 --- /dev/null +++ b/include/libkvmi.h @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2017-2020 Bitdefender S.R.L. 
+ * + * The KVMI Library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * The KVMI Library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with the GNU C Library; if not, see + * <http://www.gnu.org/licenses/> + */ +#ifndef __LIBKVMI_H_INCLUDED__ +#define __LIBKVMI_H_INCLUDED__ + +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> + +#include <linux/kvmi.h> + +typedef int kvmi_timeout_t; + +enum { KVMI_NOWAIT = 0, KVMI_WAIT = 150 }; + +struct kvmi_dom_event { + void *next; + struct { + struct kvmi_event common; + union { + struct kvmi_event_cr cr; + struct kvmi_event_msr msr; + struct kvmi_event_breakpoint breakpoint; + struct kvmi_event_pf page_fault; + struct kvmi_event_trap trap; + struct kvmi_event_descriptor desc; + }; + } event; + unsigned char buf[KVMI_MSG_SIZE]; + unsigned int seq; +}; + +struct kvmi_qemu2introspector { + uint32_t struct_size; + unsigned char uuid[16]; + uint32_t padding; + int64_t start_time; + char name[64]; + /* ... */ +}; + +struct kvmi_introspector2qemu { + uint32_t struct_size; + uint8_t cookie_hash[20]; + /* ... */ +}; + +typedef enum { KVMI_LOG_LEVEL_DEBUG, KVMI_LOG_LEVEL_INFO, KVMI_LOG_LEVEL_WARNING, KVMI_LOG_LEVEL_ERROR } kvmi_log_level; + +typedef void ( *kvmi_log_cb )( kvmi_log_level level, const char *s, void *ctx ); + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef int ( *kvmi_new_guest_cb )( void *dom, unsigned char ( *uuid )[16], void *ctx ); +typedef int ( *kvmi_handshake_cb )( const struct kvmi_qemu2introspector *, struct kvmi_introspector2qemu *, void *ctx ); + +void *kvmi_init_vsock( unsigned int port, kvmi_new_guest_cb accept_cb, kvmi_handshake_cb hsk_cb, void *cb_ctx ); +void *kvmi_init_unix_socket( const char *socket, kvmi_new_guest_cb accept_cb, kvmi_handshake_cb hsk_cb, void *cb_ctx ); +void kvmi_uninit( void *ctx ); +void kvmi_close( void *ctx ); +void kvmi_domain_close( void *dom, bool do_shutdown ); +bool kvmi_domain_is_connected( const void *dom ); +int kvmi_memory_mapping( void *dom, bool enable ); +void kvmi_domain_name( const void *dom, char *dest, size_t dest_size ); +int kvmi_connection_fd( const void *dom ); +int kvmi_get_version( void *dom, unsigned int *version ); +int kvmi_check_command( void *dom, int id ); +int kvmi_check_event( void *dom, int id ); +int kvmi_control_events( void *dom, unsigned short vcpu, int id, bool enable ); +int kvmi_control_vm_events( void *dom, int id, bool enable ); +int kvmi_control_cr( void *dom, unsigned short vcpu, unsigned int cr, bool enable ); +int kvmi_control_msr( void *dom, unsigned short vcpu, unsigned int msr, bool enable ); +int kvmi_control_singlestep( void *dom, unsigned short vcpu, bool enable ); +int kvmi_translate_gva( void *dom, unsigned short vcpu, __u64 gva, __u64 *gpa ); +int kvmi_pause_all_vcpus( void *dom, unsigned int count ); +int kvmi_get_page_access( void *dom, unsigned long long int gpa, unsigned char *access, unsigned short view ); +int kvmi_get_page_write_bitmap( void *dom, __u64 gpa, __u32 *bitmap ); +int kvmi_set_page_access( void *dom, unsigned long long int *gpa, unsigned char *access, unsigned short count, + unsigned short view ); 
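+/* SPP write bitmaps: each __u32 covers one 4KiB page, one bit per 128-byte sub-page (Intel SPP) */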
+int kvmi_set_page_write_bitmap( void *dom, __u64 *gpa, __u32 *bitmap, unsigned short count ); +int kvmi_get_vcpu_count( void *dom, unsigned int *count ); +int64_t kvmi_get_starttime( const void *dom ); +int kvmi_get_tsc_speed( void *dom, unsigned long long int *speed ); +int kvmi_get_cpuid( void *dom, unsigned short vcpu, unsigned int function, unsigned int index, unsigned int *eax, + unsigned int *ebx, unsigned int *ecx, unsigned int *edx ); +int kvmi_get_mtrr_type( void *dom, unsigned long long int gpa, unsigned char *type ); +int kvmi_get_xcr( void *dom, unsigned short vcpu, __u8 xcr, __u64 *value ); +int kvmi_get_xsave( void *dom, unsigned short vcpu, void *buffer, size_t bufSize ); +int kvmi_set_xsave( void *dom, unsigned short vcpu, const void *buffer, size_t size ); +int kvmi_inject_exception( void *dom, unsigned short vcpu, unsigned long long int gva, unsigned int error, + unsigned char vector ); +int kvmi_read_physical( void *dom, unsigned long long int gpa, void *buffer, size_t size ); +int kvmi_write_physical( void *dom, unsigned long long int gpa, const void *buffer, size_t size ); +void * kvmi_map_physical_page( void *dom, unsigned long long int gpa ); +int kvmi_unmap_physical_page( void *dom, void *addr ); +int kvmi_get_registers( void *dom, unsigned short vcpu, struct kvm_regs *regs, struct kvm_sregs *sregs, + struct kvm_msrs *msrs, unsigned int *mode ); +int kvmi_set_registers( void *dom, unsigned short vcpu, const struct kvm_regs *regs ); +int kvmi_shutdown_guest( void *dom ); +int kvmi_reply_event( void *dom, unsigned int msg_seq, const void *data, size_t data_size ); +int kvmi_pop_event( void *dom, struct kvmi_dom_event **event ); +int kvmi_wait_event( void *dom, kvmi_timeout_t ms ); +void kvmi_set_log_cb( kvmi_log_cb cb, void *ctx ); +void * kvmi_batch_alloc( void *dom ); +int kvmi_batch_commit( void *batch ); +void kvmi_batch_free( void *batch ); +int kvmi_queue_registers( void *batch, unsigned short vcpu, const struct kvm_regs *regs ); +int kvmi_queue_reply_event( void *batch, unsigned int msg_seq, const void *data, size_t data_size ); +int kvmi_queue_page_access( void *batch, unsigned long long int *gpa, unsigned char *access, unsigned short count, + unsigned short view ); +int kvmi_queue_pause_vcpu( void *batch, unsigned short vcpu ); +int kvmi_get_maximum_gfn( void *dom, unsigned long long *gfn ); +int kvmi_spp_support( void *dom, bool *supported ); +int kvmi_ve_support( void *dom, bool *supported ); +int kvmi_vmfunc_support( void *dom, bool *supported ); +int kvmi_eptp_support( void *dom, bool *supported ); +int kvmi_queue_spp_access( void *batch, __u64 *gpa, __u32 *bitmap, __u16 view, __u16 count ); +int kvmi_set_ve_info_page( void *dom, unsigned short vcpu, unsigned long long int gpa ); +int kvmi_set_ept_page_conv( void *dom, unsigned short index, unsigned long long gpa, bool sve ); +int kvmi_get_ept_page_conv( void *dom, unsigned short index, unsigned long long gpa, bool *sve ); +int kvmi_switch_ept_view( void *dom, unsigned short vcpu, unsigned short view ); +int kvmi_disable_ve( void *dom, unsigned short vcpu ); +int kvmi_get_ept_view( void *dom, unsigned short vcpu, unsigned short *view ); +int kvmi_control_ept_view( void *dom, unsigned short vcpu, unsigned short view, bool visible ); +bool kvmi_remote_mapping_v2( void ); +size_t kvmi_get_pending_events( void *dom ); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __LIBKVMI_H_INCLUDED__ */ diff --git a/include/linux/kvmi.h b/include/linux/kvmi.h new file mode 100644 index 0000000..1ad84d4 
--- /dev/null +++ b/include/linux/kvmi.h @@ -0,0 +1,362 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_KVMI_H +#define _UAPI__LINUX_KVMI_H + +/* + * KVMI structures and definitions + */ + +#include <asm/kvmi.h> +#include <linux/ioctl.h> +#include <linux/types.h> + +#define KVMI_VERSION 0x00000001 + +enum { + KVMI_EVENT_REPLY = 0, + KVMI_EVENT = 1, + + KVMI_GET_VERSION = 2, + KVMI_CHECK_COMMAND = 3, + KVMI_CHECK_EVENT = 4, + KVMI_GET_GUEST_INFO = 5, + KVMI_GET_VCPU_INFO = 6, + KVMI_PAUSE_VCPU = 7, + KVMI_CONTROL_VM_EVENTS = 8, + KVMI_CONTROL_EVENTS = 9, + KVMI_CONTROL_CR = 10, + KVMI_CONTROL_MSR = 11, + KVMI_CONTROL_VE = 12, + KVMI_GET_REGISTERS = 13, + KVMI_SET_REGISTERS = 14, + KVMI_GET_CPUID = 15, + KVMI_GET_XSAVE = 16, + KVMI_READ_PHYSICAL = 17, + KVMI_WRITE_PHYSICAL = 18, + KVMI_INJECT_EXCEPTION = 19, + KVMI_GET_PAGE_ACCESS = 20, + KVMI_SET_PAGE_ACCESS = 21, + KVMI_GET_MAP_TOKEN = 22, + KVMI_GET_MTRR_TYPE = 23, + KVMI_CONTROL_SPP = 24, + KVMI_GET_PAGE_WRITE_BITMAP = 25, + KVMI_SET_PAGE_WRITE_BITMAP = 26, + KVMI_CONTROL_CMD_RESPONSE = 27, + KVMI_SET_VE_INFO_PAGE = 28, + KVMI_GET_MAX_GFN = 29, + KVMI_SET_EPT_PAGE_CONV = 30, + KVMI_GET_EPT_PAGE_CONV = 31, + KVMI_SWITCH_EPT_VIEW = 32, + KVMI_DISABLE_VE = 33, + KVMI_GET_EPT_VIEW = 34, + KVMI_VCPU_TRANSLATE_GVA = 35, + KVMI_CONTROL_EPT_VIEW = 36, + KVMI_VCPU_GET_XCR = 37, + KVMI_VCPU_SET_XSAVE = 38, + + KVMI_VCPU_CONTROL_SINGLESTEP = 63, + + KVM_NUM_MESSAGES +}; + +enum { + KVMI_EVENT_UNHOOK = 0, + KVMI_EVENT_CR = 1, + KVMI_EVENT_MSR = 2, + KVMI_EVENT_XSETBV = 3, + KVMI_EVENT_BREAKPOINT = 4, + KVMI_EVENT_HYPERCALL = 5, + KVMI_EVENT_PF = 6, + KVMI_EVENT_TRAP = 7, + KVMI_EVENT_DESCRIPTOR = 8, + KVMI_EVENT_CREATE_VCPU = 9, + KVMI_EVENT_PAUSE_VCPU = 10, + KVMI_EVENT_SINGLESTEP = 11, + + KVMI_NUM_EVENTS +}; + +#define KVMI_EVENT_ACTION_CONTINUE 0 +#define KVMI_EVENT_ACTION_RETRY 1 +#define KVMI_EVENT_ACTION_CRASH 2 + +#define KVMI_PAGE_ACCESS_R (1 << 0) +#define KVMI_PAGE_ACCESS_W (1 << 1) +#define KVMI_PAGE_ACCESS_X (1 << 2) +#define KVMI_PAGE_ACCESS_SVE (1 << 3) + +#define KVMI_MSG_SIZE (4096 * 2 - sizeof(struct kvmi_msg_hdr)) + +struct kvmi_msg_hdr { + __u16 id; + __u16 size; + __u32 seq; +}; + +struct kvmi_error_code { + __s32 err; + __u32 padding; +}; + +struct kvmi_get_version_reply { + __u32 version; + __u32 padding; + struct kvmi_features features; +}; + +struct kvmi_control_cmd_response { + __u8 enable; + __u8 now; + __u16 padding1; + __u32 padding2; +}; + +struct kvmi_check_command { + __u16 id; + __u16 padding1; + __u32 padding2; +}; + +struct kvmi_check_event { + __u16 id; + __u16 padding1; + __u32 padding2; +}; + +struct kvmi_get_guest_info_reply { + __u32 vcpu_count; + __u32 padding[3]; +}; + +struct kvmi_get_page_access { + __u16 view; + __u16 count; + __u32 padding; + __u64 gpa[0]; +}; + +struct kvmi_get_page_access_reply { + __u8 access[0]; +}; + +struct kvmi_page_access_entry { + __u64 gpa; + __u8 access; + __u8 padding1; + __u16 padding2; + __u32 padding3; +}; + +struct kvmi_set_page_access { + __u16 view; + __u16 count; + __u32 padding; + struct kvmi_page_access_entry entries[0]; +}; + +struct kvmi_control_spp { + __u8 enable; + __u8 padding1; + __u16 padding2; + __u32 padding3; +}; + +struct kvmi_get_page_write_bitmap { + __u16 view; + __u16 count; + __u32 padding; + __u64 gpa[0]; +}; + +struct kvmi_get_page_write_bitmap_reply { + __u32 bitmap[0]; +}; + +struct kvmi_page_write_bitmap_entry { + __u64 gpa; + __u32 bitmap; + __u32 padding; +}; + +struct kvmi_set_page_write_bitmap { + __u16 view; + __u16 count; + __u32 padding; + 
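/* `count' entries follow this header */ +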
struct kvmi_page_write_bitmap_entry entries[0]; +}; + +struct kvmi_get_vcpu_info_reply { + __u64 tsc_speed; +}; + +struct kvmi_pause_vcpu { + __u8 wait; + __u8 padding1; + __u16 padding2; + __u32 padding3; +}; + +struct kvmi_control_events { + __u16 event_id; + __u8 enable; + __u8 padding1; + __u32 padding2; +}; + +struct kvmi_control_vm_events { + __u16 event_id; + __u8 enable; + __u8 padding1; + __u32 padding2; +}; + +struct kvmi_read_physical { + __u64 gpa; + __u64 size; +}; + +struct kvmi_write_physical { + __u64 gpa; + __u64 size; + __u8 data[0]; +}; + +struct kvmi_vcpu_hdr { + __u16 vcpu; + __u16 padding1; + __u32 padding2; +}; + +struct kvmi_inject_exception { + __u8 nr; + __u8 padding1; + __u16 padding2; + __u32 error_code; + __u64 address; +}; + +struct kvmi_event { + __u16 size; + __u16 vcpu; + __u8 event; + __u8 padding[3]; + struct kvmi_event_arch arch; +}; + +struct kvmi_event_reply { + __u8 action; + __u8 event; + __u16 padding1; + __u32 padding2; +}; + +struct kvmi_event_pf { + __u64 gva; + __u64 gpa; + __u8 access; + __u8 padding1; + __u16 view; + __u32 padding2; +}; + +struct kvmi_event_pf_reply { + __u64 ctx_addr; + __u32 ctx_size; + __u8 singlestep; + __u8 rep_complete; + __u16 padding; + __u8 ctx_data[256]; +}; + +struct kvmi_event_breakpoint { + __u64 gpa; + __u8 insn_len; + __u8 padding[7]; +}; + +struct kvmi_mem_token { + __u64 token[4]; +}; + +struct kvmi_get_max_gfn_reply { + __u64 gfn; +}; + +struct kvmi_set_ve_info_page { + __u64 gpa; + __u8 trigger_vmexit; + __u8 padding[7]; +}; + +struct kvmi_set_ept_page_conv_req { + __u16 view; + __u8 sve; + __u8 padding[5]; + __u64 gpa; +}; + +struct kvmi_get_ept_page_conv_req { + __u16 view; + __u16 padding[3]; + __u64 gpa; +}; + +struct kvmi_get_ept_page_conv_reply { + __u8 sve; + __u8 padding[7]; +}; + +struct kvmi_switch_ept_view_req { + __u16 view; + __u16 padding[3]; +}; + +struct kvmi_get_ept_view_reply { + __u16 view; + __u8 padding[6]; +}; + +struct kvmi_control_ept_view_req { + __u16 view; + __u8 visible; + __u8 padding1; + __u32 padding2; +}; + +struct kvmi_vcpu_get_xcr { + __u8 xcr; + __u8 padding[7]; +}; + +struct kvmi_vcpu_get_xcr_reply { + __u64 value; +}; + +struct kvmi_vcpu_control_singlestep { + __u8 enable; + __u8 padding[7]; +}; + +struct kvmi_vcpu_translate_gva { + __u64 gva; +}; + +struct kvmi_vcpu_translate_gva_reply { + __u64 gpa; +}; + +/* + * ioctls for /dev/kvmmem + */ +struct kvmi_guest_mem_map { + struct kvmi_mem_token token; /* In */ + __u64 gpa; /* In/Out */ + __u64 virt; /* Out */ + __u64 length; /* Out */ +}; + +#define KVM_GUEST_MEM_OPEN _IOW('i', 0x01, unsigned char *) +#define KVM_GUEST_MEM_MAP _IOWR('i', 0x02, struct kvmi_guest_mem_map) +#define KVM_GUEST_MEM_UNMAP _IOW('i', 0x03, unsigned long) + +#endif /* _UAPI__LINUX_KVMI_H */ diff --git a/include/linux/x86_64/asm/kvmi.h b/include/linux/x86_64/asm/kvmi.h new file mode 100644 index 0000000..eeb4525 --- /dev/null +++ b/include/linux/x86_64/asm/kvmi.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_X86_KVMI_H +#define _UAPI_ASM_X86_KVMI_H + +/* + * KVM introspection - x86 specific structures and definitions + */ + +#include <asm/kvm.h> + +struct kvmi_event_arch { + __u8 mode; /* 2, 4 or 8 */ + __u8 padding1; + __u16 view; + __u8 padding2[4]; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct { + __u64 sysenter_cs; + __u64 sysenter_esp; + __u64 sysenter_eip; + __u64 efer; + __u64 star; + __u64 lstar; + __u64 cstar; + __u64 pat; + __u64 shadow_gs; + } msrs; +}; + +struct kvmi_event_trap { + 
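/* the exception actually delivered to the guest: vector, error code and CR2 */ +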
__u8 vector; + __u8 padding1; + __u16 padding2; + __u32 error_code; + __u64 cr2; +}; + +struct kvmi_get_registers { + __u16 nmsrs; + __u16 padding1; + __u32 padding2; + __u32 msrs_idx[0]; +}; + +struct kvmi_get_registers_reply { + __u32 mode; + __u32 padding; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_msrs msrs; +}; + +struct kvmi_get_cpuid { + __u32 function; + __u32 index; +}; + +struct kvmi_get_cpuid_reply { + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; +}; + +struct kvmi_control_cr { + __u8 enable; + __u8 padding1; + __u16 padding2; + __u32 cr; +}; + +struct kvmi_event_cr { + __u16 cr; + __u16 padding[3]; + __u64 old_value; + __u64 new_value; +}; + +struct kvmi_event_cr_reply { + __u64 new_val; +}; + +struct kvmi_control_msr { + __u8 enable; + __u8 padding1; + __u16 padding2; + __u32 msr; +}; + +struct kvmi_event_msr { + __u32 msr; + __u32 padding; + __u64 old_value; + __u64 new_value; +}; + +struct kvmi_event_msr_reply { + __u64 new_val; +}; + +struct kvmi_get_xsave_reply { + __u32 region[0]; +}; + +struct kvmi_vcpu_set_xsave { + __u32 region[0]; +}; + +struct kvmi_get_mtrr_type { + __u64 gpa; +}; + +struct kvmi_get_mtrr_type_reply { + __u8 type; + __u8 padding[7]; +}; + +#define KVMI_DESC_IDTR 1 +#define KVMI_DESC_GDTR 2 +#define KVMI_DESC_LDTR 3 +#define KVMI_DESC_TR 4 + +struct kvmi_event_descriptor { + __u8 descriptor; + __u8 write; + __u8 padding[6]; +}; + +struct kvmi_features { + __u8 spp; + __u8 vmfunc; + __u8 eptp; + __u8 ve; + __u8 padding[4]; +}; + +#endif /* _UAPI_ASM_X86_KVMI_H */ diff --git a/libkvmi.pc.in b/libkvmi.pc.in new file mode 100644 index 0000000..d876016 --- /dev/null +++ b/libkvmi.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@/@PACKAGE_NAME@ + +Name: @PACKAGE_NAME@ +Description: KVM Virtual Machine Introspection library +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lkvmi +Cflags: -I${includedir} -I${includedir}/linux/@ARCH@ diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..4431c70 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,7 @@ +AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/include/linux/$(ARCH) + +lib_LTLIBRARIES = libkvmi.la + +libkvmi_la_SOURCES = kvmi.c +libkvmi_la_LDFLAGS = -pthread -version-number 0:1 \ + -Wl,--version-script,$(srcdir)/version diff --git a/src/kvmi.c b/src/kvmi.c new file mode 100644 index 0000000..af022b9 --- /dev/null +++ b/src/kvmi.c @@ -0,0 +1,2421 @@ +/* + * Copyright (C) 2017-2020 Bitdefender S.R.L. + * + * The KVMI Library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * The KVMI Library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with the GNU C Library; if not, see + * <http://www.gnu.org/licenses/> + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stddef.h> +#include <poll.h> +#include <pthread.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/ioctl.h> +#include <sys/uio.h> + +#include "libkvmi.h" +#include "kvm_compat.h" + +#define MIN( X, Y ) ( ( X ) < ( Y ) ? 
( X ) : ( Y ) ) + +/* remote mapping v1 */ +struct kvmi_mem_map { + struct kvmi_mem_token token; + __u64 gpa; + __u64 gva; +}; +#define KVM_INTRO_MEM_MAP _IOW( 'i', 0x01, struct kvmi_mem_map ) +#define KVM_INTRO_MEM_UNMAP _IOW( 'i', 0x02, unsigned long ) + +/* VSOCK types and consts */ +/* #include "kernel/uapi/linux/vm_sockets.h" */ +typedef unsigned short __kernel_sa_family_t; +struct sockaddr_vm { + __kernel_sa_family_t svm_family; + unsigned short svm_reserved1; + unsigned int svm_port; + unsigned int svm_cid; + unsigned char svm_zero[sizeof( struct sockaddr ) - sizeof( sa_family_t ) - sizeof( unsigned short ) - + sizeof( unsigned int ) - sizeof( unsigned int )]; +}; +#ifndef AF_VSOCK +#define AF_VSOCK 40 /* vSockets */ +#define PF_VSOCK AF_VSOCK +#endif + +#ifndef VMADDR_CID_ANY +#define VMADDR_CID_ANY -1U +#endif + +#define MAX_QUEUED_EVENTS 16384 +#define MAX_BATCH_IOVS ( 50 ) +#define MAX_BATCH_BYTES ( 1024 * 1024 - 2 * sizeof( struct kvmi_control_cmd_response_msg ) ) +#define BATCH_PREALLOCATED_PAGES 4 +#define MIN_KVMI_VERSION 1 +#define MIN_HANDSHAKE_DATA offsetof( struct kvmi_qemu2introspector, name ) +#define MAX_HANDSHAKE_DATA ( 64 * 1024 ) +#define MAX_MAP_RETRIES 30 +#define MAP_RETRY_WARNING 3 +#define MAP_RETRY_SLEEP_SECS 1 + +#define KVMI_MAX_TIMEOUT 15000 + +struct kvmi_dom { + int fd; + unsigned int api_version; + struct kvmi_features supported; + bool disconnected; + int mem_fd; + void * cb_ctx; + struct kvmi_dom_event * events; + struct kvmi_dom_event * event_last; + unsigned int event_count; + pthread_mutex_t event_lock; + pthread_mutex_t lock; + struct kvmi_qemu2introspector hsk; + + char buff[5 * KVMI_MSG_SIZE]; + unsigned head; + unsigned tail; +}; + +struct kvmi_ctx { + kvmi_new_guest_cb accept_cb; + kvmi_handshake_cb handshake_cb; + void * cb_ctx; + pthread_t th_id; + bool th_started; + int th_fds[2]; + int fd; + struct sockaddr_un un_addr; + struct sockaddr_vm v_addr; +}; + +struct kvmi_control_cmd_response_msg { + struct kvmi_msg_hdr hdr; + struct kvmi_control_cmd_response cmd; +}; + +struct kvmi_batch { + struct kvmi_dom * dom; + struct iovec * vec; + size_t vec_allocated; + size_t vec_pos; + struct iovec static_vec; + size_t static_space; + size_t filled; + unsigned int first_seq; + bool wait_for_reply; + struct kvmi_control_cmd_response_msg prefix; + struct kvmi_control_cmd_response_msg suffix; +}; + +struct kvmi_set_registers_msg { + struct kvmi_msg_hdr hdr; + struct kvmi_vcpu_hdr vcpu; + struct kvm_regs regs; +}; + +struct kvmi_set_page_access_msg { + struct kvmi_msg_hdr hdr; + struct kvmi_set_page_access cmd; +}; + +struct kvmi_set_page_write_bitmap_msg { + struct kvmi_msg_hdr hdr; + struct kvmi_set_page_write_bitmap cmd; +}; + +struct kvmi_pause_vcpu_msg { + struct kvmi_msg_hdr hdr; + struct kvmi_vcpu_hdr vcpu; + struct kvmi_pause_vcpu cmd; +}; + +static long pagesize; +static size_t batch_preallocated_size; +static kvmi_log_cb log_cb; +static void * log_ctx; + +static int recv_reply( struct kvmi_dom *dom, const struct kvmi_msg_hdr *req, void *dest, size_t *dest_size ); +static int __kvmi_get_version( void *dom, unsigned int *version, struct kvmi_features *features ); +static int __kvmi_batch_commit( struct kvmi_batch *grp, bool wait_for_reply ); + +__attribute__( ( constructor ) ) static void lib_init( void ) +{ + pagesize = sysconf( _SC_PAGE_SIZE ); + batch_preallocated_size = pagesize * BATCH_PREALLOCATED_PAGES; +} + +static void kvmi_log_generic( kvmi_log_level level, const char *s, va_list va ) +{ + char *buf = NULL; + + if ( !log_cb ) + return; + 
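+ /* vasprintf() allocates the formatted message; it is freed after the callback returns */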
+	if ( vasprintf( &buf, s, va ) < 0 )
+		return;
+
+	log_cb( level, buf, log_ctx );
+
+	free( buf );
+}
+
+static void kvmi_log_error( const char *s, ... )
+{
+	va_list va;
+
+	va_start( va, s );
+	kvmi_log_generic( KVMI_LOG_LEVEL_ERROR, s, va );
+	va_end( va );
+}
+
+static void kvmi_log_warning( const char *s, ... )
+{
+	va_list va;
+
+	va_start( va, s );
+	kvmi_log_generic( KVMI_LOG_LEVEL_WARNING, s, va );
+	va_end( va );
+}
+
+static bool setup_socket( struct kvmi_ctx *ctx, const struct sockaddr *sa, size_t sa_size, int pf )
+{
+	ctx->fd = socket( pf, SOCK_STREAM, 0 );
+
+	if ( ctx->fd == -1 )
+		return false;
+
+	if ( bind( ctx->fd, sa, sa_size ) == -1 )
+		return false;
+
+	if ( listen( ctx->fd, 0 ) == -1 )
+		return false;
+
+	/* mark the file descriptor as close-on-exec */
+	if ( fcntl( ctx->fd, F_SETFD, FD_CLOEXEC ) < 0 )
+		return false;
+
+	return true;
+}
+
+static bool setup_unix_socket( struct kvmi_ctx *ctx, const char *path )
+{
+	struct stat      st;
+	bool             done;
+	struct sockaddr *sa;
+
+	if ( !path || path[0] == 0 )
+		return false;
+
+	/* remove a stale socket file, otherwise bind() fails with EADDRINUSE */
+	if ( stat( path, &st ) == 0 && unlink( path ) )
+		return false;
+
+	ctx->un_addr.sun_family = AF_UNIX;
+	snprintf( ctx->un_addr.sun_path, sizeof( ctx->un_addr.sun_path ), "%s", path );
+
+	sa = ( struct sockaddr * )&ctx->un_addr;
+
+	done = setup_socket( ctx, sa, sizeof( ctx->un_addr ), PF_UNIX );
+
+	if ( done )
+		done = !chmod( ctx->un_addr.sun_path, 0777 );
+
+	return done;
+}
+
+static bool setup_vsock( struct kvmi_ctx *ctx, unsigned int port )
+{
+	struct sockaddr *sa;
+
+	if ( !port )
+		return false;
+
+	ctx->v_addr.svm_family = AF_VSOCK;
+	ctx->v_addr.svm_cid    = VMADDR_CID_ANY;
+	ctx->v_addr.svm_port   = port;
+
+	sa = ( struct sockaddr * )&ctx->v_addr;
+
+	return setup_socket( ctx, sa, sizeof( ctx->v_addr ), PF_VSOCK );
+}
+
+bool kvmi_domain_is_connected( const void *d )
+{
+	const struct kvmi_dom *dom = d;
+
+	return !dom->disconnected;
+}
+
+static int kvmi_open_kvmmem( struct kvmi_dom *dom )
+{
+	if ( dom->mem_fd != -1 )
+		return 0;
+
+	dom->mem_fd = open( "/dev/kvmmem", O_RDWR );
+
+	return dom->mem_fd < 0 ? -1 : 0;
+}
+
+static void kvmi_close_kvmmem( struct kvmi_dom *dom )
+{
+	if ( dom->mem_fd != -1 ) {
+		close( dom->mem_fd );
+		dom->mem_fd = -1;
+	}
+}
+
+int kvmi_memory_mapping( void *d, bool enable )
+{
+	struct kvmi_dom *dom = d;
+
+	if ( !enable ) {
+		kvmi_close_kvmmem( dom );
+		return 0;
+	}
+
+	return kvmi_open_kvmmem( dom );
+}
+
+static void check_if_disconnected( struct kvmi_dom *dom, int err, kvmi_timeout_t ms, bool can_timeout )
+{
+	if ( dom->disconnected || !err )
+		return;
+
+	if ( errno == ETIMEDOUT && ( can_timeout || ms == KVMI_NOWAIT ) )
+		return;
+
+	dom->disconnected = true;
+}
+
+static int do_wait( struct kvmi_dom *dom, bool write, kvmi_timeout_t ms, bool can_timeout )
+{
+	short event = write ?
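+	/*
+	 * Example (illustrative sketch): a minimal introspection listener built
+	 * on the socket helpers above. The callback prototype is an assumption
+	 * inferred from how accept_cb is invoked by accept_worker() below; the
+	 * real typedefs are in the public header.
+	 *
+	 *   static int new_guest( void *dom, unsigned char ( *uuid )[16], void *ctx )
+	 *   {
+	 *           // keep `dom` and drive it from another thread;
+	 *           // a non-zero return makes the library close the connection
+	 *           return 0;
+	 *   }
+	 *
+	 *   void *ctx = kvmi_init_unix_socket( "/tmp/introspector", new_guest,
+	 *                                      NULL, NULL );
+	 *   if ( !ctx )
+	 *           perror( "kvmi_init_unix_socket" );
+	 *
+	 *   // on shutdown: kvmi_uninit( ctx );
+	 */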
POLLOUT : POLLIN; + int err; + struct pollfd pfd[1] = {}; + + pfd[0].fd = dom->fd; + pfd[0].events = event; + + do { + err = poll( pfd, 1, ms ); + } while ( err < 0 && errno == EINTR ); + + if ( !err ) { + /* + * The man page does not specify if poll() sets errno to + * ETIMEDOUT before returning 0 + */ + errno = ETIMEDOUT; + goto out_err; + } + + if ( err < 0 ) + goto out_err; + + if ( pfd[0].revents & POLLHUP ) { + errno = EPIPE; + goto out_err; + } + + return 0; + +out_err: + check_if_disconnected( dom, errno, ms, can_timeout ); + return -1; +} + +static ssize_t buff_read( struct kvmi_dom *dom, kvmi_timeout_t ms ) +{ + ssize_t ret; + +wait: + if ( do_wait( dom, false, ms, false ) < 0 ) + return -1; + + do { + ret = recv( dom->fd, dom->buff + dom->tail, sizeof( dom->buff ) - dom->tail, 0 ); + } while ( ret < 0 && errno == EINTR ); + + if ( !ret ) { + errno = ENOTCONN; + dom->disconnected = true; + return -1; + } + + if ( ret < 0 ) { + if ( errno == EAGAIN || errno == EWOULDBLOCK ) + /* go wait for the socket to become available again */ + goto wait; + check_if_disconnected( dom, errno, ms, false ); + return -1; + } + + return ret; +} + +static int __do_read( struct kvmi_dom *dom, void *buf, size_t size, kvmi_timeout_t ms ) +{ + char *dest = buf; + + errno = 0; + + while ( size ) { + size_t cached = dom->tail - dom->head; + ssize_t n; + + if ( cached ) { + size_t bytes = MIN( size, cached ); + + memcpy( dest, dom->buff + dom->head, bytes ); + + if ( bytes == cached ) + dom->head = dom->tail = 0; + else + dom->head += bytes; + + dest += bytes; + size -= bytes; + + if ( !size ) + break; + } + + n = buff_read( dom, ms ); + + if ( n < 0 ) + return -1; + + dom->tail += n; + } + + return 0; +} + +static int do_read( struct kvmi_dom *dom, void *buf, size_t size ) +{ + return __do_read( dom, buf, size, KVMI_MAX_TIMEOUT ); +} + +static ssize_t do_write_iov( struct kvmi_dom *dom, struct iovec *iov, size_t iov_len ) +{ + struct msghdr msg = { .msg_iov = iov, .msg_iovlen = iov_len }; + + errno = 0; + + for ( ;; ) { + ssize_t n; + + if ( do_wait( dom, true, KVMI_MAX_TIMEOUT, false ) < 0 ) + return -1; + + do { + n = sendmsg( dom->fd, &msg, MSG_NOSIGNAL ); + } while ( n < 0 && errno == EINTR ); + + if ( n >= 0 ) + return n; + + if ( errno != EAGAIN && errno != EWOULDBLOCK ) { + check_if_disconnected( dom, errno, KVMI_MAX_TIMEOUT, false ); + return -1; + } + } + + return 0; +} + +static int do_write( struct kvmi_dom *dom, struct iovec *iov, size_t iov_len, size_t to_send ) +{ + size_t iov_idx = 0, prev = 0; + static bool once = true; + + while ( to_send ) { + ssize_t n; + + if ( !prev ) { + n = do_write_iov( dom, iov + iov_idx, iov_len - iov_idx ); + } else { + struct iovec tmp; + + tmp.iov_base = iov[iov_idx].iov_base + prev; + tmp.iov_len = iov[iov_idx].iov_len - prev; + + n = do_write_iov( dom, &tmp, 1 ); + } + + if ( n <= 0 || ( size_t )n > to_send ) + return -1; + + if ( ( size_t )n == to_send ) + return 0; + + if ( once ) { + kvmi_log_warning( "%s: sendmsg() was unable to send all data, resending the leftover!", + __func__ ); + once = false; + } + + to_send -= n; + + while ( n ) { + if ( prev + n >= iov[iov_idx].iov_len ) { + n -= iov[iov_idx].iov_len - prev; + iov_idx++; + + prev = 0; + } else { + prev += n; + break; + } + } + } + + return 0; +} + +static int consume_bytes( struct kvmi_dom *dom, size_t size ) +{ + while ( size ) { + unsigned char buf[1024]; + size_t chunk = ( size < sizeof( buf ) ) ? 
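+	/*
+	 * Worked example for the do_write() resume logic above (illustrative):
+	 * with iov = { 8 bytes, 4 bytes, 4 bytes } and sendmsg() accepting
+	 * n = 10 bytes, the inner bookkeeping loop consumes iov[0]
+	 * (n: 10 -> 2, iov_idx: 0 -> 1) and stops inside iov[1] with prev = 2;
+	 * the next do_write_iov() call then sends a temporary iovec covering
+	 * only the remaining 2 bytes of iov[1], after which the loop moves on
+	 * to iov[2]. The `once` flag merely limits the partial-send warning to
+	 * the first occurrence.
+	 */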
size : sizeof( buf ); + + if ( do_read( dom, buf, chunk ) ) + return -1; + + size -= chunk; + } + + return 0; +} + +static bool unsupported_version( struct kvmi_dom *dom ) +{ + unsigned int version; + struct kvmi_features supported; + + if ( __kvmi_get_version( dom, &version, &supported ) ) { + kvmi_log_error( "failed to retrieve the protocol version (invalid authentication token?)" ); + return true; + } + + if ( version < MIN_KVMI_VERSION ) { + kvmi_log_error( "invalid protocol version (received 0x%08x, expected at least 0x%08x)", version, + MIN_KVMI_VERSION ); + return true; + } + + dom->api_version = version; + dom->supported = supported; + + return false; +} + +static int read_qemu_data( struct kvmi_dom *dom, struct kvmi_qemu2introspector *qemu ) +{ + size_t incoming, useful; + void * ptr; + + memset( qemu, 0, sizeof( *qemu ) ); + + if ( do_read( dom, &qemu->struct_size, sizeof( qemu->struct_size ) ) ) + return -1; + + incoming = qemu->struct_size; + + if ( incoming < MIN_HANDSHAKE_DATA ) { + errno = ENODATA; + return -1; + } + + if ( incoming > MAX_HANDSHAKE_DATA ) { + errno = E2BIG; + return -1; + } + + qemu->struct_size = MIN( incoming, sizeof( *qemu ) ); + ptr = ( char * )qemu + sizeof( qemu->struct_size ); + useful = qemu->struct_size - sizeof( qemu->struct_size ); + + if ( do_read( dom, ptr, useful ) ) + return -1; + + qemu->name[sizeof( qemu->name ) - 1] = 0; + + incoming -= sizeof( qemu->struct_size ); + incoming -= useful; + + return consume_bytes( dom, incoming ); +} + +static bool handshake_done( struct kvmi_ctx *ctx, struct kvmi_dom *dom ) +{ + struct kvmi_qemu2introspector *qemu = &dom->hsk; + struct kvmi_introspector2qemu intro = {}; + struct iovec iov = { .iov_base = &intro, .iov_len = sizeof( intro ) }; + + if ( read_qemu_data( dom, qemu ) ) { + kvmi_log_error( "Invalid handshake data" ); + return false; + } + + intro.struct_size = sizeof( intro ); + if ( ctx->handshake_cb && ctx->handshake_cb( qemu, &intro, ctx->cb_ctx ) < 0 ) + return false; + + return do_write( dom, &iov, 1, iov.iov_len ) == 0; +} + +/* The same sequence variable is used by all domains. 
*/ +static unsigned int new_seq( void ) +{ + static unsigned int seq; + + return __sync_add_and_fetch( &seq, 1 ); +} + +static void kvmi_batch_init( struct kvmi_batch *grp, struct kvmi_dom *dom ) +{ + grp->dom = dom; + grp->static_vec.iov_base = grp + 1; + grp->static_space = batch_preallocated_size - sizeof( *grp ); + grp->first_seq = new_seq(); + grp->wait_for_reply = true; +} + +void *kvmi_batch_alloc( void *dom ) +{ + struct kvmi_batch *grp; + + grp = calloc( 1, batch_preallocated_size ); + if ( grp ) + kvmi_batch_init( grp, dom ); + + return grp; +} + +static void kvmi_batch_free_iov( struct kvmi_batch *grp ) +{ + struct iovec *iov = grp->vec; + + if ( iov ) { + for ( ; grp->vec_allocated--; iov++ ) + if ( iov->iov_base != grp->static_vec.iov_base ) + free( iov->iov_base ); + + free( grp->vec ); + } +} + +void kvmi_batch_free( void *_grp ) +{ + struct kvmi_batch *grp = _grp; + + if ( !grp ) + return; + + kvmi_batch_free_iov( grp ); + + free( grp ); +} + +static void kvmi_batch_reset( struct kvmi_batch *grp ) +{ + struct kvmi_dom *dom = grp->dom; + + kvmi_batch_free_iov( grp ); + + memset( grp, 0, batch_preallocated_size ); + kvmi_batch_init( grp, dom ); +} + +static int kvmi_enlarge_batch_iovec( struct kvmi_batch *grp ) +{ + size_t old_size = grp->vec_allocated; + size_t new_size = ( old_size + 1 ) * 2; + struct iovec *new_ptr; + + new_ptr = realloc( grp->vec, new_size * sizeof( *grp->vec ) ); + if ( !new_ptr ) + return -1; + + grp->vec = new_ptr; + memset( grp->vec + old_size, 0, ( new_size - old_size ) * sizeof( *grp->vec ) ); + grp->vec_allocated = new_size; + + return 0; +} + +static bool message_added_to_static_buffer( struct kvmi_batch *grp, const void *src, size_t src_size ) +{ + size_t dest_space; + char * dest; + + if ( grp->vec ) + return false; + + dest = ( char * )grp->static_vec.iov_base + grp->static_vec.iov_len; + dest_space = grp->static_space - grp->static_vec.iov_len; + + if ( src_size > dest_space ) + return false; + + memcpy( dest, src, src_size ); + + grp->static_vec.iov_len += src_size; + + return true; +} + +static int __kvmi_batch_add( struct kvmi_batch *grp, const void *data, size_t data_size ) +{ + struct iovec *iov; + + if ( message_added_to_static_buffer( grp, data, data_size ) ) + goto out; + + if ( grp->vec_pos == grp->vec_allocated ) { + if ( kvmi_enlarge_batch_iovec( grp ) ) + return -1; + + if ( grp->vec_pos == 0 ) { + grp->vec[0].iov_base = grp->static_vec.iov_base; + grp->vec[0].iov_len = grp->static_vec.iov_len; + grp->vec_pos = 1; + } + } + + iov = grp->vec + grp->vec_pos; + + iov->iov_base = malloc( data_size ); + if ( !iov->iov_base ) + return -1; + + memcpy( iov->iov_base, data, data_size ); + iov->iov_len = data_size; + + grp->vec_pos++; + +out: + grp->filled += data_size; + return 0; +} + +static int kvmi_batch_check_space( struct kvmi_batch *grp, size_t data_size, size_t count ) +{ + if ( data_size > MAX_BATCH_BYTES || grp->filled + data_size > MAX_BATCH_BYTES ) + return -1; + + if ( grp->vec_pos + count >= MAX_BATCH_IOVS ) + return -1; + + return 0; +} + +static int kvmi_batch_add( struct kvmi_batch *grp, const void *data, size_t data_size ) +{ + if ( !data_size ) + return 0; + + if ( kvmi_batch_check_space( grp, data_size, 1 ) ) { + if ( __kvmi_batch_commit( grp, false ) ) + return -1; + kvmi_batch_reset( grp ); + } + + return __kvmi_batch_add( grp, data, data_size ); +} + +static void setup_kvmi_control_cmd_response_msg( struct kvmi_control_cmd_response_msg *msg, bool enable, bool now, + unsigned int seq ) +{ + memset( msg, 0, sizeof( *msg 
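+	/*
+	 * Example (illustrative sketch): typical use of the batch API defined
+	 * above. Queued commands are serialized into the preallocated buffer
+	 * (or into the iovec array once that overflows) and go out in a single
+	 * send on commit. kvmi_queue_pause_vcpu() and kvmi_queue_registers()
+	 * are defined later in this file; `regs` is assumed to be filled in by
+	 * the caller.
+	 *
+	 *   struct kvm_regs regs = { 0 };
+	 *   void *          grp  = kvmi_batch_alloc( dom );
+	 *
+	 *   if ( grp ) {
+	 *           if ( !kvmi_queue_pause_vcpu( grp, 0 ) &&
+	 *                !kvmi_queue_registers( grp, 0, &regs ) )
+	 *                   kvmi_batch_commit( grp );
+	 *           kvmi_batch_free( grp );
+	 *   }
+	 */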
) ); + + msg->hdr.id = KVMI_CONTROL_CMD_RESPONSE; + msg->hdr.seq = seq; + msg->hdr.size = sizeof( *msg ) - sizeof( msg->hdr ); + + msg->cmd.enable = enable; + msg->cmd.now = now ? 1 : 0; +} + +static void disable_command_reply( struct kvmi_control_cmd_response_msg *msg, unsigned int seq ) +{ + setup_kvmi_control_cmd_response_msg( msg, false, true, seq ); +} + +static void enable_command_reply( struct kvmi_control_cmd_response_msg *msg, bool now ) +{ + setup_kvmi_control_cmd_response_msg( msg, true, now, new_seq() ); +} + +static bool batch_with_event_reply_only( struct iovec *iov ) +{ + struct kvmi_msg_hdr *hdr = iov->iov_base; + bool one_msg_in_iovec = ( iov->iov_len == sizeof( *hdr ) + hdr->size ); + + return ( one_msg_in_iovec && hdr->id == KVMI_EVENT_REPLY ); +} + +static struct iovec *alloc_iovec( struct kvmi_batch *grp, struct iovec *buf, size_t buf_len, size_t *iov_cnt, + size_t *total_len, bool wait_for_reply ) +{ + struct iovec *iov, *new_iov; + size_t n, new_n; + + if ( grp->vec_pos ) { + n = grp->vec_pos; + iov = grp->vec; + } else if ( grp->static_vec.iov_len ) { + n = 1; + iov = &grp->static_vec; + } else { + n = 0; + iov = buf; + } + + if ( n == 0 || ( n == 1 && batch_with_event_reply_only( iov ) ) ) { + *iov_cnt = n; + *total_len = grp->filled; + return iov; + } + + new_n = n + 2; + + if ( new_n <= buf_len ) + new_iov = buf; + else { + new_iov = calloc( new_n, sizeof( *new_iov ) ); + if ( !new_iov ) + return NULL; + } + + disable_command_reply( &grp->prefix, grp->first_seq ); + new_iov[0].iov_base = &grp->prefix; + new_iov[0].iov_len = sizeof( grp->prefix ); + + memcpy( new_iov + 1, iov, n * sizeof( *iov ) ); + + enable_command_reply( &grp->suffix, wait_for_reply ); + new_iov[n + 1].iov_base = &grp->suffix; + new_iov[n + 1].iov_len = sizeof( grp->suffix ); + + *iov_cnt = new_n; + *total_len = grp->filled + sizeof( grp->prefix ) + sizeof( grp->suffix ); + return new_iov; +} + +static void free_iovec( struct iovec *iov, struct kvmi_batch *grp, struct iovec *buf ) +{ + if ( iov != buf && iov != grp->vec && iov != &grp->static_vec ) + free( iov ); +} + +static int __kvmi_batch_commit( struct kvmi_batch *grp, bool wait_for_reply ) +{ + struct kvmi_dom *dom; + struct iovec buf_iov[30]; + struct iovec * iov = NULL; + size_t n = 0; + size_t total_len = 0; + int err = 0; + + iov = alloc_iovec( grp, buf_iov, sizeof( buf_iov ) / sizeof( buf_iov[0] ), &n, &total_len, wait_for_reply ); + if ( !iov ) + return -1; + if ( !n ) + goto out; + + dom = grp->dom; + + pthread_mutex_lock( &dom->lock ); + + err = do_write( dom, iov, n, total_len ); + if ( !err && wait_for_reply ) + err = recv_reply( dom, &grp->suffix.hdr, NULL, NULL ); + + pthread_mutex_unlock( &dom->lock ); + +out: + free_iovec( iov, grp, buf_iov ); + + return err; +} + +int kvmi_batch_commit( void *_grp ) +{ + struct kvmi_batch *grp = _grp; + + return __kvmi_batch_commit( grp, grp->wait_for_reply ); +} + +static int set_nonblock( int fd ) +{ + int flags = fcntl( fd, F_GETFL ); + + if ( flags == -1 ) + return -1; + + if ( fcntl( fd, F_SETFL, flags | O_NONBLOCK ) == -1 ) + return -1; + + return 0; +} + +static void *accept_worker( void *_ctx ) +{ + struct kvmi_ctx *ctx = _ctx; + + for ( ;; ) { + struct kvmi_dom *dom; + int ret; + int fd; + struct pollfd fds[2]; + + memset( fds, 0, sizeof( fds ) ); + + fds[0].fd = ctx->fd; + fds[0].events = POLLIN; + + fds[1].fd = ctx->th_fds[0]; + fds[1].events = POLLIN; + + do { + ret = poll( fds, sizeof( fds ) / sizeof( fds[0] ), -1 ); + } while ( ret < 0 && errno == EINTR ); + + if ( ret < 0 ) + 
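+		/*
+		 * Wire layout produced by alloc_iovec() above for a batch of N
+		 * queued commands (illustrative):
+		 *
+		 *   [ KVMI_CONTROL_CMD_RESPONSE enable=0 ]  <- prefix, seq = first_seq
+		 *   [ command 1 ] ... [ command N ]         <- no individual replies
+		 *   [ KVMI_CONTROL_CMD_RESPONSE enable=1 ]  <- suffix, the one reply
+		 *
+		 * so a whole batch costs a single round trip, acknowledged by the
+		 * reply to the suffix message. A batch that consists of just one
+		 * event reply is sent as-is, without this framing.
+		 */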
break;
+
+		if ( fds[1].revents )
+			break;
+
+		if ( !fds[0].revents )
+			break;
+
+		do {
+			fd = accept( ctx->fd, NULL, NULL );
+		} while ( fd < 0 && errno == EINTR );
+
+		if ( fd == -1 )
+			break;
+
+		if ( set_nonblock( fd ) ) {
+			shutdown( fd, SHUT_RDWR );
+			close( fd );
+			break;
+		}
+
+		dom = calloc( 1, sizeof( *dom ) );
+		if ( !dom ) {
+			/* don't leak the accepted socket */
+			shutdown( fd, SHUT_RDWR );
+			close( fd );
+			break;
+		}
+
+		dom->fd     = fd;
+		dom->mem_fd = -1;
+		pthread_mutex_init( &dom->event_lock, NULL );
+		pthread_mutex_init( &dom->lock, NULL );
+
+		if ( !handshake_done( ctx, dom ) ) {
+			kvmi_log_error( "the handshake has failed" );
+			kvmi_domain_close( dom, true );
+			continue;
+		}
+
+		if ( unsupported_version( dom ) ) {
+			kvmi_domain_close( dom, true );
+			continue;
+		}
+
+		dom->cb_ctx = ctx->cb_ctx;
+
+		if ( ctx->accept_cb( dom, &dom->hsk.uuid, ctx->cb_ctx ) != 0 ) {
+			kvmi_domain_close( dom, true );
+			continue;
+		}
+	}
+
+	return NULL;
+}
+
+static struct kvmi_ctx *alloc_kvmi_ctx( kvmi_new_guest_cb accept_cb, kvmi_handshake_cb hsk_cb, void *cb_ctx )
+{
+	struct kvmi_ctx *ctx;
+
+	if ( !accept_cb )
+		return NULL;
+
+	ctx = calloc( 1, sizeof( *ctx ) );
+	if ( !ctx )
+		return NULL;
+
+	ctx->fd = -1;
+
+	ctx->accept_cb    = accept_cb;
+	ctx->handshake_cb = hsk_cb;
+	ctx->cb_ctx       = cb_ctx;
+
+	ctx->th_fds[0] = -1;
+	ctx->th_fds[1] = -1;
+
+	/* these will be used to signal the accept worker to exit */
+	if ( pipe( ctx->th_fds ) < 0 ) {
+		free( ctx );
+		return NULL;
+	}
+
+	return ctx;
+}
+
+static bool start_listener( struct kvmi_ctx *ctx )
+{
+	if ( pthread_create( &ctx->th_id, NULL, accept_worker, ctx ) )
+		return false;
+
+	ctx->th_started = true;
+	return true;
+}
+
+void *kvmi_init_unix_socket( const char *socket, kvmi_new_guest_cb accept_cb, kvmi_handshake_cb hsk_cb, void *cb_ctx )
+{
+	struct kvmi_ctx *ctx;
+	int              err;
+
+	errno = 0;
+
+	ctx = alloc_kvmi_ctx( accept_cb, hsk_cb, cb_ctx );
+	if ( !ctx )
+		return NULL;
+
+	if ( !setup_unix_socket( ctx, socket ) )
+		goto out_err;
+
+	if ( !start_listener( ctx ) )
+		goto out_err;
+
+	return ctx;
+out_err:
+	err = errno;
+	kvmi_uninit( ctx );
+	errno = err;
+	return NULL;
+}
+
+void *kvmi_init_vsock( unsigned int port, kvmi_new_guest_cb accept_cb, kvmi_handshake_cb hsk_cb, void *cb_ctx )
+{
+	struct kvmi_ctx *ctx;
+	int              err;
+
+	errno = 0;
+
+	ctx = alloc_kvmi_ctx( accept_cb, hsk_cb, cb_ctx );
+	if ( !ctx )
+		return NULL;
+
+	if ( !setup_vsock( ctx, port ) )
+		goto out_err;
+
+	if ( !start_listener( ctx ) )
+		goto out_err;
+
+	return ctx;
+out_err:
+	err = errno;
+	kvmi_uninit( ctx );
+	errno = err;
+	return NULL;
+}
+
+void kvmi_uninit( void *_ctx )
+{
+	struct kvmi_ctx *ctx = _ctx;
+
+	if ( !ctx )
+		return;
+
+	if ( ctx->fd != -1 ) {
+		shutdown( ctx->fd, SHUT_RDWR );
+		close( ctx->fd );
+	}
+
+	if ( ctx->th_fds[1] != -1 && ctx->th_started ) {
+		/* we have a running thread */
+		if ( write( ctx->th_fds[1], "\n", 1 ) == 1 )
+			pthread_join( ctx->th_id, NULL );
+	}
+
+	/* close pipe between threads */
+	if ( ctx->th_fds[0] != -1 )
+		close( ctx->th_fds[0] );
+	if ( ctx->th_fds[1] != -1 )
+		close( ctx->th_fds[1] );
+
+	free( ctx );
+}
+
+/*
+ * This function is called by the child of a process that called
+ * kvmi_init_unix_socket() or kvmi_init_vsock(). All this does is close
+ * the file descriptor so that there's no longer a reference to it. The
+ * threads cannot be uninitialized because after fork they are in an
+ * undefined state (it's unspecified if they can be
+ * joined).
+ */ +void kvmi_close( void *_ctx ) +{ + struct kvmi_ctx *ctx = _ctx; + + if ( !ctx ) + return; + + if ( ctx->fd != -1 ) { + close( ctx->fd ); + ctx->fd = -1; + } +} + +void kvmi_domain_close( void *d, bool do_shutdown ) +{ + struct kvmi_dom *dom = d; + + if ( !dom ) + return; + + kvmi_close_kvmmem( dom ); + + if ( do_shutdown ) + shutdown( dom->fd, SHUT_RDWR ); + close( dom->fd ); + + for ( struct kvmi_dom_event *ev = dom->events; ev; ) { + struct kvmi_dom_event *next = ev->next; + + free( ev ); + ev = next; + } + + pthread_mutex_destroy( &dom->event_lock ); + pthread_mutex_destroy( &dom->lock ); + + free( dom ); +} + +int kvmi_connection_fd( const void *d ) +{ + const struct kvmi_dom *dom = d; + + return dom->fd; +} + +void kvmi_domain_name( const void *d, char *buffer, size_t buffer_size ) +{ + const struct kvmi_dom *dom = d; + + snprintf( buffer, buffer_size, "%s", dom->hsk.name ); +} + +int64_t kvmi_get_starttime( const void *d ) +{ + const struct kvmi_dom *dom = d; + + return dom->hsk.start_time; +} + +static int kvmi_send_msg( struct kvmi_dom *dom, unsigned short msg_id, unsigned msg_seq, const void *data, + size_t data_size ) +{ + struct kvmi_msg_hdr hdr = { .id = msg_id, .seq = msg_seq, .size = data_size }; + struct iovec iov[] = { + { .iov_base = &hdr, .iov_len = sizeof( hdr ) }, + { .iov_base = ( void * )data, .iov_len = data_size }, + }; + size_t n = data_size ? 2 : 1; + + return do_write( dom, iov, n, sizeof( hdr ) + data_size ); +} + +static bool is_event( unsigned msg_id ) +{ + return ( msg_id == KVMI_EVENT ); +} + +static int copy_event_common_data( struct kvmi_dom_event *ev, size_t *incoming ) +{ + const struct kvmi_event *in_common = ( const struct kvmi_event * )ev->buf; + struct kvmi_event * out_common = &ev->event.common; + size_t min_msg_size = offsetof( struct kvmi_event, arch ); + size_t useful = MIN( in_common->size, sizeof( *out_common ) ); + + if ( in_common->size > *incoming || in_common->size < min_msg_size ) + return -1; + + if ( useful ) + memcpy( out_common, in_common, useful ); + + *incoming -= in_common->size; + + return 0; +} + +static int expected_event_data_size( size_t event_id, size_t *size ) +{ + static const size_t unknown = 0; + static const size_t sz[] = { + [KVMI_EVENT_BREAKPOINT] = sizeof( struct kvmi_event_breakpoint ), + [KVMI_EVENT_CREATE_VCPU] = 1, + [KVMI_EVENT_CR] = sizeof( struct kvmi_event_cr ), + [KVMI_EVENT_DESCRIPTOR] = sizeof( struct kvmi_event_descriptor ), + [KVMI_EVENT_HYPERCALL] = 1, + [KVMI_EVENT_MSR] = sizeof( struct kvmi_event_msr ), + [KVMI_EVENT_PAUSE_VCPU] = 1, + [KVMI_EVENT_PF] = sizeof( struct kvmi_event_pf ), + [KVMI_EVENT_TRAP] = sizeof( struct kvmi_event_trap ), + [KVMI_EVENT_UNHOOK] = 1, + [KVMI_EVENT_XSETBV] = 1, + [KVMI_EVENT_SINGLESTEP] = 1, + }; + + if ( event_id >= sizeof( sz ) / sizeof( sz[0] ) || sz[event_id] == unknown ) + return -1; + + *size = sz[event_id] & ~1; + return 0; +} + +static int copy_event_specific_data( struct kvmi_dom_event *ev, size_t incoming ) +{ + const struct kvmi_event * in_common = ( const struct kvmi_event * )ev->buf; + const struct kvmi_event_cr *in_cr = ( const struct kvmi_event_cr * )( ev->buf + in_common->size ); + struct kvmi_event_cr * out_cr = &ev->event.cr; + size_t expected; + size_t useful; + + if ( expected_event_data_size( ev->event.common.event, &expected ) ) + return -1; + + useful = MIN( expected, incoming ); + if ( useful ) + memcpy( out_cr, in_cr, useful ); + + return 0; +} + +/* + * newer/extended event: + * received: [ common ] [ specific ] + * internal: [ common ] [ 
specific ] + * older/smaller event: + * received: [ common ] [ specific ] + * internal: [ common ] [ specific ] + */ +static int kvmi_read_event_data( struct kvmi_dom *dom, struct kvmi_dom_event *ev, size_t msg_size, kvmi_timeout_t ms ) +{ + size_t max_msg_size = sizeof( ev->buf ); + + if ( msg_size > max_msg_size ) + goto out_inval; + + if ( __do_read( dom, &ev->buf, msg_size, ms ) ) + return -1; + + if ( copy_event_common_data( ev, &msg_size ) ) + goto out_inval; + + if ( copy_event_specific_data( ev, msg_size ) ) + goto out_inval; + + return 0; + +out_inval: + errno = EINVAL; + return -1; +} + +static int kvmi_push_event( struct kvmi_dom *dom, unsigned int seq, unsigned int size, kvmi_timeout_t ms ) +{ + bool queued = true; + struct kvmi_dom_event *new_event; + + new_event = calloc( 1, sizeof( *new_event ) ); + if ( !new_event ) + return -1; + + if ( kvmi_read_event_data( dom, new_event, size, ms ) ) { + int _errno = errno; + + free( new_event ); + errno = _errno; + return -1; + } + + new_event->seq = seq; + new_event->next = NULL; + + pthread_mutex_lock( &dom->event_lock ); + /* Don't queue events ad infinitum */ + if ( dom->event_count < MAX_QUEUED_EVENTS ) { + if ( dom->event_last ) + dom->event_last->next = new_event; + else + dom->events = new_event; + dom->event_last = new_event; + dom->event_count++; + } else + queued = false; + pthread_mutex_unlock( &dom->event_lock ); + + if ( !queued ) { + free( new_event ); + errno = ENOMEM; + return -1; + } + + return 0; +} + +/* The caller is responsible for free()-ing the event */ +int kvmi_pop_event( void *d, struct kvmi_dom_event **event ) +{ + struct kvmi_dom *dom = d; + + pthread_mutex_lock( &dom->event_lock ); + *event = dom->events; + if ( *event ) { + dom->events = ( *event )->next; + + if ( --dom->event_count == 0 ) + dom->event_last = NULL; + + ( *event )->next = NULL; + } + pthread_mutex_unlock( &dom->event_lock ); + + if ( *event == NULL ) { + errno = EAGAIN; + return -1; + } + + return 0; +} + +size_t kvmi_get_pending_events( void *d ) +{ + struct kvmi_dom *dom = d; + size_t cnt; + + pthread_mutex_lock( &dom->event_lock ); + cnt = dom->event_count; + pthread_mutex_unlock( &dom->event_lock ); + + return cnt; +} + +static int recv_reply_header( struct kvmi_dom *dom, const struct kvmi_msg_hdr *req, size_t *size ) +{ + struct kvmi_msg_hdr h; + + while ( !do_read( dom, &h, sizeof( h ) ) ) { + if ( is_event( h.id ) ) { + if ( kvmi_push_event( dom, h.seq, h.size, KVMI_WAIT ) ) + break; + } else if ( h.id != req->id || h.seq != req->seq ) { + errno = ENOMSG; + kvmi_log_error( "Wrong message %u instead of %u (seq %u/%u)", h.id, req->id, h.seq, req->seq ); + break; + } else { + *size = h.size; + return 0; + } + } + + return -1; +} + +static int convert_kvm_error_to_errno( int err ) +{ + switch ( err ) { + case -KVM_ENOSYS: + return ENOSYS; + case -KVM_EFAULT: + return EFAULT; + case -KVM_E2BIG: + return E2BIG; + case -KVM_EPERM: + return EPERM; + case -KVM_EOPNOTSUPP: + return EOPNOTSUPP; + case -KVM_EAGAIN: + return EAGAIN; + case -KVM_EBUSY: + return EBUSY; + case -KVM_EINVAL: + return EINVAL; + case -KVM_ENOENT: + return ENOENT; + case -KVM_ENOMEM: + return ENOMEM; + default: + return EPROTO; + } +} + +static int recv_error_code( struct kvmi_dom *dom, size_t *msg_size ) +{ + struct kvmi_error_code ec; + + if ( *msg_size < sizeof( ec ) ) { + errno = ENODATA; + return -1; + } + + if ( do_read( dom, &ec, sizeof( ec ) ) ) + return -1; + + if ( ec.err ) { + errno = convert_kvm_error_to_errno( ec.err ); + return -1; + } + + *msg_size -= 
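+	/*
+	 * Example (illustrative): every command reply carries a kvmi_error_code
+	 * first, so a failed request surfaces to the caller as -1 with errno
+	 * set through the mapping above; an unknown kernel error is reported
+	 * as EPROTO. E.g., with kvmi_control_events() (defined below):
+	 *
+	 *   if ( kvmi_control_events( dom, 0, KVMI_EVENT_BREAKPOINT, true ) < 0 )
+	 *           fprintf( stderr, "enable failed: %s\n", strerror( errno ) );
+	 */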
sizeof( ec ); + return 0; +} + +static int recv_reply_data( struct kvmi_dom *dom, size_t incoming, void *dest, size_t *dest_size ) +{ + size_t expected = dest_size ? *dest_size : 0; + size_t useful = MIN( incoming, expected ); + + if ( useful && do_read( dom, dest, useful ) ) + return -1; + + if ( incoming > useful ) + return consume_bytes( dom, incoming - useful ); + + if ( expected > useful ) { + size_t missing = expected - useful; + + memset( ( char * )dest + useful, 0, missing ); + + *dest_size = useful; + } + + return 0; +} + +static int recv_reply( struct kvmi_dom *dom, const struct kvmi_msg_hdr *req, void *dest, size_t *dest_size ) +{ + size_t incoming; + + if ( recv_reply_header( dom, req, &incoming ) ) + return -1; + + if ( recv_error_code( dom, &incoming ) ) + return -1; + + return recv_reply_data( dom, incoming, dest, dest_size ); +} + +static int request_raw( struct kvmi_dom *dom, const void *src, size_t src_size, void *dest, size_t *dest_size ) +{ + struct iovec iov = { .iov_base = ( void * )src, .iov_len = src_size }; + const struct kvmi_msg_hdr *req = src; + int err; + + pthread_mutex_lock( &dom->lock ); + + err = do_write( dom, &iov, 1, src_size ); + if ( !err ) + err = recv_reply( dom, req, dest, dest_size ); + + pthread_mutex_unlock( &dom->lock ); + + return err; +} + +static int request_iov( struct kvmi_dom *dom, struct iovec *iov, size_t n, size_t size, void *dest, size_t *dest_size ) +{ + const struct kvmi_msg_hdr *req = iov[0].iov_base; + int err; + + pthread_mutex_lock( &dom->lock ); + + err = do_write( dom, iov, n, size ); + if ( !err ) + err = recv_reply( dom, req, dest, dest_size ); + + pthread_mutex_unlock( &dom->lock ); + + return err; +} + +static int request( struct kvmi_dom *dom, unsigned short msg_id, const void *src, size_t src_size, void *dest, + size_t *dest_size ) +{ + int err; + struct kvmi_msg_hdr req = { .id = msg_id, .seq = new_seq() }; + + pthread_mutex_lock( &dom->lock ); + + err = kvmi_send_msg( dom, msg_id, req.seq, src, src_size ); + + if ( !err ) + err = recv_reply( dom, &req, dest, dest_size ); + + pthread_mutex_unlock( &dom->lock ); + + return err; +} + +int kvmi_control_events( void *dom, unsigned short vcpu, int id, bool enable ) +{ + struct { + struct kvmi_vcpu_hdr vcpu; + struct kvmi_control_events cmd; + } req = { .vcpu = { .vcpu = vcpu }, .cmd = { .event_id = id, .enable = enable } }; + + return request( dom, KVMI_CONTROL_EVENTS, &req, sizeof( req ), NULL, NULL ); +} + +int kvmi_control_cr( void *dom, unsigned short vcpu, unsigned int cr, bool enable ) +{ + struct { + struct kvmi_vcpu_hdr vcpu; + struct kvmi_control_cr cmd; + } req = { .vcpu = { .vcpu = vcpu }, .cmd = { .cr = cr, .enable = enable } }; + + return request( dom, KVMI_CONTROL_CR, &req, sizeof( req ), NULL, NULL ); +} + +int kvmi_control_msr( void *dom, unsigned short vcpu, unsigned int msr, bool enable ) +{ + struct { + struct kvmi_vcpu_hdr vcpu; + struct kvmi_control_msr cmd; + } req = { .vcpu = { .vcpu = vcpu }, .cmd = { .msr = msr, .enable = enable } }; + + return request( dom, KVMI_CONTROL_MSR, &req, sizeof( req ), NULL, NULL ); +} + +static void setup_kvmi_pause_vcpu_msg( struct kvmi_pause_vcpu_msg *msg, unsigned short vcpu ) +{ + memset( msg, 0, sizeof( *msg ) ); + + msg->hdr.id = KVMI_PAUSE_VCPU; + msg->hdr.seq = new_seq(); + msg->hdr.size = sizeof( *msg ) - sizeof( msg->hdr ); + + msg->vcpu.vcpu = vcpu; +} + +int kvmi_queue_pause_vcpu( void *grp, unsigned short vcpu ) +{ + struct kvmi_pause_vcpu_msg msg; + + setup_kvmi_pause_vcpu_msg( &msg, vcpu ); + + return 
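+	/*
+	 * Example (illustrative): combining the per-vCPU control commands
+	 * defined above, e.g. watching writes to CR3 on vCPU 0. handle_error()
+	 * is a hypothetical application helper.
+	 *
+	 *   if ( kvmi_control_cr( dom, 0, 3, true ) ||
+	 *        kvmi_control_events( dom, 0, KVMI_EVENT_CR, true ) )
+	 *           handle_error( errno );   // hypothetical
+	 *
+	 * After this, CR3 writes arrive as KVMI_EVENT_CR events through the
+	 * event queue functions above.
+	 */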
kvmi_batch_add( grp, &msg, sizeof( msg ) ); +} + +int kvmi_pause_all_vcpus( void *dom, unsigned int count ) +{ + struct kvmi_pause_vcpu_msg msg; + unsigned short vcpu; + int err = -1; + void * grp; + + if ( !count ) + return 0; + + grp = kvmi_batch_alloc( dom ); + if ( !grp ) + return -1; + + for ( vcpu = 0; vcpu < count; vcpu++ ) { + + setup_kvmi_pause_vcpu_msg( &msg, vcpu ); + + msg.cmd.wait = 1; + + if ( kvmi_batch_add( grp, &msg, sizeof( msg ) ) ) + goto out; + } + + if ( kvmi_batch_commit( grp ) ) + goto out; + + err = 0; +out: + kvmi_batch_free( grp ); + + return err; +} + +int kvmi_get_page_access( void *dom, unsigned long long int gpa, unsigned char *access, unsigned short view ) +{ + struct kvmi_get_page_access * req = NULL; + struct kvmi_get_page_access_reply *rpl = NULL; + size_t req_size = sizeof( *req ) + 1 * sizeof( req->gpa[0] ); + size_t rpl_size = sizeof( *rpl ) + 1 * sizeof( rpl->access[0] ); + int err = -1; + + req = calloc( 1, req_size ); + rpl = malloc( rpl_size ); + if ( !req || !rpl ) + goto out; + + req->count = 1; + req->gpa[0] = gpa; + req->view = view; + + err = request( dom, KVMI_GET_PAGE_ACCESS, req, req_size, rpl, &rpl_size ); + + if ( !err ) + *access = rpl->access[0]; + +out: + free( req ); + free( rpl ); + + return err; +} + +int kvmi_get_page_write_bitmap( void *dom, __u64 gpa, __u32 *bitmap ) +{ + struct kvmi_get_page_write_bitmap * req = NULL; + struct kvmi_get_page_write_bitmap_reply *rpl = NULL; + size_t req_size = sizeof( *req ) + 1 * sizeof( req->gpa[0] ); + size_t rpl_size = sizeof( *rpl ) + 1 * sizeof( rpl->bitmap[0] ); + int err = -1; + + req = malloc( req_size ); + rpl = malloc( rpl_size ); + if ( !req || !rpl ) + goto out; + + memset( req, 0, req_size ); + req->count = 1; + req->gpa[0] = gpa; + + err = request( dom, KVMI_GET_PAGE_WRITE_BITMAP, req, req_size, rpl, &rpl_size ); + + if ( !err ) + *bitmap = rpl->bitmap[0]; + +out: + free( req ); + free( rpl ); + + return err; +} + +static void *alloc_kvmi_set_page_access_msg( unsigned long long int *gpa, unsigned char *access, unsigned short count, + size_t *msg_size, unsigned short view ) +{ + struct kvmi_set_page_access_msg *msg; + unsigned int k; + + *msg_size = sizeof( *msg ) + count * sizeof( msg->cmd.entries[0] ); + msg = calloc( 1, *msg_size ); + if ( !msg ) + return NULL; + + msg->hdr.id = KVMI_SET_PAGE_ACCESS; + msg->hdr.seq = new_seq(); + msg->hdr.size = *msg_size - sizeof( msg->hdr ); + + msg->cmd.count = count; + msg->cmd.view = view; + + for ( k = 0; k < count; k++ ) { + msg->cmd.entries[k].gpa = gpa[k]; + msg->cmd.entries[k].access = access[k]; + } + + return msg; +} + +int kvmi_set_page_access( void *dom, unsigned long long int *gpa, unsigned char *access, unsigned short count, + unsigned short view ) +{ + void * msg; + size_t msg_size; + int err = -1; + + msg = alloc_kvmi_set_page_access_msg( gpa, access, count, &msg_size, view ); + if ( msg ) { + err = request_raw( dom, msg, msg_size, NULL, NULL ); + free( msg ); + } + + return err; +} + +int kvmi_queue_page_access( void *grp, unsigned long long int *gpa, unsigned char *access, unsigned short count, + unsigned short view ) +{ + struct kvmi_set_page_access_msg *msg; + size_t msg_size; + int err = -1; + + msg = alloc_kvmi_set_page_access_msg( gpa, access, count, &msg_size, view ); + if ( !msg ) + return -1; + + err = kvmi_batch_add( grp, msg, msg_size ); + + free( msg ); + + return err; +} + +static void *alloc_kvmi_set_page_write_bitmap_msg( __u64 *gpa, __u32 *bitmap, __u16 view, __u16 count, + size_t *msg_size ) +{ + struct 
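+	/*
+	 * Example (illustrative): revoking write access to one guest page with
+	 * the helpers above. The KVMI_PAGE_ACCESS_* bit names are assumptions
+	 * taken from the KVMI ABI; the access byte is a bitmask of rwx bits.
+	 *
+	 *   unsigned long long gpa    = 0x1000;
+	 *   unsigned char      access = KVMI_PAGE_ACCESS_R | KVMI_PAGE_ACCESS_X;
+	 *
+	 *   if ( kvmi_set_page_access( dom, &gpa, &access, 1, 0 ) )
+	 *           fprintf( stderr, "%s\n", strerror( errno ) );
+	 *
+	 * Subsequent guest writes to that page show up as KVMI_EVENT_PF events.
+	 */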
kvmi_set_page_write_bitmap_msg *msg;
+	unsigned int                           k;
+
+	*msg_size = sizeof( *msg ) + count * sizeof( msg->cmd.entries[0] );
+	msg       = calloc( 1, *msg_size );
+	if ( !msg )
+		return NULL;
+
+	msg->hdr.id   = KVMI_SET_PAGE_WRITE_BITMAP;
+	msg->hdr.seq  = new_seq();
+	msg->hdr.size = *msg_size - sizeof( msg->hdr );
+
+	msg->cmd.view  = view;
+	msg->cmd.count = count;
+
+	for ( k = 0; k < count; k++ ) {
+		msg->cmd.entries[k].gpa    = gpa[k];
+		msg->cmd.entries[k].bitmap = bitmap[k];
+	}
+
+	return msg;
+}
+
+int kvmi_set_page_write_bitmap( void *dom, __u64 *gpa, __u32 *bitmap, unsigned short count )
+{
+	void * msg;
+	size_t msg_size;
+	int    err  = -1;
+	__u16  view = 0;
+
+	msg = alloc_kvmi_set_page_write_bitmap_msg( gpa, bitmap, view, count, &msg_size );
+	if ( msg ) {
+		err = request_raw( dom, msg, msg_size, NULL, NULL );
+		free( msg );
+	}
+
+	return err;
+}
+
+int kvmi_queue_spp_access( void *grp, __u64 *gpa, __u32 *bitmap, __u16 view, __u16 count )
+{
+	/* use the full message type returned by the allocator above */
+	struct kvmi_set_page_write_bitmap_msg *msg;
+	size_t                                 msg_size;
+	int                                    err;
+
+	msg = alloc_kvmi_set_page_write_bitmap_msg( gpa, bitmap, view, count, &msg_size );
+	if ( !msg )
+		return -1;
+
+	err = kvmi_batch_add( grp, msg, msg_size );
+
+	free( msg );
+
+	return err;
+}
+
+int kvmi_get_vcpu_count( void *dom, unsigned int *count )
+{
+	struct kvmi_get_guest_info_reply rpl;
+	size_t                           received = sizeof( rpl );
+	int                              err;
+
+	err = request( dom, KVMI_GET_GUEST_INFO, NULL, 0, &rpl, &received );
+
+	if ( !err )
+		*count = rpl.vcpu_count;
+
+	return err;
+}
+
+int kvmi_get_tsc_speed( void *dom, unsigned long long int *speed )
+{
+	struct kvmi_vcpu_hdr            req = { .vcpu = 0 };
+	struct kvmi_get_vcpu_info_reply rpl;
+	size_t                          received = sizeof( rpl );
+	int                             err;
+
+	err = request( dom, KVMI_GET_VCPU_INFO, &req, sizeof( req ), &rpl, &received );
+
+	if ( !err )
+		*speed = rpl.tsc_speed;
+
+	return err;
+}
+
+int kvmi_get_cpuid( void *dom, unsigned short vcpu, unsigned int function, unsigned int index, unsigned int *eax,
+                    unsigned int *ebx, unsigned int *ecx, unsigned int *edx )
+{
+	struct {
+		struct kvmi_vcpu_hdr  vcpu;
+		struct kvmi_get_cpuid cmd;
+	} req = { .vcpu = { .vcpu = vcpu }, .cmd = { .function = function, .index = index } };
+	struct kvmi_get_cpuid_reply rpl;
+	size_t                      received = sizeof( rpl );
+	int                         err;
+
+	err = request( dom, KVMI_GET_CPUID, &req, sizeof( req ), &rpl, &received );
+
+	if ( !err ) {
+		*eax = rpl.eax;
+		*ebx = rpl.ebx;
+		*ecx = rpl.ecx;
+		*edx = rpl.edx;
+	}
+
+	return err;
+}
+
+int kvmi_get_mtrr_type( void *dom, unsigned long long int gpa, unsigned char *type )
+{
+	struct {
+		struct kvmi_vcpu_hdr      vcpu;
+		struct kvmi_get_mtrr_type cmd;
+	} req = { .vcpu = { .vcpu = 0 }, .cmd = { .gpa = gpa } };
+	struct kvmi_get_mtrr_type_reply rpl;
+	size_t                          received = sizeof( rpl );
+	int                             err;
+
+	err = request( dom, KVMI_GET_MTRR_TYPE, &req, sizeof( req ), &rpl, &received );
+
+	if ( !err )
+		*type = rpl.type;
+
+	return err;
+}
+
+static int request_varlen_response( struct kvmi_dom *dom, unsigned short msg_id, const void *src, size_t src_size,
+                                    size_t *rpl_size )
+{
+	struct kvmi_msg_hdr req = { .id = msg_id, .seq = new_seq() };
+
+	if ( kvmi_send_msg( dom, msg_id, req.seq, src, src_size ) )
+		return -1;
+
+	if ( recv_reply_header( dom, &req, rpl_size ) )
+		return -1;
+
+	if ( recv_error_code( dom, rpl_size ) )
+		return -1;
+
+	return 0;
+}
+
+int kvmi_get_xsave( void *dom, unsigned short vcpu, void *buffer, size_t buf_size )
+{
+	struct kvmi_vcpu_hdr req = { .vcpu = vcpu };
+
+	return request( dom, KVMI_GET_XSAVE, &req, sizeof( req ), buffer,
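+	/*
+	 * Example (illustrative): the query commands above compose naturally,
+	 * e.g. reading CPUID leaf 1 for every vCPU:
+	 *
+	 *   unsigned int count, eax, ebx, ecx, edx;
+	 *
+	 *   if ( !kvmi_get_vcpu_count( dom, &count ) )
+	 *           for ( unsigned short vcpu = 0; vcpu < count; vcpu++ )
+	 *                   if ( !kvmi_get_cpuid( dom, vcpu, 1, 0, &eax, &ebx,
+	 *                                         &ecx, &edx ) )
+	 *                           printf( "vcpu%u: eax=%x\n", vcpu, eax );
+	 */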
&buf_size ); +} + +int kvmi_inject_exception( void *dom, unsigned short vcpu, unsigned long long int gva, unsigned int error, + unsigned char vector ) +{ + struct { + struct kvmi_vcpu_hdr vcpu; + struct kvmi_inject_exception cmd; + } req = { .vcpu = { .vcpu = vcpu }, .cmd = { .nr = vector, .error_code = error, .address = gva } }; + + return request( dom, KVMI_INJECT_EXCEPTION, &req, sizeof( req ), NULL, NULL ); +} + +int kvmi_read_physical( void *dom, unsigned long long int gpa, void *buffer, size_t size ) +{ + struct kvmi_read_physical req = { .gpa = gpa, .size = size }; + + return request( dom, KVMI_READ_PHYSICAL, &req, sizeof( req ), buffer, &size ); +} + +int kvmi_write_physical( void *dom, unsigned long long int gpa, const void *buffer, size_t size ) +{ + struct kvmi_write_physical *req; + size_t req_size = sizeof( *req ) + size; + int err = -1; + + req = malloc( req_size ); + if ( !req ) + return -1; + + req->gpa = gpa; + req->size = size; + memcpy( req->data, buffer, size ); + + err = request( dom, KVMI_WRITE_PHYSICAL, req, req_size, NULL, NULL ); + + free( req ); + + return err; +} + +void *kvmi_map_physical_page( void *d, unsigned long long int gpa ) +{ + struct kvmi_dom *dom = d; + + errno = 0; + + void *addr = mmap( NULL, pagesize, PROT_READ | PROT_WRITE, + MAP_LOCKED | MAP_POPULATE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 ); + + if ( addr != MAP_FAILED ) { + struct kvmi_mem_token token; + struct kvmi_mem_map map_req; + int retries = 0; + int err; + + do { + size_t received = sizeof( token ); + + err = request( dom, KVMI_GET_MAP_TOKEN, NULL, 0, &token, &received ); + + if ( !err ) { + /* fill IOCTL arg */ + memcpy( &map_req.token, &token, sizeof( struct kvmi_mem_token ) ); + map_req.gpa = gpa; + map_req.gva = ( __u64 )addr; + + /* do map IOCTL request */ + err = ioctl( dom->mem_fd, KVM_INTRO_MEM_MAP, &map_req ); + } + + if ( err && ( errno == EAGAIN || errno == EBUSY ) ) { + if ( retries++ == MAP_RETRY_WARNING ) + kvmi_log_warning( "Slow mapping for gpa %llx", gpa ); + if ( retries < MAX_MAP_RETRIES ) + sleep( MAP_RETRY_SLEEP_SECS ); + } else + break; + } while ( retries < MAX_MAP_RETRIES ); + + if ( err ) { + int _errno = errno; + munmap( addr, pagesize ); + errno = _errno; + addr = MAP_FAILED; + } + } + + return addr; +} + +int kvmi_unmap_physical_page( void *d, void *addr ) +{ + const struct kvmi_dom *dom = d; + int _errno; + int err; + + /* do unmap IOCTL request */ + err = ioctl( dom->mem_fd, KVM_INTRO_MEM_UNMAP, addr ); + _errno = errno; + + munmap( addr, pagesize ); + + errno = _errno; + + return err; +} + +static void *alloc_get_registers_req( unsigned short vcpu, struct kvm_msrs *msrs, size_t *req_size ) +{ + struct { + struct kvmi_vcpu_hdr vcpu; + struct kvmi_get_registers regs; + } * req; + + *req_size = sizeof( *req ) + sizeof( __u32 ) * msrs->nmsrs; + req = calloc( 1, *req_size ); + + if ( req ) { + unsigned int k = 0; + + req->vcpu.vcpu = vcpu; + req->regs.nmsrs = msrs->nmsrs; + + for ( ; k < msrs->nmsrs; k++ ) + req->regs.msrs_idx[k] = msrs->entries[k].index; + } + + return req; +} + +static int process_get_registers_reply( struct kvmi_dom *dom, size_t received, struct kvm_regs *regs, + struct kvm_sregs *sregs, struct kvm_msrs *msrs, unsigned int *mode ) +{ + struct kvmi_get_registers_reply rpl; + + if ( received != sizeof( rpl ) + sizeof( struct kvm_msr_entry ) * msrs->nmsrs ) { + errno = E2BIG; + return -1; + } + + if ( do_read( dom, &rpl, sizeof( rpl ) ) ) + return -1; + + if ( do_read( dom, &msrs->entries, sizeof( struct kvm_msr_entry ) * msrs->nmsrs ) ) + return 
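+	/*
+	 * Example (illustrative): direct page mapping with the functions above.
+	 * This path needs the /dev/kvmmem device, opened on demand through
+	 * kvmi_memory_mapping().
+	 *
+	 *   if ( !kvmi_memory_mapping( dom, true ) ) {
+	 *           void *page = kvmi_map_physical_page( dom, gpa );
+	 *
+	 *           if ( page != MAP_FAILED ) {
+	 *                   // read/write guest memory through `page`
+	 *                   kvmi_unmap_physical_page( dom, page );
+	 *           }
+	 *   }
+	 */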
-1; + + memcpy( regs, &rpl.regs, sizeof( *regs ) ); + memcpy( sregs, &rpl.sregs, sizeof( *sregs ) ); + *mode = rpl.mode; + + return 0; +} + +int kvmi_get_registers( void *d, unsigned short vcpu, struct kvm_regs *regs, struct kvm_sregs *sregs, + struct kvm_msrs *msrs, unsigned int *mode ) +{ + struct kvmi_dom *dom = d; + void * req; + size_t req_size; + size_t received; + int err = -1; + + req = alloc_get_registers_req( vcpu, msrs, &req_size ); + + if ( !req ) + return -1; + + pthread_mutex_lock( &dom->lock ); + + err = request_varlen_response( dom, KVMI_GET_REGISTERS, req, req_size, &received ); + + if ( !err ) + err = process_get_registers_reply( dom, received, regs, sregs, msrs, mode ); + + pthread_mutex_unlock( &dom->lock ); + + free( req ); + + return err; +} + +static void setup_kvmi_set_registers_msg( struct kvmi_set_registers_msg *msg, unsigned short vcpu, + const struct kvm_regs *regs ) +{ + memset( msg, 0, sizeof( *msg ) ); + + msg->hdr.id = KVMI_SET_REGISTERS; + msg->hdr.seq = new_seq(); + msg->hdr.size = sizeof( *msg ) - sizeof( msg->hdr ); + + msg->vcpu.vcpu = vcpu; + msg->regs = *regs; +} + +int kvmi_queue_registers( void *grp, unsigned short vcpu, const struct kvm_regs *regs ) +{ + struct kvmi_set_registers_msg msg; + + setup_kvmi_set_registers_msg( &msg, vcpu, regs ); + + return kvmi_batch_add( grp, &msg, sizeof( msg ) ); +} + +int kvmi_set_registers( void *dom, unsigned short vcpu, const struct kvm_regs *regs ) +{ + struct kvmi_set_registers_msg msg; + + setup_kvmi_set_registers_msg( &msg, vcpu, regs ); + + return request_raw( dom, &msg, sizeof( msg ), NULL, NULL ); +} + +static void setup_reply_header( struct kvmi_msg_hdr *hdr, unsigned int seq, size_t msg_size ) +{ + memset( hdr, 0, sizeof( *hdr ) ); + + hdr->id = KVMI_EVENT_REPLY; + hdr->seq = seq; + hdr->size = msg_size; +} + +int kvmi_queue_reply_event( void *grp, unsigned int seq, const void *data, size_t data_size ) +{ + struct kvmi_msg_hdr hdr; + size_t reply_size = sizeof( hdr ) + data_size; + + if ( data_size > UINT_MAX ) { /* overflow */ + errno = E2BIG; + return -1; + } + + if ( kvmi_batch_check_space( grp, reply_size, 2 ) ) { + if ( __kvmi_batch_commit( grp, false ) ) + return -1; + kvmi_batch_reset( grp ); + } + + setup_reply_header( &hdr, seq, data_size ); + + if ( __kvmi_batch_add( grp, &hdr, sizeof( hdr ) ) ) + return -1; + + if ( __kvmi_batch_add( grp, data, data_size ) ) + return -1; + + ( ( struct kvmi_batch * )grp )->wait_for_reply = false; + return 0; +} + +int kvmi_reply_event( void *_dom, unsigned int seq, const void *data, size_t data_size ) +{ + struct kvmi_dom * dom = _dom; + struct kvmi_msg_hdr hdr; + struct iovec iov[] = { + { .iov_base = &hdr, .iov_len = sizeof( hdr ) }, + { .iov_base = ( void * )data, .iov_len = data_size }, + }; + int err; + + setup_reply_header( &hdr, seq, data_size ); + + pthread_mutex_lock( &dom->lock ); + + err = do_write( dom, iov, 2, sizeof( hdr ) + data_size ); + + pthread_mutex_unlock( &dom->lock ); + + return err; +} + +static int __kvmi_get_version( void *dom, unsigned int *version, struct kvmi_features *supported ) +{ + struct kvmi_get_version_reply rpl; + size_t received = sizeof( rpl ); + int err; + + err = request( dom, KVMI_GET_VERSION, NULL, 0, &rpl, &received ); + + if ( !err ) { + *version = rpl.version; + *supported = rpl.features; + } + + return err; +} + +int kvmi_get_version( void *dom, unsigned int *version ) +{ + *version = ( ( struct kvmi_dom * )dom )->api_version; + + return 0; +} + +int kvmi_spp_support( void *dom, bool *supported ) +{ + *supported 
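+	/*
+	 * Example (illustrative): kvmi_get_registers() fills caller-provided
+	 * structures; struct kvm_msrs ends in a flexible array, so it must be
+	 * sized for the MSRs requested (the wrapper struct below is a common
+	 * way to do that). Querying MSR 0xc0000082 (LSTAR) is just an example.
+	 *
+	 *   struct kvm_regs  regs;
+	 *   struct kvm_sregs sregs;
+	 *   unsigned int     mode;
+	 *   struct {
+	 *           struct kvm_msrs      msrs;
+	 *           struct kvm_msr_entry entries[1];   // storage for the flexible array
+	 *   } m = { .msrs = { .nmsrs = 1 } };
+	 *
+	 *   m.entries[0].index = 0xc0000082;
+	 *
+	 *   if ( !kvmi_get_registers( dom, 0, &regs, &sregs, &m.msrs, &mode ) )
+	 *           printf( "rip=%llx lstar=%llx\n", regs.rip, m.entries[0].data );
+	 */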
= ( ( struct kvmi_dom * )dom )->supported.spp; + + return 0; +} + +int kvmi_ve_support( void *dom, bool *supported ) +{ + *supported = ( ( struct kvmi_dom * )dom )->supported.ve; + + return 0; +} + +int kvmi_vmfunc_support( void *dom, bool *supported ) +{ + *supported = ( ( struct kvmi_dom * )dom )->supported.vmfunc; + + return 0; +} + +int kvmi_eptp_support( void *dom, bool *supported ) +{ + *supported = ( ( struct kvmi_dom * )dom )->supported.eptp; + + return 0; +} + +int kvmi_check_command( void *dom, int id ) +{ + struct kvmi_check_command req = { .id = id }; + + return request( dom, KVMI_CHECK_COMMAND, &req, sizeof( req ), NULL, NULL ); +} + +int kvmi_check_event( void *dom, int id ) +{ + struct kvmi_check_command req = { .id = id }; + + return request( dom, KVMI_CHECK_EVENT, &req, sizeof( req ), NULL, NULL ); +} + +int kvmi_control_vm_events( void *dom, int id, bool enable ) +{ + struct kvmi_control_vm_events req = { .event_id = id, .enable = enable }; + + return request( dom, KVMI_CONTROL_VM_EVENTS, &req, sizeof( req ), NULL, NULL ); +} + +static int kvmi_read_event_header( struct kvmi_dom *dom, unsigned int *id, unsigned int *size, unsigned int *seq, + kvmi_timeout_t ms ) +{ + struct kvmi_msg_hdr h; + + if ( __do_read( dom, &h, sizeof( h ), ms ) ) + return -1; + + *id = h.id; + *seq = h.seq; + *size = h.size; + + return 0; +} + +static int kvmi_read_event( struct kvmi_dom *dom, kvmi_timeout_t ms ) +{ + unsigned int msgid; + unsigned int msgsize; + unsigned int msgseq; + + if ( kvmi_read_event_header( dom, &msgid, &msgsize, &msgseq, ms ) ) + return -1; + + if ( !is_event( msgid ) ) { + errno = EINVAL; + return -1; + } + + return kvmi_push_event( dom, msgseq, msgsize, ms ); +} + +int kvmi_wait_event( void *d, kvmi_timeout_t ms ) +{ + bool empty; + int err; + struct kvmi_dom *dom = d; + + /* Don't wait for events if there is one already queued. */ + pthread_mutex_lock( &dom->event_lock ); + empty = dom->events == NULL; + pthread_mutex_unlock( &dom->event_lock ); + + if ( !empty ) + return 0; + /* + * This ugly code is needed so that we do not block other threads + * that are trying to send commands while we are waiting for events. + */ + pthread_mutex_lock( &dom->lock ); + if ( dom->tail - dom->head ) { + /* + * The buffer is not empty. As we are shielded by the lock, it + * can be nothing else than an event (complete or partially). + */ + err = kvmi_read_event( dom, KVMI_NOWAIT ); + pthread_mutex_unlock( &dom->lock ); + } else { + pthread_mutex_unlock( &dom->lock ); + /* Wait for events without blocking too much other threads. */ + err = do_wait( dom, false, ms, true ); + if ( !err ) { + pthread_mutex_lock( &dom->lock ); + /* + * It is possible that we've lost the chance to read the + * event, someone else might have queued it. So, we don't + * wait at all. We'll get it next time from the queue. 
+ */ + err = kvmi_read_event( dom, KVMI_NOWAIT ); + pthread_mutex_unlock( &dom->lock ); + } + } + + return err; +} + +void kvmi_set_log_cb( kvmi_log_cb cb, void *ctx ) +{ + log_cb = cb; + log_ctx = ctx; +} + +int kvmi_get_maximum_gfn( void *dom, unsigned long long *gfn ) +{ + struct kvmi_get_max_gfn_reply rpl; + size_t received = sizeof( rpl ); + int err; + + err = request( dom, KVMI_GET_MAX_GFN, NULL, 0, &rpl, &received ); + if ( !err ) + *gfn = rpl.gfn; + + return err; +} + +/* begin of VE related functions */ +int kvmi_set_ve_info_page( void *dom, unsigned short vcpu, unsigned long long int gpa ) +{ + struct { + struct kvmi_vcpu_hdr hdr; + struct kvmi_set_ve_info_page cmd; + } req = { .hdr = { .vcpu = vcpu }, .cmd = { .gpa = gpa } }; + + return request( dom, KVMI_SET_VE_INFO_PAGE, &req, sizeof( req ), NULL, 0 ); +} + +int kvmi_set_ept_page_conv( void *dom, unsigned short index, unsigned long long gpa, bool sve ) +{ + struct kvmi_set_ept_page_conv_req req = { .view = index, .gpa = gpa, .sve = sve }; + + return request( dom, KVMI_SET_EPT_PAGE_CONV, &req, sizeof( req ), NULL, 0 ); +} + +int kvmi_get_ept_page_conv( void *dom, unsigned short index, unsigned long long gpa, bool *sve ) +{ + struct kvmi_get_ept_page_conv_req req = { .view = index, .gpa = gpa }; + struct kvmi_get_ept_page_conv_reply rpl; + int err; + size_t received = sizeof( rpl ); + + err = request( dom, KVMI_GET_EPT_PAGE_CONV, &req, sizeof( req ), &rpl, &received ); + if ( !err ) + *sve = !!rpl.sve; + + return err; +} + +int kvmi_switch_ept_view( void *dom, unsigned short vcpu, unsigned short view ) +{ + struct { + struct kvmi_vcpu_hdr hdr; + struct kvmi_switch_ept_view_req cmd; + } req = { .hdr = { .vcpu = vcpu }, .cmd = { .view = view } }; + + return request( dom, KVMI_SWITCH_EPT_VIEW, &req, sizeof( req ), NULL, 0 ); +} + +int kvmi_disable_ve( void *dom, unsigned short vcpu ) +{ + struct kvmi_vcpu_hdr req = { .vcpu = vcpu }; + + return request( dom, KVMI_DISABLE_VE, &req, sizeof( req ), NULL, 0 ); +} + +int kvmi_get_ept_view( void *dom, unsigned short vcpu, unsigned short *view ) +{ + struct kvmi_vcpu_hdr req = { .vcpu = vcpu }; + struct kvmi_get_ept_view_reply rpl; + int err; + size_t received = sizeof( rpl ); + + err = request( dom, KVMI_GET_EPT_VIEW, &req, sizeof( req ), &rpl, &received ); + if ( !err ) + *view = rpl.view; + + return err; +} + +int kvmi_control_ept_view( void *dom, unsigned short vcpu, unsigned short view, bool visible ) +{ + struct { + struct kvmi_vcpu_hdr hdr; + struct kvmi_control_ept_view_req cmd; + } req = { .hdr = { .vcpu = vcpu }, .cmd = { .view = view, .visible = visible } }; + + return request( dom, KVMI_CONTROL_EPT_VIEW, &req, sizeof( req ), NULL, 0 ); +} +/* end of VE related functions */ + +int kvmi_control_singlestep( void *dom, unsigned short vcpu, bool enable ) +{ + struct { + struct kvmi_vcpu_hdr vcpu; + struct kvmi_vcpu_control_singlestep cmd; + } req = { .vcpu = { .vcpu = vcpu }, .cmd = { .enable = enable } }; + + return request( dom, KVMI_VCPU_CONTROL_SINGLESTEP, &req, sizeof( req ), NULL, NULL ); +} + +int kvmi_get_xcr( void *dom, unsigned short vcpu, __u8 xcr, __u64 *value ) +{ + struct { + struct kvmi_vcpu_hdr hdr; + struct kvmi_vcpu_get_xcr cmd; + } req = { .hdr = { .vcpu = vcpu }, .cmd = { .xcr = xcr } }; + struct kvmi_vcpu_get_xcr_reply rpl; + int err; + size_t received = sizeof( rpl ); + + err = request( dom, KVMI_VCPU_GET_XCR, &req, sizeof( req ), &rpl, &received ); + if ( !err ) + *value = rpl.value; + + return err; +} + +int kvmi_set_xsave( void *dom, unsigned short vcpu, 
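+	/*
+	 * Example (illustrative sketch): a minimal event loop on top of
+	 * kvmi_wait_event()/kvmi_pop_event() above. A timeout makes
+	 * kvmi_wait_event() return -1 with errno == ETIMEDOUT. Events that
+	 * require an answer are acknowledged through kvmi_reply_event() with
+	 * the sequence number saved in ev->seq; the reply payload layout
+	 * (kvmi_event_reply plus any event-specific part) is defined by the
+	 * KVMI ABI and only sketched here.
+	 *
+	 *   struct kvmi_dom_event *ev;
+	 *
+	 *   while ( !kvmi_wait_event( dom, KVMI_WAIT ) ) {
+	 *           if ( kvmi_pop_event( dom, &ev ) )
+	 *                   continue;
+	 *
+	 *           reply_to_event( dom, ev );   // hypothetical helper that builds
+	 *                                        // the ABI-defined reply and sends
+	 *                                        // it via kvmi_reply_event( dom,
+	 *                                        // ev->seq, ... )
+	 *           free( ev );                  // popped events belong to the caller
+	 *   }
+	 */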
const void *buffer, size_t size ) +{ + struct kvmi_msg_hdr hdr = {}; + struct kvmi_vcpu_hdr vcpu_hdr = {}; + struct iovec iov[] = { + { .iov_base = &hdr, .iov_len = sizeof( hdr ) }, + { .iov_base = &vcpu_hdr, .iov_len = sizeof( vcpu_hdr ) }, + { .iov_base = ( void * )buffer, .iov_len = size }, + }; + size_t n = sizeof( iov ) / sizeof( iov[0] ); + size_t total_size = sizeof( hdr ) + sizeof( vcpu_hdr ) + size; + + hdr.id = KVMI_VCPU_SET_XSAVE; + hdr.seq = new_seq(); + hdr.size = total_size - sizeof( hdr ); + + vcpu_hdr.vcpu = vcpu; + + return request_iov( dom, iov, n, total_size, NULL, NULL ); +} + +int kvmi_translate_gva( void *dom, unsigned short vcpu, __u64 gva, __u64 *gpa ) +{ + struct { + struct kvmi_vcpu_hdr vcpu; + struct kvmi_vcpu_translate_gva cmd; + } req = { .vcpu = { .vcpu = vcpu }, .cmd = { .gva = gva } }; + struct kvmi_vcpu_translate_gva_reply rpl; + size_t received = sizeof( rpl ); + int err; + + err = request( dom, KVMI_VCPU_TRANSLATE_GVA, &req, sizeof( req ), &rpl, &received ); + if ( !err ) + *gpa = rpl.gpa; + + return err; +} diff --git a/src/version b/src/version new file mode 100644 index 0000000..509cce0 --- /dev/null +++ b/src/version @@ -0,0 +1,71 @@ +KVMI_1.0 { + global: + kvmi_batch_alloc; + kvmi_batch_commit; + kvmi_batch_free; + kvmi_close; + kvmi_check_command; + kvmi_check_event; + kvmi_connection_fd; + kvmi_control_cr; + kvmi_control_events; + kvmi_control_msr; + kvmi_control_singlestep; + kvmi_control_vm_events; + kvmi_domain_close; + kvmi_domain_is_connected; + kvmi_domain_name; + kvmi_eptp_support; + kvmi_get_cpuid; + kvmi_get_maximum_gfn; + kvmi_get_page_access; + kvmi_get_page_write_bitmap; + kvmi_get_pending_events; + kvmi_get_registers; + kvmi_get_starttime; + kvmi_get_tsc_speed; + kvmi_get_vcpu_count; + kvmi_get_version; + kvmi_get_xcr; + kvmi_get_xsave; + kvmi_init_unix_socket; + kvmi_init_vsock; + kvmi_inject_exception; + kvmi_map_physical_page; + kvmi_memory_mapping; + kvmi_pause_all_vcpus; + kvmi_pause_vcpu; + kvmi_pop_event; + kvmi_queue_page_access; + kvmi_queue_registers; + kvmi_queue_pause_vcpu; + kvmi_queue_reply_event; + kvmi_queue_spp_access; + kvmi_read_physical; + kvmi_reply_event; + kvmi_set_event_cb; + kvmi_set_log_cb; + kvmi_set_page_access; + kvmi_set_page_write_bitmap; + kvmi_set_registers; + kvmi_set_xsave; + kvmi_shutdown_guest; + kvmi_spp_support; + kvmi_translate_gva; + kvmi_ve_support; + kvmi_vmfunc_support; + kvmi_uninit; + kvmi_unmap_physical_page; + kvmi_wait_event; + kvmi_write_physical; + kvmi_get_mtrr_type; + kvmi_set_ve_info_page; + kvmi_set_ept_page_conv; + kvmi_get_ept_page_conv; + kvmi_switch_ept_view; + kvmi_disable_ve; + kvmi_get_ept_view; + kvmi_control_ept_view; + local: + *; +};