-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvmx.c
11565 lines (9915 loc) · 374 KB
/
vmx.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Kernel-based Virtual Machine driver for Linux
*
* This module enables machines with Intel VT-x extensions to run virtual
* machines without emulation or binary translation.
*
* Copyright (C) 2006 Qumranet, Inc.
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Avi Kivity <[email protected]>
* Yaniv Kamay <[email protected]>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
#include "irq.h"
#include "mmu.h"
#include "cpuid.h"
#include "lapic.h"
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <linux/mod_devicetable.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
#include "kvm_cache_regs.h"
#include "x86.h"
#include <asm/cpu.h>
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
#include <asm/fpu/internal.h>
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include "trace.h"
#include "pmu.h"
#define __ex(x) __kvm_handle_fault_on_reboot(x)
#define __ex_clear(x, reg) \
____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
#define MAX_VM_COUNT 8
// v cpu count
static int v_cpuid[MAX_VM_COUNT] = {0};
static ktime_t exit_time;
static ktime_t enter_time;
// ext exception ====
static int vmextcntr_exception_nmi = 0;
static int vmextcntr_exception_nmis[MAX_VM_COUNT] = {0};
static int vmextcntr_external_interrupt = 0;
static int vmextcntr_external_interrupts[MAX_VM_COUNT] = {0};
static int vmextcntr_triple_fault = 0;
static int vmextcntr_triple_faults[MAX_VM_COUNT] = {0};
static int vmextcntr_nmi_window = 0;
static int vmextcntr_nmi_windows[MAX_VM_COUNT] = {0};
static int vmextcntr_io_instruction = 0;
static int vmextcntr_io_instructions[MAX_VM_COUNT] = {0};
static int vmextcntr_cr_access = 0;
static int vmextcntr_cr_accesss[MAX_VM_COUNT] = {0};
static int vmextcntr_dr_access = 0;
static int vmextcntr_dr_accesss[MAX_VM_COUNT] = {0};
static int vmextcntr_cpuid = 0;
static int vmextcntr_cpuids[MAX_VM_COUNT] = {0};
static int vmextcntr_msr_read = 0;
static int vmextcntr_msr_reads[MAX_VM_COUNT] = {0};
static int vmextcntr_msr_write = 0;
static int vmextcntr_msr_writes[MAX_VM_COUNT] = {0};
static int vmextcntr_pending_interrupt = 0;
static int vmextcntr_pending_interrupts[MAX_VM_COUNT] = {0};
static int vmextcntr_hlt = 0;
static int vmextcntr_hlts[MAX_VM_COUNT] = {0};
static int vmextcntr_invd = 0;
static int vmextcntr_invds[MAX_VM_COUNT] = {0};
static int vmextcntr_invlpg = 0;
static int vmextcntr_invlpgs[MAX_VM_COUNT] = {0};
static int vmextcntr_rdpmc = 0;
static int vmextcntr_rdpmcs[MAX_VM_COUNT] = {0};
static int vmextcntr_vmcall = 0;
static int vmextcntr_vmcalls[MAX_VM_COUNT] = {0};
static int vmextcntr_vmclear = 0;
static int vmextcntr_vmclears[MAX_VM_COUNT] = {0};
static int vmextcntr_vmlaunch = 0;
static int vmextcntr_vmlaunchs[MAX_VM_COUNT] = {0};
static int vmextcntr_vmptrld = 0;
static int vmextcntr_vmptrlds[MAX_VM_COUNT] = {0};
static int vmextcntr_vmptrst = 0;
static int vmextcntr_vmptrsts[MAX_VM_COUNT] = {0};
static int vmextcntr_vmread = 0;
static int vmextcntr_vmreads[MAX_VM_COUNT] = {0};
static int vmextcntr_vmresume = 0;
static int vmextcntr_vmresumes[MAX_VM_COUNT] = {0};
static int vmextcntr_vmwrite = 0;
static int vmextcntr_vmwrites[MAX_VM_COUNT] = {0};
static int vmextcntr_vmoff = 0;
static int vmextcntr_vmoffs[MAX_VM_COUNT] = {0};
static int vmextcntr_vmon = 0;
static int vmextcntr_vmons[MAX_VM_COUNT] = {0};
static int vmextcntr_tpr_below_threshold = 0;
static int vmextcntr_tpr_below_thresholds[MAX_VM_COUNT] = {0};
static int vmextcntr_apic_access = 0;
static int vmextcntr_apic_accesss[MAX_VM_COUNT] = {0};
static int vmextcntr_apic_write = 0;
static int vmextcntr_apic_writes[MAX_VM_COUNT] = {0};
static int vmextcntr_eoi_induced = 0;
static int vmextcntr_eoi_induceds[MAX_VM_COUNT] = {0};
static int vmextcntr_wbinvd = 0;
static int vmextcntr_wbinvds[MAX_VM_COUNT] = {0};
static int vmextcntr_xsetbv = 0;
static int vmextcntr_xsetbvs[MAX_VM_COUNT] = {0};
static int vmextcntr_task_switch = 0;
static int vmextcntr_task_switchs[MAX_VM_COUNT] = {0};
static int vmextcntr_mce_during_vmentry = 0;
static int vmextcntr_mce_during_vmentrys[MAX_VM_COUNT] = {0};
static int vmextcntr_ept_violation = 0;
static int vmextcntr_ept_violations[MAX_VM_COUNT] = {0};
static int vmextcntr_ept_misconfig = 0;
static int vmextcntr_ept_misconfigs[MAX_VM_COUNT] = {0};
static int vmextcntr_pause_instruction = 0;
static int vmextcntr_pause_instructions[MAX_VM_COUNT] = {0};
static int vmextcntr_mwait_instruction = 0;
static int vmextcntr_mwait_instructions[MAX_VM_COUNT] = {0};
static int vmextcntr_monitor_trap_flag = 0;
static int vmextcntr_monitor_trap_flags[MAX_VM_COUNT] = {0};
static int vmextcntr_monitor_instruction = 0;
static int vmextcntr_monitor_instructions[MAX_VM_COUNT] = {0};
static int vmextcntr_invept = 0;
static int vmextcntr_invepts[MAX_VM_COUNT] = {0};
static int vmextcntr_invvpid = 0;
static int vmextcntr_invvpids[MAX_VM_COUNT] = {0};
static int vmextcntr_xsaves = 0;
static int vmextcntr_xsavess[MAX_VM_COUNT] = {0};
static int vmextcntr_xrstors = 0;
static int vmextcntr_xrstorss[MAX_VM_COUNT] = {0};
static int vmextcntr_pml_full = 0;
static int vmextcntr_pml_fulls[MAX_VM_COUNT] = {0};
static int vmextcntr_pcommit = 0;
static int vmextcntr_pcommits[MAX_VM_COUNT] = {0};
// count exception for each v cpu
static int my_handle_exit(int *count_array,struct kvm_vcpu *vcpu) {
int i;
for (i = 0; i < MAX_VM_COUNT; i++) {
if(v_cpuid[i] == vcpu->vcpu_id) {
count_array[i]++;
break;
}
if( v_cpuid[i] == 0 ) {
v_cpuid[i] = vcpu->vcpu_id;
count_array[i]++;
break;
}
}
exit_time = ktime_get();
}
static const struct x86_cpu_id vmx_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_VMX),
{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
static bool __read_mostly enable_ept = 1;
module_param_named(ept, enable_ept, bool, S_IRUGO);
static bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
enable_unrestricted_guest, bool, S_IRUGO);
static bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly vmm_exclusive = 1;
module_param(vmm_exclusive, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
static bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
/*
* If nested=1, nested virtualization is supported, i.e., guests may use
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
* use VMX instructions.
*/
static bool __read_mostly nested = 0;
module_param(nested, bool, S_IRUGO);
static u64 __read_mostly host_xss;
static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
* executions of PAUSE in a loop. Also indicate if ple enabled.
* According to test, this time is usually smaller than 128 cycles.
* ple_window: upper bound on the amount of time a guest is allowed to execute
* in a PAUSE loop. Tests indicate that most spinlocks are held for
* less than 2^12 cycles
* Time is measured based on a counter that runs at the same rate as the TSC,
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
#define KVM_VMX_DEFAULT_PLE_GAP 128
#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
module_param(ple_gap, int, S_IRUGO);
static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, int, S_IRUGO);
/* Default doubles per-vcpu window every exit. */
static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
module_param(ple_window_grow, int, S_IRUGO);
/* Default resets per-vcpu window every exit to ple_window. */
static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
module_param(ple_window_shrink, int, S_IRUGO);
/* Default is to compute the maximum so we can never overflow. */
static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, int, S_IRUGO);
extern const ulong vmx_return;
#define NR_AUTOLOAD_MSRS 8
#define VMCS02_POOL_SIZE 1
struct vmcs {
u32 revision_id;
u32 abort;
char data[0];
};
/*
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
* loaded on this CPU (so we can clear them if the CPU goes down).
*/
struct loaded_vmcs {
struct vmcs *vmcs;
int cpu;
int launched;
struct list_head loaded_vmcss_on_cpu_link;
};
struct shared_msr_entry {
unsigned index;
u64 data;
u64 mask;
};
/*
* struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
* single nested guest (L2), hence the name vmcs12. Any VMX implementation has
* a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
* stored in guest memory specified by VMPTRLD, but is opaque to the guest,
* which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
* More than one of these structures may exist, if L1 runs multiple L2 guests.
* nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
* underlying hardware which will be used to run L2.
* This structure is packed to ensure that its layout is identical across
* machines (necessary for live migration).
* If there are changes in this struct, VMCS12_REVISION must be changed.
*/
typedef u64 natural_width;
struct __packed vmcs12 {
/* According to the Intel spec, a VMCS region must start with the
* following two fields. Then follow implementation-specific data.
*/
u32 revision_id;
u32 abort;
u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
u32 padding[7]; /* room for future expansion */
u64 io_bitmap_a;
u64 io_bitmap_b;
u64 msr_bitmap;
u64 vm_exit_msr_store_addr;
u64 vm_exit_msr_load_addr;
u64 vm_entry_msr_load_addr;
u64 tsc_offset;
u64 virtual_apic_page_addr;
u64 apic_access_addr;
u64 posted_intr_desc_addr;
u64 ept_pointer;
u64 eoi_exit_bitmap0;
u64 eoi_exit_bitmap1;
u64 eoi_exit_bitmap2;
u64 eoi_exit_bitmap3;
u64 xss_exit_bitmap;
u64 guest_physical_address;
u64 vmcs_link_pointer;
u64 guest_ia32_debugctl;
u64 guest_ia32_pat;
u64 guest_ia32_efer;
u64 guest_ia32_perf_global_ctrl;
u64 guest_pdptr0;
u64 guest_pdptr1;
u64 guest_pdptr2;
u64 guest_pdptr3;
u64 guest_bndcfgs;
u64 host_ia32_pat;
u64 host_ia32_efer;
u64 host_ia32_perf_global_ctrl;
u64 padding64[8]; /* room for future expansion */
/*
* To allow migration of L1 (complete with its L2 guests) between
* machines of different natural widths (32 or 64 bit), we cannot have
* unsigned long fields with no explict size. We use u64 (aliased
* natural_width) instead. Luckily, x86 is little-endian.
*/
natural_width cr0_guest_host_mask;
natural_width cr4_guest_host_mask;
natural_width cr0_read_shadow;
natural_width cr4_read_shadow;
natural_width cr3_target_value0;
natural_width cr3_target_value1;
natural_width cr3_target_value2;
natural_width cr3_target_value3;
natural_width exit_qualification;
natural_width guest_linear_address;
natural_width guest_cr0;
natural_width guest_cr3;
natural_width guest_cr4;
natural_width guest_es_base;
natural_width guest_cs_base;
natural_width guest_ss_base;
natural_width guest_ds_base;
natural_width guest_fs_base;
natural_width guest_gs_base;
natural_width guest_ldtr_base;
natural_width guest_tr_base;
natural_width guest_gdtr_base;
natural_width guest_idtr_base;
natural_width guest_dr7;
natural_width guest_rsp;
natural_width guest_rip;
natural_width guest_rflags;
natural_width guest_pending_dbg_exceptions;
natural_width guest_sysenter_esp;
natural_width guest_sysenter_eip;
natural_width host_cr0;
natural_width host_cr3;
natural_width host_cr4;
natural_width host_fs_base;
natural_width host_gs_base;
natural_width host_tr_base;
natural_width host_gdtr_base;
natural_width host_idtr_base;
natural_width host_ia32_sysenter_esp;
natural_width host_ia32_sysenter_eip;
natural_width host_rsp;
natural_width host_rip;
natural_width paddingl[8]; /* room for future expansion */
u32 pin_based_vm_exec_control;
u32 cpu_based_vm_exec_control;
u32 exception_bitmap;
u32 page_fault_error_code_mask;
u32 page_fault_error_code_match;
u32 cr3_target_count;
u32 vm_exit_controls;
u32 vm_exit_msr_store_count;
u32 vm_exit_msr_load_count;
u32 vm_entry_controls;
u32 vm_entry_msr_load_count;
u32 vm_entry_intr_info_field;
u32 vm_entry_exception_error_code;
u32 vm_entry_instruction_len;
u32 tpr_threshold;
u32 secondary_vm_exec_control;
u32 vm_instruction_error;
u32 vm_exit_reason;
u32 vm_exit_intr_info;
u32 vm_exit_intr_error_code;
u32 idt_vectoring_info_field;
u32 idt_vectoring_error_code;
u32 vm_exit_instruction_len;
u32 vmx_instruction_info;
u32 guest_es_limit;
u32 guest_cs_limit;
u32 guest_ss_limit;
u32 guest_ds_limit;
u32 guest_fs_limit;
u32 guest_gs_limit;
u32 guest_ldtr_limit;
u32 guest_tr_limit;
u32 guest_gdtr_limit;
u32 guest_idtr_limit;
u32 guest_es_ar_bytes;
u32 guest_cs_ar_bytes;
u32 guest_ss_ar_bytes;
u32 guest_ds_ar_bytes;
u32 guest_fs_ar_bytes;
u32 guest_gs_ar_bytes;
u32 guest_ldtr_ar_bytes;
u32 guest_tr_ar_bytes;
u32 guest_interruptibility_info;
u32 guest_activity_state;
u32 guest_sysenter_cs;
u32 host_ia32_sysenter_cs;
u32 vmx_preemption_timer_value;
u32 padding32[7]; /* room for future expansion */
u16 virtual_processor_id;
u16 posted_intr_nv;
u16 guest_es_selector;
u16 guest_cs_selector;
u16 guest_ss_selector;
u16 guest_ds_selector;
u16 guest_fs_selector;
u16 guest_gs_selector;
u16 guest_ldtr_selector;
u16 guest_tr_selector;
u16 guest_intr_status;
u16 host_es_selector;
u16 host_cs_selector;
u16 host_ss_selector;
u16 host_ds_selector;
u16 host_fs_selector;
u16 host_gs_selector;
u16 host_tr_selector;
};
/*
* VMCS12_REVISION is an arbitrary id that should be changed if the content or
* layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
* VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
*/
#define VMCS12_REVISION 0x11e57ed0
/*
* VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
* and any VMCS region. Although only sizeof(struct vmcs12) are used by the
* current implementation, 4K are reserved to avoid future complications.
*/
#define VMCS12_SIZE 0x1000
/* Used to remember the last vmcs02 used for some recently used vmcs12s */
struct vmcs02_list {
struct list_head list;
gpa_t vmptr;
struct loaded_vmcs vmcs02;
};
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
*/
struct nested_vmx {
/* Has the level1 guest done vmxon? */
bool vmxon;
gpa_t vmxon_ptr;
/* The guest-physical address of the current VMCS L1 keeps for L2 */
gpa_t current_vmptr;
/* The host-usable pointer to the above */
struct page *current_vmcs12_page;
struct vmcs12 *current_vmcs12;
struct vmcs *current_shadow_vmcs;
/*
* Indicates if the shadow vmcs must be updated with the
* data hold by vmcs12
*/
bool sync_shadow_vmcs;
/* vmcs02_list cache of VMCSs recently used to run L2 guests */
struct list_head vmcs02_pool;
int vmcs02_num;
u64 vmcs01_tsc_offset;
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
/*
* Guest pages referred to in vmcs02 with host-physical pointers, so
* we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
struct page *virtual_apic_page;
struct page *pi_desc_page;
struct pi_desc *pi_desc;
bool pi_pending;
u16 posted_intr_nv;
u64 msr_ia32_feature_control;
struct hrtimer preemption_timer;
bool preemption_timer_expired;
/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
u64 vmcs01_debugctl;
u16 vpid02;
u16 last_vpid;
u32 nested_vmx_procbased_ctls_low;
u32 nested_vmx_procbased_ctls_high;
u32 nested_vmx_true_procbased_ctls_low;
u32 nested_vmx_secondary_ctls_low;
u32 nested_vmx_secondary_ctls_high;
u32 nested_vmx_pinbased_ctls_low;
u32 nested_vmx_pinbased_ctls_high;
u32 nested_vmx_exit_ctls_low;
u32 nested_vmx_exit_ctls_high;
u32 nested_vmx_true_exit_ctls_low;
u32 nested_vmx_entry_ctls_low;
u32 nested_vmx_entry_ctls_high;
u32 nested_vmx_true_entry_ctls_low;
u32 nested_vmx_misc_low;
u32 nested_vmx_misc_high;
u32 nested_vmx_ept_caps;
u32 nested_vmx_vpid_caps;
};
#define POSTED_INTR_ON 0
#define POSTED_INTR_SN 1
/* Posted-Interrupt Descriptor */
struct pi_desc {
u32 pir[8]; /* Posted interrupt requested */
union {
struct {
/* bit 256 - Outstanding Notification */
u16 on : 1,
/* bit 257 - Suppress Notification */
sn : 1,
/* bit 271:258 - Reserved */
rsvd_1 : 14;
/* bit 279:272 - Notification Vector */
u8 nv;
/* bit 287:280 - Reserved */
u8 rsvd_2;
/* bit 319:288 - Notification Destination */
u32 ndst;
};
u64 control;
};
u32 rsvd[6];
} __aligned(64);
static bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
return test_and_set_bit(POSTED_INTR_ON,
(unsigned long *)&pi_desc->control);
}
static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
{
return test_and_clear_bit(POSTED_INTR_ON,
(unsigned long *)&pi_desc->control);
}
static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
{
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
}
static inline void pi_clear_sn(struct pi_desc *pi_desc)
{
return clear_bit(POSTED_INTR_SN,
(unsigned long *)&pi_desc->control);
}
static inline void pi_set_sn(struct pi_desc *pi_desc)
{
return set_bit(POSTED_INTR_SN,
(unsigned long *)&pi_desc->control);
}
static inline int pi_test_on(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_ON,
(unsigned long *)&pi_desc->control);
}
static inline int pi_test_sn(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_SN,
(unsigned long *)&pi_desc->control);
}
struct vcpu_vmx {
struct kvm_vcpu vcpu;
unsigned long host_rsp;
u8 fail;
bool nmi_known_unmasked;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
unsigned long host_idt_base;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
#endif
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
/*
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
* non-nested (L1) guest, it always points to vmcs01. For a nested
* guest (L2), it points to a different VMCS.
*/
struct loaded_vmcs vmcs01;
struct loaded_vmcs *loaded_vmcs;
bool __launched; /* temporary, used in vmx_vcpu_run */
struct msr_autoload {
unsigned nr;
struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
} msr_autoload;
struct {
int loaded;
u16 fs_sel, gs_sel, ldt_sel;
#ifdef CONFIG_X86_64
u16 ds_sel, es_sel;
#endif
int gs_ldt_reload_needed;
int fs_reload_needed;
u64 msr_host_bndcfgs;
unsigned long vmcs_host_cr4; /* May not match real cr4 */
} host_state;
struct {
int vm86_active;
ulong save_rflags;
struct kvm_segment segs[8];
} rmode;
struct {
u32 bitmask; /* 4 bits per segment (1 bit per field) */
struct kvm_save_segment {
u16 selector;
unsigned long base;
u32 limit;
u32 ar;
} seg[8];
} segment_cache;
int vpid;
bool emulation_required;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
s64 vnmi_blocked_time;
u32 exit_reason;
/* Posted interrupt descriptor */
struct pi_desc pi_desc;
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
/* Dynamic PLE window. */
int ple_window;
bool ple_window_dirty;
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
u64 current_tsc_ratio;
};
enum segment_cache_field {
SEG_FIELD_SEL = 0,
SEG_FIELD_BASE = 1,
SEG_FIELD_LIMIT = 2,
SEG_FIELD_AR = 3,
SEG_FIELD_NR = 4
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_vmx, vcpu);
}
static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
return &(to_vmx(vcpu)->pi_desc);
}
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
#define FIELD(number, name) [number] = VMCS12_OFFSET(name)
#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
[number##_HIGH] = VMCS12_OFFSET(name)+4
static unsigned long shadow_read_only_fields[] = {
/*
* We do NOT shadow fields that are modified when L0
* traps and emulates any vmx instruction (e.g. VMPTRLD,
* VMXON...) executed by L1.
* For example, VM_INSTRUCTION_ERROR is read
* by L1 if a vmx instruction fails (part of the error path).
* Note the code assumes this logic. If for some reason
* we start shadowing these fields then we need to
* force a shadow sync when L0 emulates vmx instructions
* (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
* by nested_vmx_failValid)
*/
VM_EXIT_REASON,
VM_EXIT_INTR_INFO,
VM_EXIT_INSTRUCTION_LEN,
IDT_VECTORING_INFO_FIELD,
IDT_VECTORING_ERROR_CODE,
VM_EXIT_INTR_ERROR_CODE,
EXIT_QUALIFICATION,
GUEST_LINEAR_ADDRESS,
GUEST_PHYSICAL_ADDRESS
};
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
static unsigned long shadow_read_write_fields[] = {
TPR_THRESHOLD,
GUEST_RIP,
GUEST_RSP,
GUEST_CR0,
GUEST_CR3,
GUEST_CR4,
GUEST_INTERRUPTIBILITY_INFO,
GUEST_RFLAGS,
GUEST_CS_SELECTOR,
GUEST_CS_AR_BYTES,
GUEST_CS_LIMIT,
GUEST_CS_BASE,
GUEST_ES_BASE,
GUEST_BNDCFGS,
CR0_GUEST_HOST_MASK,
CR0_READ_SHADOW,
CR4_READ_SHADOW,
TSC_OFFSET,
EXCEPTION_BITMAP,
CPU_BASED_VM_EXEC_CONTROL,
VM_ENTRY_EXCEPTION_ERROR_CODE,
VM_ENTRY_INTR_INFO_FIELD,
VM_ENTRY_INSTRUCTION_LEN,
VM_ENTRY_EXCEPTION_ERROR_CODE,
HOST_FS_BASE,
HOST_GS_BASE,
HOST_FS_SELECTOR,
HOST_GS_SELECTOR
};
static int max_shadow_read_write_fields =
ARRAY_SIZE(shadow_read_write_fields);
static const unsigned short vmcs_field_to_offset_table[] = {
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
FIELD(POSTED_INTR_NV, posted_intr_nv),
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
FIELD(GUEST_DS_SELECTOR, guest_ds_selector),
FIELD(GUEST_FS_SELECTOR, guest_fs_selector),
FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
FIELD(GUEST_INTR_STATUS, guest_intr_status),
FIELD(HOST_ES_SELECTOR, host_es_selector),
FIELD(HOST_CS_SELECTOR, host_cs_selector),
FIELD(HOST_SS_SELECTOR, host_ss_selector),
FIELD(HOST_DS_SELECTOR, host_ds_selector),
FIELD(HOST_FS_SELECTOR, host_fs_selector),
FIELD(HOST_GS_SELECTOR, host_gs_selector),
FIELD(HOST_TR_SELECTOR, host_tr_selector),
FIELD64(IO_BITMAP_A, io_bitmap_a),
FIELD64(IO_BITMAP_B, io_bitmap_b),
FIELD64(MSR_BITMAP, msr_bitmap),
FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
FIELD64(TSC_OFFSET, tsc_offset),
FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
FIELD64(EPT_POINTER, ept_pointer),
FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
FIELD64(GUEST_PDPTR0, guest_pdptr0),
FIELD64(GUEST_PDPTR1, guest_pdptr1),
FIELD64(GUEST_PDPTR2, guest_pdptr2),
FIELD64(GUEST_PDPTR3, guest_pdptr3),
FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
FIELD64(HOST_IA32_PAT, host_ia32_pat),
FIELD64(HOST_IA32_EFER, host_ia32_efer),
FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
FIELD(EXCEPTION_BITMAP, exception_bitmap),
FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask),
FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match),
FIELD(CR3_TARGET_COUNT, cr3_target_count),
FIELD(VM_EXIT_CONTROLS, vm_exit_controls),
FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count),
FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count),
FIELD(VM_ENTRY_CONTROLS, vm_entry_controls),
FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
FIELD(TPR_THRESHOLD, tpr_threshold),
FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error),
FIELD(VM_EXIT_REASON, vm_exit_reason),
FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info),
FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
FIELD(GUEST_ES_LIMIT, guest_es_limit),
FIELD(GUEST_CS_LIMIT, guest_cs_limit),
FIELD(GUEST_SS_LIMIT, guest_ss_limit),
FIELD(GUEST_DS_LIMIT, guest_ds_limit),
FIELD(GUEST_FS_LIMIT, guest_fs_limit),
FIELD(GUEST_GS_LIMIT, guest_gs_limit),
FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit),
FIELD(GUEST_TR_LIMIT, guest_tr_limit),
FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit),
FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit),
FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes),
FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes),
FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes),
FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes),
FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes),
FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes),
FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes),
FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes),
FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info),
FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
FIELD(CR0_READ_SHADOW, cr0_read_shadow),
FIELD(CR4_READ_SHADOW, cr4_read_shadow),
FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
FIELD(EXIT_QUALIFICATION, exit_qualification),
FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),