diff options
-rw-r--r-- | Makefile.target | 17 | ||||
-rwxr-xr-x | configure | 10 | ||||
-rw-r--r-- | libkvm-all.c | 1483 | ||||
-rw-r--r-- | libkvm-all.h | 868 | ||||
-rw-r--r-- | libkvm-common.h | 94 | ||||
-rw-r--r-- | qemu-kvm-ia64.c | 2 | ||||
-rw-r--r-- | qemu-kvm-x86.c | 2 | ||||
-rw-r--r-- | qemu-kvm.c | 2 | ||||
-rw-r--r-- | qemu-kvm.h | 2 | ||||
-rw-r--r-- | target-i386/libkvm.c | 666 | ||||
-rw-r--r-- | target-i386/libkvm.h | 55 | ||||
-rw-r--r-- | target-ia64/libkvm.c | 82 | ||||
-rw-r--r-- | target-ia64/libkvm.h | 31 | ||||
-rw-r--r-- | target-ppc/libkvm.c | 100 | ||||
-rw-r--r-- | target-ppc/libkvm.h | 36 |
15 files changed, 3423 insertions, 27 deletions
diff --git a/Makefile.target b/Makefile.target index cd88a2028..aac93eddf 100644 --- a/Makefile.target +++ b/Makefile.target @@ -160,7 +160,7 @@ CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc endif ifeq ($(USE_KVM), 1) -LIBOBJS+=qemu-kvm.o +LIBOBJS+=qemu-kvm.o libkvm.o libkvm-all.o endif ifdef CONFIG_SOFTFLOAT LIBOBJS+=fpu/softfloat.o @@ -579,10 +579,6 @@ ifdef CONFIG_CS4231A SOUND_HW += cs4231a.o endif -ifdef USE_KVM -DEPLIBS += libkvm.a -endif - ifdef CONFIG_VNC_TLS CPPFLAGS += $(CONFIG_VNC_TLS_CFLAGS) LIBS += $(CONFIG_VNC_TLS_LIBS) @@ -802,14 +798,6 @@ $(QEMU_PROG): $(OBJS) ../libqemu_common.a libqemu.a $(HWLIB) $(QEMU_PROG): ARLIBS += $(DEPLIBS) $(QEMU_PROG): $(DEPLIBS) -FORCE: - -libkvm.a: FORCE - $(MAKE) -C ../kvm/libkvm KVM_CFLAGS="$(KVM_CFLAGS)" - if ! cmp -s libkvm.a ../kvm/libkvm/libkvm.a; then \ - cp ../kvm/libkvm/libkvm.a . ; \ - fi - endif # !CONFIG_USER_ONLY gdbstub-xml.c: $(TARGET_XML_FILES) feature_to_c.sh @@ -825,9 +813,6 @@ qemu-options.h: $(SRC_PATH)/qemu-options.hx clean: rm -f *.o *.a *~ $(PROGS) nwfpe/*.o fpu/*.o qemu-options.h gdbstub-xml.c rm -f *.d */*.d tcg/*.o -ifdef USE_KVM - $(MAKE) -C ../kvm/libkvm clean -endif install: all ifneq ($(PROGS),) @@ -538,8 +538,6 @@ if test "$werror" = "yes" ; then CFLAGS="$CFLAGS -Werror" fi -CFLAGS="$CFLAGS -I$(readlink -f "$source_path/kvm/libkvm")" - if test "$solaris" = "no" ; then if ld --version 2>/dev/null | grep "GNU ld" >/dev/null 2>/dev/null ; then LDFLAGS="$LDFLAGS -Wl,--warn-common" @@ -856,7 +854,7 @@ kvm_cflags="$kvm_cflags -I$source_path/kvm/include/$kvm_arch" # test for KVM_CAP_PIT cat > $TMPC <<EOF -#include <libkvm.h> +#include <linux/kvm.h> #ifndef KVM_CAP_PIT #error "kvm no pit capability" #endif @@ -869,7 +867,7 @@ EOF # test for KVM_CAP_DEVICE_ASSIGNMENT cat > $TMPC <<EOF -#include <libkvm.h> +#include <linux/kvm.h> #ifndef KVM_CAP_DEVICE_ASSIGNMENT #error "kvm no device assignment capability" #endif @@ -1844,7 +1842,7 @@ else exit 1 fi -# this is a temp hack needed for libkvm +# this is a temp 
hack needed for kvm if test "$kvm" = "yes" ; then echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak fi @@ -2245,11 +2243,9 @@ done # for target in $targets # build tree in object directory if source path is different from current one if test "$source_path_used" = "yes" ; then DIRS="tests tests/cris slirp audio block" - DIRS="$DIRS kvm/libkvm" FILES="Makefile tests/Makefile" FILES="$FILES tests/cris/Makefile tests/cris/.gdbinit" FILES="$FILES tests/test-mmap.c" - FILES="$FILES kvm/libkvm/Makefile" for dir in $DIRS ; do mkdir -p $dir done diff --git a/libkvm-all.c b/libkvm-all.c new file mode 100644 index 000000000..1668e327a --- /dev/null +++ b/libkvm-all.c @@ -0,0 +1,1483 @@ +/* + * Kernel-based Virtual Machine control library + * + * This library provides an API to control the kvm hardware virtualization + * module. + * + * Copyright (C) 2006 Qumranet + * + * Authors: + * + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * This work is licensed under the GNU LGPL license, version 2. + */ + +#ifndef __user +#define __user /* temporary, until installed via make headers_install */ +#endif + +#include <linux/kvm.h> + +#define EXPECTED_KVM_API_VERSION 12 + +#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION +#error libkvm: userspace and kernel version mismatch +#endif + +#include <unistd.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <inttypes.h> +#include "libkvm-all.h" + +#include "libkvm.h" + +//#define DEBUG_MEMREG +#ifdef DEBUG_MEMREG +#define DPRINTF(fmt, args...) \ + do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0) +#else +#define DPRINTF(fmt, args...) do {} while (0) +#endif + +#define MIN(x,y) ((x) < (y) ? 
(x) : (y)) +#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) + +int kvm_abi = EXPECTED_KVM_API_VERSION; +int kvm_page_size; + +static inline void set_gsi(kvm_context_t kvm, unsigned int gsi) +{ + uint32_t *bitmap = kvm->used_gsi_bitmap; + + if (gsi < kvm->max_gsi) + bitmap[gsi / 32] |= 1U << (gsi % 32); + else + DPRINTF("Invalid GSI %d\n"); +} + +static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi) +{ + uint32_t *bitmap = kvm->used_gsi_bitmap; + + if (gsi < kvm->max_gsi) + bitmap[gsi / 32] &= ~(1U << (gsi % 32)); + else + DPRINTF("Invalid GSI %d\n"); +} + +struct slot_info { + unsigned long phys_addr; + unsigned long len; + unsigned long userspace_addr; + unsigned flags; + int logging_count; +}; + +struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS]; + +static void init_slots(void) +{ + int i; + + for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) + slots[i].len = 0; +} + +static int get_free_slot(kvm_context_t kvm) +{ + int i; + int tss_ext; + +#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__) + tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); +#else + tss_ext = 0; +#endif + + /* + * on older kernels where the set tss ioctl is not supprted we must save + * slot 0 to hold the extended memory, as the vmx will use the last 3 + * pages of this slot. 
+ */ + if (tss_ext > 0) + i = 0; + else + i = 1; + + for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i) + if (!slots[i].len) + return i; + return -1; +} + +static void register_slot(int slot, unsigned long phys_addr, unsigned long len, + unsigned long userspace_addr, unsigned flags) +{ + slots[slot].phys_addr = phys_addr; + slots[slot].len = len; + slots[slot].userspace_addr = userspace_addr; + slots[slot].flags = flags; +} + +static void free_slot(int slot) +{ + slots[slot].len = 0; + slots[slot].logging_count = 0; +} + +static int get_slot(unsigned long phys_addr) +{ + int i; + + for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) { + if (slots[i].len && slots[i].phys_addr <= phys_addr && + (slots[i].phys_addr + slots[i].len-1) >= phys_addr) + return i; + } + return -1; +} + +/* Returns -1 if this slot is not totally contained on any other, + * and the number of the slot otherwise */ +static int get_container_slot(uint64_t phys_addr, unsigned long size) +{ + int i; + + for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) + if (slots[i].len && slots[i].phys_addr <= phys_addr && + (slots[i].phys_addr + slots[i].len) >= phys_addr + size) + return i; + return -1; +} + +int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size) +{ + int slot = get_container_slot(phys_addr, size); + if (slot == -1) + return 0; + return 1; +} + +/* + * dirty pages logging control + */ +static int kvm_dirty_pages_log_change(kvm_context_t kvm, + unsigned long phys_addr, + unsigned flags, + unsigned mask) +{ + int r = -1; + int slot = get_slot(phys_addr); + + if (slot == -1) { + fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__); + return 1; + } + + flags = (slots[slot].flags & ~mask) | flags; + if (flags == slots[slot].flags) + return 0; + slots[slot].flags = flags; + + { + struct kvm_userspace_memory_region mem = { + .slot = slot, + .memory_size = slots[slot].len, + .guest_phys_addr = slots[slot].phys_addr, + .userspace_addr = slots[slot].userspace_addr, + .flags 
= slots[slot].flags, + }; + + + DPRINTF("slot %d start %llx len %llx flags %x\n", + mem.slot, + mem.guest_phys_addr, + mem.memory_size, + mem.flags); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem); + if (r == -1) + fprintf(stderr, "%s: %m\n", __FUNCTION__); + } + return r; +} + +static int kvm_dirty_pages_log_change_all(kvm_context_t kvm, + int (*change)(kvm_context_t kvm, + uint64_t start, + uint64_t len)) +{ + int i, r; + + for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) { + if (slots[i].len) + r = change(kvm, slots[i].phys_addr, slots[i].len); + } + return r; +} + +int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm, + uint64_t phys_addr, + uint64_t len) +{ + int slot = get_slot(phys_addr); + + DPRINTF("start %"PRIx64" len %"PRIx64"\n", phys_addr, len); + if (slot == -1) { + fprintf(stderr, "BUG: %s: invalid parameters\n", __func__); + return -EINVAL; + } + + if (slots[slot].logging_count++) + return 0; + + return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, + KVM_MEM_LOG_DIRTY_PAGES, + KVM_MEM_LOG_DIRTY_PAGES); +} + +int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm, + uint64_t phys_addr, + uint64_t len) +{ + int slot = get_slot(phys_addr); + + if (slot == -1) { + fprintf(stderr, "BUG: %s: invalid parameters\n", __func__); + return -EINVAL; + } + + if (--slots[slot].logging_count) + return 0; + + return kvm_dirty_pages_log_change(kvm, slots[slot].phys_addr, + 0, + KVM_MEM_LOG_DIRTY_PAGES); +} + +/** + * Enable dirty page logging for all memory regions + */ +int kvm_dirty_pages_log_enable_all(kvm_context_t kvm) +{ + if (kvm->dirty_pages_log_all) + return 0; + kvm->dirty_pages_log_all = 1; + return kvm_dirty_pages_log_change_all(kvm, + kvm_dirty_pages_log_enable_slot); +} + +/** + * Enable dirty page logging only for memory regions that were created with + * dirty logging enabled (disable for all other memory regions). 
+ */ +int kvm_dirty_pages_log_reset(kvm_context_t kvm) +{ + if (!kvm->dirty_pages_log_all) + return 0; + kvm->dirty_pages_log_all = 0; + return kvm_dirty_pages_log_change_all(kvm, + kvm_dirty_pages_log_disable_slot); +} + + +kvm_context_t kvm_init(struct kvm_callbacks *callbacks, + void *opaque) +{ + int fd; + kvm_context_t kvm; + int r, gsi_count; + + fd = open("/dev/kvm", O_RDWR); + if (fd == -1) { + perror("open /dev/kvm"); + return NULL; + } + r = ioctl(fd, KVM_GET_API_VERSION, 0); + if (r == -1) { + fprintf(stderr, "kvm kernel version too old: " + "KVM_GET_API_VERSION ioctl not supported\n"); + goto out_close; + } + if (r < EXPECTED_KVM_API_VERSION) { + fprintf(stderr, "kvm kernel version too old: " + "We expect API version %d or newer, but got " + "version %d\n", + EXPECTED_KVM_API_VERSION, r); + goto out_close; + } + if (r > EXPECTED_KVM_API_VERSION) { + fprintf(stderr, "kvm userspace version too old\n"); + goto out_close; + } + kvm_abi = r; + kvm_page_size = getpagesize(); + kvm = malloc(sizeof(*kvm)); + if (kvm == NULL) + goto out_close; + memset(kvm, 0, sizeof(*kvm)); + kvm->fd = fd; + kvm->vm_fd = -1; + kvm->callbacks = callbacks; + kvm->opaque = opaque; + kvm->dirty_pages_log_all = 0; + kvm->no_irqchip_creation = 0; + kvm->no_pit_creation = 0; + + gsi_count = kvm_get_gsi_count(kvm); + if (gsi_count > 0) { + int gsi_bits, i; + + /* Round up so we can search ints using ffs */ + gsi_bits = ALIGN(gsi_count, 32); + kvm->used_gsi_bitmap = malloc(gsi_bits / 8); + if (!kvm->used_gsi_bitmap) + goto out_close; + memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8); + kvm->max_gsi = gsi_bits; + + /* Mark any over-allocated bits as already in use */ + for (i = gsi_count; i < gsi_bits; i++) + set_gsi(kvm, i); + } + + return kvm; + out_close: + close(fd); + return NULL; +} + +void kvm_finalize(kvm_context_t kvm) +{ + if (kvm->vcpu_fd[0] != -1) + close(kvm->vcpu_fd[0]); + if (kvm->vm_fd != -1) + close(kvm->vm_fd); + close(kvm->fd); + free(kvm); +} + +void 
kvm_disable_irqchip_creation(kvm_context_t kvm) +{ + kvm->no_irqchip_creation = 1; +} + +void kvm_disable_pit_creation(kvm_context_t kvm) +{ + kvm->no_pit_creation = 1; +} + +int kvm_create_vcpu(kvm_context_t kvm, int slot) +{ + long mmap_size; + int r; + + r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, slot); + if (r == -1) { + r = -errno; + fprintf(stderr, "kvm_create_vcpu: %m\n"); + return r; + } + kvm->vcpu_fd[slot] = r; + mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size == -1) { + r = -errno; + fprintf(stderr, "get vcpu mmap size: %m\n"); + return r; + } + kvm->run[slot] = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, + kvm->vcpu_fd[slot], 0); + if (kvm->run[slot] == MAP_FAILED) { + r = -errno; + fprintf(stderr, "mmap vcpu area: %m\n"); + return r; + } + return 0; +} + +int kvm_create_vm(kvm_context_t kvm) +{ + int fd = kvm->fd; + +#ifdef KVM_CAP_IRQ_ROUTING + kvm->irq_routes = malloc(sizeof(*kvm->irq_routes)); + if (!kvm->irq_routes) + return -ENOMEM; + memset(kvm->irq_routes, 0, sizeof(*kvm->irq_routes)); + kvm->nr_allocated_irq_routes = 0; +#endif + + kvm->vcpu_fd[0] = -1; + + fd = ioctl(fd, KVM_CREATE_VM, 0); + if (fd == -1) { + fprintf(stderr, "kvm_create_vm: %m\n"); + return -1; + } + kvm->vm_fd = fd; + return 0; +} + +static int kvm_create_default_phys_mem(kvm_context_t kvm, + unsigned long phys_mem_bytes, + void **vm_mem) +{ +#ifdef KVM_CAP_USER_MEMORY + int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY); + if (r > 0) + return 0; + fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n"); +#else +#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported +#endif + return -1; +} + +int kvm_check_extension(kvm_context_t kvm, int ext) +{ + int ret; + + ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext); + if (ret > 0) + return ret; + return 0; +} + +void kvm_create_irqchip(kvm_context_t kvm) +{ + int r; + + kvm->irqchip_in_kernel = 0; +#ifdef KVM_CAP_IRQCHIP + if 
(!kvm->no_irqchip_creation) { + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP); + if (r > 0) { /* kernel irqchip supported */ + r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); + if (r >= 0) { + kvm->irqchip_inject_ioctl = KVM_IRQ_LINE; +#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS) + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, + KVM_CAP_IRQ_INJECT_STATUS); + if (r > 0) + kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS; +#endif + kvm->irqchip_in_kernel = 1; + } + else + fprintf(stderr, "Create kernel PIC irqchip failed\n"); + } + } +#endif +} + +int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem) +{ + int r; + + r = kvm_create_vm(kvm); + if (r < 0) + return r; + r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem); + if (r < 0) + return r; + init_slots(); + r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem); + if (r < 0) + return r; + kvm_create_irqchip(kvm); + + return 0; +} + + +void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start, + unsigned long len, int log, int writable) +{ + int r; + int prot = PROT_READ; + void *ptr; + struct kvm_userspace_memory_region memory = { + .memory_size = len, + .guest_phys_addr = phys_start, + .flags = log ? 
KVM_MEM_LOG_DIRTY_PAGES : 0, + }; + + if (writable) + prot |= PROT_WRITE; + +#if !defined(__s390__) + ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); +#else + ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC, + MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0); +#endif + if (ptr == MAP_FAILED) { + fprintf(stderr, "%s: %s", __func__, strerror(errno)); + return 0; + } + + memset(ptr, 0, len); + + memory.userspace_addr = (unsigned long)ptr; + memory.slot = get_free_slot(kvm); + DPRINTF("slot %d start %llx len %llx flags %x\n", + memory.slot, + memory.guest_phys_addr, + memory.memory_size, + memory.flags); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); + if (r == -1) { + fprintf(stderr, "%s: %s", __func__, strerror(errno)); + return 0; + } + register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size, + memory.userspace_addr, memory.flags); + + return ptr; +} + +int kvm_register_phys_mem(kvm_context_t kvm, + unsigned long phys_start, void *userspace_addr, + unsigned long len, int log) +{ + + struct kvm_userspace_memory_region memory = { + .memory_size = len, + .guest_phys_addr = phys_start, + .userspace_addr = (unsigned long)(intptr_t)userspace_addr, + .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0, + }; + int r; + + memory.slot = get_free_slot(kvm); + DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n", + memory.guest_phys_addr, memory.memory_size, + memory.userspace_addr, memory.slot, memory.flags); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); + if (r == -1) { + fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno)); + return -1; + } + register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size, + memory.userspace_addr, memory.flags); + return 0; +} + + +/* destroy/free a whole slot. 
+ * phys_start, len and slot are the params passed to kvm_create_phys_mem() + */ +void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start, + unsigned long len) +{ + int slot; + int r; + struct kvm_userspace_memory_region memory = { + .memory_size = 0, + .guest_phys_addr = phys_start, + .userspace_addr = 0, + .flags = 0, + }; + + slot = get_slot(phys_start); + + if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) { + fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n", + __FUNCTION__, slot); + return; + } + if (phys_start != slots[slot].phys_addr) { + fprintf(stderr, + "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n", + __FUNCTION__, phys_start, slots[slot].phys_addr); + phys_start = slots[slot].phys_addr; + } + + memory.slot = slot; + DPRINTF("slot %d start %llx len %llx flags %x\n", + memory.slot, + memory.guest_phys_addr, + memory.memory_size, + memory.flags); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); + if (r == -1) { + fprintf(stderr, "destroy_userspace_phys_mem: %s", + strerror(errno)); + return; + } + + free_slot(memory.slot); +} + +void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size) +{ + + int slot = get_container_slot(phys_addr, size); + + if (slot != -1) { + DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size); + kvm_destroy_phys_mem(kvm, phys_addr, size); + return; + } +} + +static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf) +{ + int r; + struct kvm_dirty_log log = { + .slot = slot, + }; + + log.dirty_bitmap = buf; + + r = ioctl(kvm->vm_fd, ioctl_num, &log); + if (r == -1) + return -errno; + return 0; +} + +int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf) +{ + int slot; + + slot = get_slot(phys_addr); + return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf); +} + +int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr, + unsigned long len, void *buf, void *opaque, + int 
(*cb)(unsigned long start, unsigned long len, + void*bitmap, void *opaque)) +{ + int i; + int r; + unsigned long end_addr = phys_addr + len; + + for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) { + if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr) + && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) { + r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf); + if (r) + return r; + r = cb(slots[i].phys_addr, slots[i].len, buf, opaque); + if (r) + return r; + } + } + return 0; +} + +#ifdef KVM_CAP_IRQCHIP + +int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status) +{ + struct kvm_irq_level event; + int r; + + if (!kvm->irqchip_in_kernel) + return 0; + event.level = level; + event.irq = irq; + r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event); + if (r == -1) + perror("kvm_set_irq_level"); + + if (status) { +#ifdef KVM_CAP_IRQ_INJECT_STATUS + *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? + 1 : event.status; +#else + *status = 1; +#endif + } + + return 1; +} + +int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) +{ + int r; + + if (!kvm->irqchip_in_kernel) + return 0; + r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip); + if (r == -1) { + r = -errno; + perror("kvm_get_irqchip\n"); + } + return r; +} + +int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) +{ + int r; + + if (!kvm->irqchip_in_kernel) + return 0; + r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip); + if (r == -1) { + r = -errno; + perror("kvm_set_irqchip\n"); + } + return r; +} + +#endif + +static int handle_io(kvm_context_t kvm, struct kvm_run *run, int vcpu) +{ + uint16_t addr = run->io.port; + int r; + int i; + void *p = (void *)run + run->io.data_offset; + + for (i = 0; i < run->io.count; ++i) { + switch (run->io.direction) { + case KVM_EXIT_IO_IN: + switch (run->io.size) { + case 1: + r = kvm->callbacks->inb(kvm->opaque, addr, p); + break; + case 2: + r = kvm->callbacks->inw(kvm->opaque, addr, p); + break; + case 4: + r = 
kvm->callbacks->inl(kvm->opaque, addr, p); + break; + default: + fprintf(stderr, "bad I/O size %d\n", run->io.size); + return -EMSGSIZE; + } + break; + case KVM_EXIT_IO_OUT: + switch (run->io.size) { + case 1: + r = kvm->callbacks->outb(kvm->opaque, addr, + *(uint8_t *)p); + break; + case 2: + r = kvm->callbacks->outw(kvm->opaque, addr, + *(uint16_t *)p); + break; + case 4: + r = kvm->callbacks->outl(kvm->opaque, addr, + *(uint32_t *)p); + break; + default: + fprintf(stderr, "bad I/O size %d\n", run->io.size); + return -EMSGSIZE; + } + break; + default: + fprintf(stderr, "bad I/O direction %d\n", run->io.direction); + return -EPROTO; + } + + p += run->io.size; + } + + return 0; +} + +int handle_debug(kvm_context_t kvm, int vcpu, void *env) +{ +#ifdef KVM_CAP_SET_GUEST_DEBUG + struct kvm_run *run = kvm->run[vcpu]; + + return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch); +#else + return 0; +#endif +} + +int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs) +{ + return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, regs); +} + +int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs) +{ + return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, regs); +} + +int kvm_get_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu) +{ + return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_FPU, fpu); +} + +int kvm_set_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu) +{ + return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_FPU, fpu); +} + +int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs) +{ + return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_SREGS, sregs); +} + +int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs) +{ + return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SREGS, sregs); +} + +#ifdef KVM_CAP_MP_STATE +int kvm_get_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state) +{ + int r; + + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); + if (r > 0) + return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MP_STATE, mp_state); + return 
-ENOSYS; +} + +int kvm_set_mpstate(kvm_context_t kvm, int vcpu, struct kvm_mp_state *mp_state) +{ + int r; + + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); + if (r > 0) + return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MP_STATE, mp_state); + return -ENOSYS; +} +#endif + +static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run) +{ + unsigned long addr = kvm_run->mmio.phys_addr; + void *data = kvm_run->mmio.data; + + /* hack: Red Hat 7.1 generates these weird accesses. */ + if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3) + return 0; + + if (kvm_run->mmio.is_write) + return kvm->callbacks->mmio_write(kvm->opaque, addr, data, + kvm_run->mmio.len); + else + return kvm->callbacks->mmio_read(kvm->opaque, addr, data, + kvm_run->mmio.len); +} + +int handle_io_window(kvm_context_t kvm) +{ + return kvm->callbacks->io_window(kvm->opaque); +} + +int handle_halt(kvm_context_t kvm, int vcpu) +{ + return kvm->callbacks->halt(kvm->opaque, vcpu); +} + +int handle_shutdown(kvm_context_t kvm, void *env) +{ + return kvm->callbacks->shutdown(kvm->opaque, env); +} + +int try_push_interrupts(kvm_context_t kvm) +{ + return kvm->callbacks->try_push_interrupts(kvm->opaque); +} + +static inline void push_nmi(kvm_context_t kvm) +{ +#ifdef KVM_CAP_USER_NMI + kvm->callbacks->push_nmi(kvm->opaque); +#endif /* KVM_CAP_USER_NMI */ +} + +void post_kvm_run(kvm_context_t kvm, void *env) +{ + kvm->callbacks->post_kvm_run(kvm->opaque, env); +} + +int pre_kvm_run(kvm_context_t kvm, void *env) +{ + return kvm->callbacks->pre_kvm_run(kvm->opaque, env); +} + +int kvm_get_interrupt_flag(kvm_context_t kvm, int vcpu) +{ + struct kvm_run *run = kvm->run[vcpu]; + + return run->if_flag; +} + +int kvm_is_ready_for_interrupt_injection(kvm_context_t kvm, int vcpu) +{ + struct kvm_run *run = kvm->run[vcpu]; + + return run->ready_for_interrupt_injection; +} + +int kvm_run(kvm_context_t kvm, int vcpu, void *env) +{ + int r; + int fd = kvm->vcpu_fd[vcpu]; + struct kvm_run *run = 
kvm->run[vcpu]; + +again: + push_nmi(kvm); +#if !defined(__s390__) + if (!kvm->irqchip_in_kernel) + run->request_interrupt_window = try_push_interrupts(kvm); +#endif + r = pre_kvm_run(kvm, env); + if (r) + return r; + r = ioctl(fd, KVM_RUN, 0); + + if (r == -1 && errno != EINTR && errno != EAGAIN) { + r = -errno; + post_kvm_run(kvm, env); + fprintf(stderr, "kvm_run: %s\n", strerror(-r)); + return r; + } + + post_kvm_run(kvm, env); + +#if defined(KVM_CAP_COALESCED_MMIO) + if (kvm->coalesced_mmio) { + struct kvm_coalesced_mmio_ring *ring = (void *)run + + kvm->coalesced_mmio * PAGE_SIZE; + while (ring->first != ring->last) { + kvm->callbacks->mmio_write(kvm->opaque, + ring->coalesced_mmio[ring->first].phys_addr, + &ring->coalesced_mmio[ring->first].data[0], + ring->coalesced_mmio[ring->first].len); + smp_wmb(); + ring->first = (ring->first + 1) % + KVM_COALESCED_MMIO_MAX; + } + } +#endif + +#if !defined(__s390__) + if (r == -1) { + r = handle_io_window(kvm); + goto more; + } +#endif + if (1) { + switch (run->exit_reason) { + case KVM_EXIT_UNKNOWN: + fprintf(stderr, "unhandled vm exit: 0x%x vcpu_id %d\n", + (unsigned)run->hw.hardware_exit_reason, vcpu); + kvm_show_regs(kvm, vcpu); + abort(); + break; + case KVM_EXIT_FAIL_ENTRY: + fprintf(stderr, "kvm_run: failed entry, reason %u\n", + (unsigned)run->fail_entry.hardware_entry_failure_reason & 0xffff); + kvm_show_regs(kvm, vcpu); + return -ENOEXEC; + break; + case KVM_EXIT_EXCEPTION: + fprintf(stderr, "exception %d (%x)\n", + run->ex.exception, + run->ex.error_code); + kvm_show_regs(kvm, vcpu); + kvm_show_code(kvm, vcpu); + abort(); + break; + case KVM_EXIT_IO: + r = handle_io(kvm, run, vcpu); + break; + case KVM_EXIT_DEBUG: + r = handle_debug(kvm, vcpu, env); + break; + case KVM_EXIT_MMIO: + r = handle_mmio(kvm, run); + break; + case KVM_EXIT_HLT: + r = handle_halt(kvm, vcpu); + break; + case KVM_EXIT_IRQ_WINDOW_OPEN: + break; + case KVM_EXIT_SHUTDOWN: + r = handle_shutdown(kvm, env); + break; +#if defined(__s390__) + 
case KVM_EXIT_S390_SIEIC: + r = kvm->callbacks->s390_handle_intercept(kvm, vcpu, + run); + break; + case KVM_EXIT_S390_RESET: + r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run); + break; +#endif + default: + if (kvm_arch_run(run, kvm, vcpu)) { + fprintf(stderr, "unhandled vm exit: 0x%x\n", + run->exit_reason); + kvm_show_regs(kvm, vcpu); + abort(); + } + break; + } + } +more: + if (!r) + goto again; + return r; +} + +int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq) +{ + struct kvm_interrupt intr; + + intr.irq = irq; + return ioctl(kvm->vcpu_fd[vcpu], KVM_INTERRUPT, &intr); +} + +#ifdef KVM_CAP_SET_GUEST_DEBUG +int kvm_set_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_guest_debug *dbg) +{ + return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_GUEST_DEBUG, dbg); +} +#endif + +int kvm_set_signal_mask(kvm_context_t kvm, int vcpu, const sigset_t *sigset) +{ + struct kvm_signal_mask *sigmask; + int r; + + if (!sigset) { + r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, NULL); + if (r == -1) + r = -errno; + return r; + } + sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset)); + if (!sigmask) + return -ENOMEM; + + sigmask->len = 8; + memcpy(sigmask->sigset, sigset, sizeof(*sigset)); + r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SIGNAL_MASK, sigmask); + if (r == -1) + r = -errno; + free(sigmask); + return r; +} + +int kvm_irqchip_in_kernel(kvm_context_t kvm) +{ + return kvm->irqchip_in_kernel; +} + +int kvm_pit_in_kernel(kvm_context_t kvm) +{ + return kvm->pit_in_kernel; +} + +int kvm_has_sync_mmu(kvm_context_t kvm) +{ + int r = 0; +#ifdef KVM_CAP_SYNC_MMU + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU); +#endif + return r; +} + +int kvm_inject_nmi(kvm_context_t kvm, int vcpu) +{ +#ifdef KVM_CAP_USER_NMI + return ioctl(kvm->vcpu_fd[vcpu], KVM_NMI); +#else + return -ENOSYS; +#endif +} + +int kvm_init_coalesced_mmio(kvm_context_t kvm) +{ + int r = 0; + kvm->coalesced_mmio = 0; +#ifdef KVM_CAP_COALESCED_MMIO + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, 
KVM_CAP_COALESCED_MMIO); + if (r > 0) { + kvm->coalesced_mmio = r; + return 0; + } +#endif + return r; +} + +int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size) +{ +#ifdef KVM_CAP_COALESCED_MMIO + struct kvm_coalesced_mmio_zone zone; + int r; + + if (kvm->coalesced_mmio) { + + zone.addr = addr; + zone.size = size; + + r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone); + if (r == -1) { + perror("kvm_register_coalesced_mmio_zone"); + return -errno; + } + return 0; + } +#endif + return -ENOSYS; +} + +int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size) +{ +#ifdef KVM_CAP_COALESCED_MMIO + struct kvm_coalesced_mmio_zone zone; + int r; + + if (kvm->coalesced_mmio) { + + zone.addr = addr; + zone.size = size; + + r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone); + if (r == -1) { + perror("kvm_unregister_coalesced_mmio_zone"); + return -errno; + } + DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size); + return 0; + } +#endif + return -ENOSYS; +} + +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +int kvm_assign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev); + if (ret < 0) + return -errno; + + return ret; +} + +static int kvm_old_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq); + if (ret < 0) + return -errno; + + return ret; +} + +#ifdef KVM_CAP_ASSIGN_DEV_IRQ +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int ret; + + ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ); + if (ret > 0) { + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq); + if (ret < 0) + return -errno; + return ret; + } + + return kvm_old_assign_irq(kvm, assigned_irq); +} + +int kvm_deassign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int 
ret; + + ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq); + if (ret < 0) + return -errno; + + return ret; +} +#else +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + return kvm_old_assign_irq(kvm, assigned_irq); +} +#endif +#endif + +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT +int kvm_deassign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev); + if (ret < 0) + return -errno; + + return ret; +} +#endif + +int kvm_destroy_memory_region_works(kvm_context_t kvm) +{ + int ret = 0; + +#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS + ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, + KVM_CAP_DESTROY_MEMORY_REGION_WORKS); + if (ret <= 0) + ret = 0; +#endif + return ret; +} + +int kvm_reinject_control(kvm_context_t kvm, int pit_reinject) +{ +#ifdef KVM_CAP_REINJECT_CONTROL + int r; + struct kvm_reinject_control control; + + control.pit_reinject = pit_reinject; + + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL); + if (r > 0) { + r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control); + if (r == -1) + return -errno; + return r; + } +#endif + return -ENOSYS; +} + +int kvm_has_gsi_routing(kvm_context_t kvm) +{ + int r = 0; + +#ifdef KVM_CAP_IRQ_ROUTING + r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING); +#endif + return r; +} + +int kvm_get_gsi_count(kvm_context_t kvm) +{ +#ifdef KVM_CAP_IRQ_ROUTING + return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING); +#else + return -EINVAL; +#endif +} + +int kvm_clear_gsi_routes(kvm_context_t kvm) +{ +#ifdef KVM_CAP_IRQ_ROUTING + kvm->irq_routes->nr = 0; + return 0; +#else + return -EINVAL; +#endif +} + +int kvm_add_routing_entry(kvm_context_t kvm, + struct kvm_irq_routing_entry* entry) +{ +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing *z; + struct kvm_irq_routing_entry *new; + int n, size; + + if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) { + n = 
kvm->nr_allocated_irq_routes * 2; + if (n < 64) + n = 64; + size = sizeof(struct kvm_irq_routing); + size += n * sizeof(*new); + z = realloc(kvm->irq_routes, size); + if (!z) + return -ENOMEM; + kvm->nr_allocated_irq_routes = n; + kvm->irq_routes = z; + } + n = kvm->irq_routes->nr++; + new = &kvm->irq_routes->entries[n]; + memset(new, 0, sizeof(*new)); + new->gsi = entry->gsi; + new->type = entry->type; + new->flags = entry->flags; + new->u = entry->u; + + set_gsi(kvm, entry->gsi); + + return 0; +#else + return -ENOSYS; +#endif +} + +int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin) +{ +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing_entry e; + + e.gsi = gsi; + e.type = KVM_IRQ_ROUTING_IRQCHIP; + e.flags = 0; + e.u.irqchip.irqchip = irqchip; + e.u.irqchip.pin = pin; + return kvm_add_routing_entry(kvm, &e); +#else + return -ENOSYS; +#endif +} + +int kvm_del_routing_entry(kvm_context_t kvm, + struct kvm_irq_routing_entry* entry) +{ +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing_entry *e, *p; + int i, gsi, found = 0; + + gsi = entry->gsi; + + for (i = 0; i < kvm->irq_routes->nr; ++i) { + e = &kvm->irq_routes->entries[i]; + if (e->type == entry->type + && e->gsi == gsi) { + switch (e->type) + { + case KVM_IRQ_ROUTING_IRQCHIP: { + if (e->u.irqchip.irqchip == + entry->u.irqchip.irqchip + && e->u.irqchip.pin == + entry->u.irqchip.pin) { + p = &kvm->irq_routes-> + entries[--kvm->irq_routes->nr]; + *e = *p; + found = 1; + } + break; + } + case KVM_IRQ_ROUTING_MSI: { + if (e->u.msi.address_lo == + entry->u.msi.address_lo + && e->u.msi.address_hi == + entry->u.msi.address_hi + && e->u.msi.data == entry->u.msi.data) { + p = &kvm->irq_routes-> + entries[--kvm->irq_routes->nr]; + *e = *p; + found = 1; + } + break; + } + default: + break; + } + if (found) { + /* If there are no other users of this GSI + * mark it available in the bitmap */ + for (i = 0; i < kvm->irq_routes->nr; i++) { + e = &kvm->irq_routes->entries[i]; + if (e->gsi == gsi) + 
break; + } + if (i == kvm->irq_routes->nr) + clear_gsi(kvm, gsi); + + return 0; + } + } + } + return -ESRCH; +#else + return -ENOSYS; +#endif +} + +int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin) +{ +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing_entry e; + + e.gsi = gsi; + e.type = KVM_IRQ_ROUTING_IRQCHIP; + e.flags = 0; + e.u.irqchip.irqchip = irqchip; + e.u.irqchip.pin = pin; + return kvm_del_routing_entry(kvm, &e); +#else + return -ENOSYS; +#endif +} + +int kvm_commit_irq_routes(kvm_context_t kvm) +{ +#ifdef KVM_CAP_IRQ_ROUTING + int r; + + kvm->irq_routes->flags = 0; + r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes); + if (r == -1) + r = -errno; + return r; +#else + return -ENOSYS; +#endif +} + +int kvm_get_irq_route_gsi(kvm_context_t kvm) +{ + int i, bit; + uint32_t *buf = kvm->used_gsi_bitmap; + + /* Return the lowest unused GSI in the bitmap */ + for (i = 0; i < kvm->max_gsi / 32; i++) { + bit = ffs(~buf[i]); + if (!bit) + continue; + + return bit - 1 + i * 32; + } + + return -ENOSPC; +} + +#ifdef KVM_CAP_DEVICE_MSIX +int kvm_assign_set_msix_nr(kvm_context_t kvm, + struct kvm_assigned_msix_nr *msix_nr) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr); + if (ret < 0) + return -errno; + + return ret; +} + +int kvm_assign_set_msix_entry(kvm_context_t kvm, + struct kvm_assigned_msix_entry *entry) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry); + if (ret < 0) + return -errno; + + return ret; +} +#endif diff --git a/libkvm-all.h b/libkvm-all.h new file mode 100644 index 000000000..4821a1e4c --- /dev/null +++ b/libkvm-all.h @@ -0,0 +1,868 @@ +/** \file libkvm.h + * libkvm API + */ + +#ifndef LIBKVM_H +#define LIBKVM_H + +#if defined(__s390__) +#include <asm/ptrace.h> +#endif + +#include <stdint.h> + +#ifndef __user +#define __user /* temporary, until installed via make headers_install */ +#endif + +#include <linux/kvm.h> + +#include <signal.h> + +struct kvm_context; + 
+typedef struct kvm_context *kvm_context_t; + +#if defined(__x86_64__) || defined(__i386__) +struct kvm_msr_list *kvm_get_msr_list(kvm_context_t); +int kvm_get_msrs(kvm_context_t, int vcpu, struct kvm_msr_entry *msrs, int n); +int kvm_set_msrs(kvm_context_t, int vcpu, struct kvm_msr_entry *msrs, int n); +#endif + +/*! + * \brief KVM callbacks structure + * + * This structure holds pointers to various functions that KVM will call + * when it encounters something that cannot be virtualized, such as + * accessing hardware devices via MMIO or regular IO. + */ +struct kvm_callbacks { + /// For 8bit IO reads from the guest (Usually when executing 'inb') + int (*inb)(void *opaque, uint16_t addr, uint8_t *data); + /// For 16bit IO reads from the guest (Usually when executing 'inw') + int (*inw)(void *opaque, uint16_t addr, uint16_t *data); + /// For 32bit IO reads from the guest (Usually when executing 'inl') + int (*inl)(void *opaque, uint16_t addr, uint32_t *data); + /// For 8bit IO writes from the guest (Usually when executing 'outb') + int (*outb)(void *opaque, uint16_t addr, uint8_t data); + /// For 16bit IO writes from the guest (Usually when executing 'outw') + int (*outw)(void *opaque, uint16_t addr, uint16_t data); + /// For 32bit IO writes from the guest (Usually when executing 'outl') + int (*outl)(void *opaque, uint16_t addr, uint32_t data); + /// generic memory reads to unmapped memory (For MMIO devices) + int (*mmio_read)(void *opaque, uint64_t addr, uint8_t *data, + int len); + /// generic memory writes to unmapped memory (For MMIO devices) + int (*mmio_write)(void *opaque, uint64_t addr, uint8_t *data, + int len); +#ifdef KVM_CAP_SET_GUEST_DEBUG + int (*debug)(void *opaque, void *env, + struct kvm_debug_exit_arch *arch_info); +#endif + /*! + * \brief Called when the VCPU issues an 'hlt' instruction. + * + * Typically, you should yield here to prevent 100% CPU utilization + * on the host CPU. 
+ */ + int (*halt)(void *opaque, int vcpu); + int (*shutdown)(void *opaque, void *env); + int (*io_window)(void *opaque); + int (*try_push_interrupts)(void *opaque); +#ifdef KVM_CAP_USER_NMI + void (*push_nmi)(void *opaque); +#endif + void (*post_kvm_run)(void *opaque, void *env); + int (*pre_kvm_run)(void *opaque, void *env); + int (*tpr_access)(void *opaque, int vcpu, uint64_t rip, int is_write); +#if defined(__powerpc__) + int (*powerpc_dcr_read)(int vcpu, uint32_t dcrn, uint32_t *data); + int (*powerpc_dcr_write)(int vcpu, uint32_t dcrn, uint32_t data); +#endif +#if defined(__s390__) + int (*s390_handle_intercept)(kvm_context_t context, int vcpu, + struct kvm_run *run); + int (*s390_handle_reset)(kvm_context_t context, int vcpu, + struct kvm_run *run); +#endif +}; + +/*! + * \brief Create new KVM context + * + * This creates a new kvm_context. A KVM context is a small area of data that + * holds information about the KVM instance that gets created by this call.\n + * This should always be your first call to KVM. + * + * \param callbacks Pointer to a valid kvm_callbacks structure + * \param opaque Not used + * \return NULL on failure + */ +kvm_context_t kvm_init(struct kvm_callbacks *callbacks, + void *opaque); + +/*! + * \brief Cleanup the KVM context + * + * Should always be called when closing down KVM.\n + * Exception: If kvm_init() fails, this function should not be called, as the + * context would be invalid + * + * \param kvm Pointer to the kvm_context that is to be freed + */ +void kvm_finalize(kvm_context_t kvm); + +/*! + * \brief Disable the in-kernel IRQCHIP creation + * + * In-kernel irqchip is enabled by default. If userspace irqchip is to be used, + * this should be called prior to kvm_create(). + * + * \param kvm Pointer to the kvm_context + */ +void kvm_disable_irqchip_creation(kvm_context_t kvm); + +/*! + * \brief Disable the in-kernel PIT creation + * + * In-kernel pit is enabled by default. 
If userspace pit is to be used, + * this should be called prior to kvm_create(). + * + * \param kvm Pointer to the kvm_context + */ +void kvm_disable_pit_creation(kvm_context_t kvm); + +/*! + * \brief Create new virtual machine + * + * This creates a new virtual machine, maps physical RAM to it, and creates a + * virtual CPU for it.\n + * \n + * Memory gets mapped for addresses 0->0xA0000, 0xC0000->phys_mem_bytes + * + * \param kvm Pointer to the current kvm_context + * \param phys_mem_bytes The amount of physical ram you want the VM to have + * \param phys_mem This pointer will be set to point to the memory that + * kvm_create allocates for physical RAM + * \return 0 on success + */ +int kvm_create(kvm_context_t kvm, + unsigned long phys_mem_bytes, + void **phys_mem); +int kvm_create_vm(kvm_context_t kvm); +int kvm_check_extension(kvm_context_t kvm, int ext); +void kvm_create_irqchip(kvm_context_t kvm); + +/*! + * \brief Create a new virtual cpu + * + * This creates a new virtual cpu (the first vcpu is created by kvm_create()). + * Should be called from a thread dedicated to the vcpu. + * + * \param kvm kvm context + * \param slot vcpu number (> 0) + * \return 0 on success, -errno on failure + */ +int kvm_create_vcpu(kvm_context_t kvm, int slot); + +/*! + * \brief Start the VCPU + * + * This starts the VCPU and virtualization is started.\n + * \n + * This function will not return until any of these conditions are met: + * - An IO/MMIO handler does not return "0" + * - An exception that neither the guest OS, nor KVM can handle occurs + * + * \note This function will call the callbacks registered in kvm_init() + * to emulate those functions + * \note If you at any point want to interrupt the VCPU, kvm_run() will + * listen to the EINTR signal. This allows you to simulate external interrupts + * and asynchronous IO. 
+ * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should be started + * \return 0 on success, but you really shouldn't expect this function to + * return except for when an error has occurred, or when you have sent it + * an EINTR signal. + */ +int kvm_run(kvm_context_t kvm, int vcpu, void *env); + +/*! + * \brief Get interrupt flag from the last exit to userspace + * + * This gets the CPU interrupt flag as it was on the last exit to userspace. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \return interrupt flag value (0 or 1) + */ +int kvm_get_interrupt_flag(kvm_context_t kvm, int vcpu); + +/*! + * \brief Get the value of the APIC_BASE msr as of last exit to userspace + * + * This gets the APIC_BASE msr as it was on the last exit to userspace. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \return APIC_BASE msr contents + */ +uint64_t kvm_get_apic_base(kvm_context_t kvm, int vcpu); + +/*! + * \brief Check if a vcpu is ready for interrupt injection + * + * This checks if vcpu interrupts are not masked by mov ss or sti. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \return boolean indicating interrupt injection readiness + */ +int kvm_is_ready_for_interrupt_injection(kvm_context_t kvm, int vcpu); + +/*! 
+ * \brief Read VCPU registers + * + * This gets the GP registers from the VCPU and outputs them + * into a kvm_regs structure + * + * \note This function returns a \b copy of the VCPUs registers.\n + * If you wish to modify the VCPUs GP registers, you should call kvm_set_regs() + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \param regs Pointer to a kvm_regs which will be populated with the VCPUs + * registers values + * \return 0 on success + */ +int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs); + +/*! + * \brief Write VCPU registers + * + * This sets the GP registers on the VCPU from a kvm_regs structure + * + * \note When this function returns, the regs pointer and the data it points to + * can be discarded + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \param regs Pointer to a kvm_regs which holds the new VCPU + * register values + * \return 0 on success + */ +int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs); +/*! + * \brief Read VCPU fpu registers + * + * This gets the FPU registers from the VCPU and outputs them + * into a kvm_fpu structure + * + * \note This function returns a \b copy of the VCPUs registers.\n + * If you wish to modify the VCPU FPU registers, you should call kvm_set_fpu() + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \param fpu Pointer to a kvm_fpu which will be populated with the VCPUs + * fpu registers values + * \return 0 on success + */ +int kvm_get_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu); + +/*! 
+ * \brief Write VCPU fpu registers + * + * This sets the FPU registers on the VCPU from a kvm_fpu structure + * + * \note When this function returns, the fpu pointer and the data it points to + * can be discarded + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \param fpu Pointer to a kvm_fpu which holds the new vcpu fpu state + * \return 0 on success + */ +int kvm_set_fpu(kvm_context_t kvm, int vcpu, struct kvm_fpu *fpu); + +/*! + * \brief Read VCPU system registers + * + * This gets the non-GP registers from the VCPU and outputs them + * into a kvm_sregs structure + * + * \note This function returns a \b copy of the VCPUs registers.\n + * If you wish to modify the VCPUs non-GP registers, you should call + * kvm_set_sregs() + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \param regs Pointer to a kvm_sregs which will be populated with the VCPUs + * registers values + * \return 0 on success + */ +int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *regs); + +/*! + * \brief Write VCPU system registers + * + * This sets the non-GP registers on the VCPU from a kvm_sregs structure + * + * \note When this function returns, the regs pointer and the data it points to + * can be discarded + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \param regs Pointer to a kvm_sregs which holds the new VCPU + * register values + * \return 0 on success + */ +int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *regs); + +#ifdef KVM_CAP_MP_STATE +/*! + * * \brief Read VCPU MP state + * + */ +int kvm_get_mpstate(kvm_context_t kvm, int vcpu, + struct kvm_mp_state *mp_state); + +/*! + * * \brief Write VCPU MP state + * + */ +int kvm_set_mpstate(kvm_context_t kvm, int vcpu, + struct kvm_mp_state *mp_state); +/*! 
+ * * \brief Reset VCPU MP state + * + */ +static inline int kvm_reset_mpstate(kvm_context_t kvm, int vcpu) +{ + struct kvm_mp_state mp_state = {.mp_state = KVM_MP_STATE_UNINITIALIZED}; + return kvm_set_mpstate(kvm, vcpu, &mp_state); +} +#endif + +/*! + * \brief Simulate an external vectored interrupt + * + * This allows you to simulate an external vectored interrupt. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \param irq Vector number + * \return 0 on success + */ +int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq); + +#ifdef KVM_CAP_SET_GUEST_DEBUG +int kvm_set_guest_debug(kvm_context_t, int vcpu, struct kvm_guest_debug *dbg); +#endif + +#if defined(__i386__) || defined(__x86_64__) +/*! + * \brief Setup a vcpu's cpuid instruction emulation + * + * Set up a table of cpuid function to cpuid outputs.\n + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should be initialized + * \param nent number of entries to be installed + * \param entries cpuid function entries table + * \return 0 on success, or -errno on error + */ +int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent, + struct kvm_cpuid_entry *entries); + +/*! + * \brief Setup a vcpu's cpuid instruction emulation + * + * Set up a table of cpuid function to cpuid outputs. + * This call replaces the older kvm_setup_cpuid interface by adding a few + * parameters to support cpuid functions that have sub-leaf values. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should be initialized + * \param nent number of entries to be installed + * \param entries cpuid function entries table + * \return 0 on success, or -errno on error + */ +int kvm_setup_cpuid2(kvm_context_t kvm, int vcpu, int nent, + struct kvm_cpuid_entry2 *entries); + +/*! 
+ * \brief Setting the number of shadow pages to be allocated to the vm + * + * \param kvm pointer to kvm_context + * \param nrshadow_pages number of pages to be allocated + */ +int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int nrshadow_pages); + +/*! + * \brief Getting the number of shadow pages that are allocated to the vm + * + * \param kvm pointer to kvm_context + * \param nrshadow_pages number of pages to be allocated + */ +int kvm_get_shadow_pages(kvm_context_t kvm , unsigned int *nrshadow_pages); + +/*! + * \brief Set up cr8 for next time the vcpu is executed + * + * This is a fast setter for cr8, which will be applied when the + * vcpu next enters guest mode. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \param cr8 next cr8 value + */ +void kvm_set_cr8(kvm_context_t kvm, int vcpu, uint64_t cr8); + +/*! + * \brief Get cr8 for sync tpr in qemu apic emulation + * + * This is a getter for cr8, which is used to sync with the tpr in qemu + * apic emulation. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + */ +__u64 kvm_get_cr8(kvm_context_t kvm, int vcpu); +#endif + +/*! + * \brief Set a vcpu's signal mask for guest mode + * + * A vcpu can have different signals blocked in guest mode and user mode. + * This allows guest execution to be interrupted on a signal, without requiring + * that the signal be delivered to a signal handler (the signal can be + * dequeued using sigwait(2)). + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should be initialized + * \param sigset signal mask for guest mode + * \return 0 on success, or -errno on error + */ +int kvm_set_signal_mask(kvm_context_t kvm, int vcpu, const sigset_t *sigset); + +/*! 
+ * \brief Dump all VCPU information + * + * This dumps \b all the information that KVM has about a virtual CPU, namely: + * - GP Registers + * - System registers (selectors, descriptors, etc) + * - VMCS Data + * - MSRS + * - Pending interrupts + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \return 0 on success + */ +int kvm_dump_vcpu(kvm_context_t kvm, int vcpu); + +/*! + * \brief Dump VCPU registers + * + * This dumps some of the information that KVM has about a virtual CPU, namely: + * - GP Registers + * + * A much more verbose version of this is available as kvm_dump_vcpu() + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \return 0 on success + */ +void kvm_show_regs(kvm_context_t kvm, int vcpu); + + +void *kvm_create_phys_mem(kvm_context_t, unsigned long phys_start, + unsigned long len, int log, int writable); +void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start, + unsigned long len); +void kvm_unregister_memory_area(kvm_context_t, uint64_t phys_start, + unsigned long len); + +int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_start, unsigned long size); +int kvm_register_phys_mem(kvm_context_t kvm, + unsigned long phys_start, void *userspace_addr, + unsigned long len, int log); +int kvm_get_dirty_pages(kvm_context_t, unsigned long phys_addr, void *buf); +int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr, + unsigned long end_addr, void *buf, void*opaque, + int (*cb)(unsigned long start, unsigned long len, + void*bitmap, void *opaque)); +int kvm_register_coalesced_mmio(kvm_context_t kvm, + uint64_t addr, uint32_t size); +int kvm_unregister_coalesced_mmio(kvm_context_t kvm, + uint64_t addr, uint32_t size); + +/*! + * \brief Create a memory alias + * + * Aliases a portion of physical memory to another portion. 
If the guest + * accesses the alias region, it will behave exactly as if it accessed + * the target memory. + */ +int kvm_create_memory_alias(kvm_context_t, + uint64_t phys_start, uint64_t len, + uint64_t target_phys); + +/*! + * \brief Destroy a memory alias + * + * Removes an alias created with kvm_create_memory_alias(). + */ +int kvm_destroy_memory_alias(kvm_context_t, uint64_t phys_start); + +/*! + * \brief Get a bitmap of guest ram pages which are allocated to the guest. + * + * \param kvm Pointer to the current kvm_context + * \param phys_addr Memory slot phys addr + * \param bitmap Long aligned address of a big enough bitmap (one bit per page) + */ +int kvm_get_mem_map(kvm_context_t kvm, unsigned long phys_addr, void *bitmap); +int kvm_get_mem_map_range(kvm_context_t kvm, unsigned long phys_addr, + unsigned long len, void *buf, void *opaque, + int (*cb)(unsigned long start,unsigned long len, + void* bitmap, void* opaque)); +int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status); + +int kvm_dirty_pages_log_enable_slot(kvm_context_t kvm, + uint64_t phys_start, + uint64_t len); +int kvm_dirty_pages_log_disable_slot(kvm_context_t kvm, + uint64_t phys_start, + uint64_t len); +/*! + * \brief Enable dirty-pages-logging for all memory regions + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_dirty_pages_log_enable_all(kvm_context_t kvm); + +/*! + * \brief Disable dirty-page-logging for some memory regions + * + * Disable dirty-pages-logging for those memory regions that were + * created with dirty-page-logging disabled. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_dirty_pages_log_reset(kvm_context_t kvm); + +/*! + * \brief Query whether in kernel irqchip is used + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_irqchip_in_kernel(kvm_context_t kvm); + +int kvm_has_sync_mmu(kvm_context_t kvm); + +#ifdef KVM_CAP_IRQCHIP +/*! 
+ * \brief Dump in kernel IRQCHIP contents + * + * Dump one of the in kernel irq chip devices, including PIC (master/slave) + * and IOAPIC into a kvm_irqchip structure + * + * \param kvm Pointer to the current kvm_context + * \param chip The irq chip device to be dumped + */ +int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip); + +/*! + * \brief Set in kernel IRQCHIP contents + * + * Write one of the in kernel irq chip devices, including PIC (master/slave) + * and IOAPIC + * + * + * \param kvm Pointer to the current kvm_context + * \param chip The irq chip device to be written + */ +int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip); + +#if defined(__i386__) || defined(__x86_64__) +/*! + * \brief Get in kernel local APIC for vcpu + * + * Save the local apic state including the timer of a virtual CPU + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should be accessed + * \param s Local apic state of the specific virtual CPU + */ +int kvm_get_lapic(kvm_context_t kvm, int vcpu, struct kvm_lapic_state *s); + +/*! + * \brief Set in kernel local APIC for vcpu + * + * Restore the local apic state including the timer of a virtual CPU + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should be accessed + * \param s Local apic state of the specific virtual CPU + */ +int kvm_set_lapic(kvm_context_t kvm, int vcpu, struct kvm_lapic_state *s); + +#endif + +/*! + * \brief Simulate an NMI + * + * This allows you to simulate a non-maskable interrupt. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu Which virtual CPU should get dumped + * \return 0 on success + */ +int kvm_inject_nmi(kvm_context_t kvm, int vcpu); + +#endif + +/*! + * \brief Query whether in kernel pit is used + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_pit_in_kernel(kvm_context_t kvm); + +/*! 
+ * \brief Initialize coalesced MMIO + * + * Check for coalesced MMIO capability and store in context + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_init_coalesced_mmio(kvm_context_t kvm); + +#ifdef KVM_CAP_PIT + +#if defined(__i386__) || defined(__x86_64__) +/*! + * \brief Get in kernel PIT of the virtual domain + * + * Save the PIT state. + * + * \param kvm Pointer to the current kvm_context + * \param s PIT state of the virtual domain + */ +int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s); + +/*! + * \brief Set in kernel PIT of the virtual domain + * + * Restore the PIT state. + * Timer would be retriggered after being restored. + * + * \param kvm Pointer to the current kvm_context + * \param s PIT state of the virtual domain + */ +int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s); +#endif + +int kvm_reinject_control(kvm_context_t kvm, int pit_reinject); + +#endif + +#ifdef KVM_CAP_VAPIC + +/*! + * \brief Enable kernel tpr access reporting + * + * When tpr access reporting is enabled, the kernel will call the + * ->tpr_access() callback every time the guest vcpu accesses the tpr. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu vcpu to enable tpr access reporting on + */ +int kvm_enable_tpr_access_reporting(kvm_context_t kvm, int vcpu); + +/*! + * \brief Disable kernel tpr access reporting + * + * Undoes the effect of kvm_enable_tpr_access_reporting(). 
+ * + * \param kvm Pointer to the current kvm_context + * \param vcpu vcpu to disable tpr access reporting on + */ +int kvm_disable_tpr_access_reporting(kvm_context_t kvm, int vcpu); + +int kvm_enable_vapic(kvm_context_t kvm, int vcpu, uint64_t vapic); + +#endif + +#if defined(__s390__) +int kvm_s390_initial_reset(kvm_context_t kvm, int slot); +int kvm_s390_interrupt(kvm_context_t kvm, int slot, + struct kvm_s390_interrupt *kvmint); +int kvm_s390_set_initial_psw(kvm_context_t kvm, int slot, psw_t psw); +int kvm_s390_store_status(kvm_context_t kvm, int slot, unsigned long addr); +#endif + +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +/*! + * \brief Notifies host kernel about a PCI device to be assigned to a guest + * + * Used for PCI device assignment, this function notifies the host + * kernel about the assigning of the physical PCI device to a guest. + * + * \param kvm Pointer to the current kvm_context + * \param assigned_dev Parameters, like bus, devfn number, etc + */ +int kvm_assign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev); + +/*! + * \brief Assign IRQ for an assigned device + * + * Used for PCI device assignment, this function assigns IRQ numbers for + * a physical device and guest IRQ handling. + * + * \param kvm Pointer to the current kvm_context + * \param assigned_irq Parameters, like dev id, host irq, guest irq, etc + */ +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq); + +#ifdef KVM_CAP_ASSIGN_DEV_IRQ +/*! + * \brief Deassign IRQ for an assigned device + * + * Used for PCI device assignment, this function deassigns IRQ numbers + * for an assigned device. + * + * \param kvm Pointer to the current kvm_context + * \param assigned_irq Parameters, like dev id, host irq, guest irq, etc + */ +int kvm_deassign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq); +#endif +#endif + +/*! 
+ * \brief Determines whether destroying memory regions is allowed + * + * KVM before 2.6.29 had a bug when destroying memory regions. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_destroy_memory_region_works(kvm_context_t kvm); + +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT +/*! + * \brief Notifies host kernel about a PCI device to be deassigned from a guest + * + * Used for hot remove PCI device, this function notifies the host + * kernel about the deassigning of the physical PCI device from a guest. + * + * \param kvm Pointer to the current kvm_context + * \param assigned_dev Parameters, like bus, devfn number, etc + */ +int kvm_deassign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev); +#endif + +/*! + * \brief Checks whether the generic irq routing capability is present + * + * Checks whether kvm can reroute interrupts among the various interrupt + * controllers. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_has_gsi_routing(kvm_context_t kvm); + +/*! + * \brief Determines the number of gsis that can be routed + * + * Returns the number of distinct gsis that can be routed by kvm. This is + * also the number of distinct routes (if a gsi has two routes, then another + * gsi cannot be used...) + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_get_gsi_count(kvm_context_t kvm); + +/*! + * \brief Clears the temporary irq routing table + * + * Clears the temporary irq routing table. Nothing is committed to the + * running VM. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_clear_gsi_routes(kvm_context_t kvm); + +/*! + * \brief Adds an irq route to the temporary irq routing table + * + * Adds an irq route to the temporary irq routing table. Nothing is + * committed to the running VM. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin); + +/*! 
+ * \brief Removes an irq route from the temporary irq routing table + * + * Removes an irq route from the temporary irq routing table. Nothing is + * committed to the running VM. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin); + +struct kvm_irq_routing_entry; +/*! + * \brief Adds a routing entry to the temporary irq routing table + * + * Adds a filled routing entry to the temporary irq routing table. Nothing is + * committed to the running VM. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_add_routing_entry(kvm_context_t kvm, + struct kvm_irq_routing_entry* entry); + +/*! + * \brief Removes a routing entry from the temporary irq routing table + * + * Removes a routing entry from the temporary irq routing table. Nothing is + * committed to the running VM. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_del_routing_entry(kvm_context_t kvm, + struct kvm_irq_routing_entry* entry); + +/*! + * \brief Commit the temporary irq routing table + * + * Commit the temporary irq routing table to the running VM. + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_commit_irq_routes(kvm_context_t kvm); + +/*! + * \brief Get unused GSI number for irq routing table + * + * Get unused GSI number for irq routing table + * + * \param kvm Pointer to the current kvm_context + */ +int kvm_get_irq_route_gsi(kvm_context_t kvm); + +#ifdef KVM_CAP_DEVICE_MSIX +int kvm_assign_set_msix_nr(kvm_context_t kvm, + struct kvm_assigned_msix_nr *msix_nr); +int kvm_assign_set_msix_entry(kvm_context_t kvm, + struct kvm_assigned_msix_entry *entry); +#endif + +uint32_t kvm_get_supported_cpuid(kvm_context_t kvm, uint32_t function, int reg); + +#endif diff --git a/libkvm-common.h b/libkvm-common.h new file mode 100644 index 000000000..c95c59169 --- /dev/null +++ b/libkvm-common.h @@ -0,0 +1,94 @@ +/* + * This header is for functions & variables that will ONLY be + * used inside libkvm. 
+ * + * derived from libkvm.c + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * This work is licensed under the GNU LGPL license, version 2. + */ + +#ifndef KVM_COMMON_H +#define KVM_COMMON_H + +/* FIXME: share this number with kvm */ +/* FIXME: or dynamically alloc/realloc regions */ +#ifdef __s390__ +#define KVM_MAX_NUM_MEM_REGIONS 1u +#define MAX_VCPUS 64 +#define LIBKVM_S390_ORIGIN (0UL) +#elif defined(__ia64__) +#define KVM_MAX_NUM_MEM_REGIONS 32u +#define MAX_VCPUS 256 +#else +#define KVM_MAX_NUM_MEM_REGIONS 32u +#define MAX_VCPUS 16 +#endif + + +/* kvm abi version variable */ +extern int kvm_abi; + +/** + * \brief The KVM context + * + * The verbose KVM context + */ + +struct kvm_context { + /// Filedescriptor to /dev/kvm + int fd; + int vm_fd; + int vcpu_fd[MAX_VCPUS]; + struct kvm_run *run[MAX_VCPUS]; + /// Callbacks that KVM uses to emulate various unvirtualizable functionality + struct kvm_callbacks *callbacks; + void *opaque; + /// is dirty pages logging enabled for all regions or not + int dirty_pages_log_all; + /// do not create in-kernel irqchip if set + int no_irqchip_creation; + /// in-kernel irqchip status + int irqchip_in_kernel; + /// ioctl to use to inject interrupts + int irqchip_inject_ioctl; + /// do not create in-kernel pit if set + int no_pit_creation; + /// in-kernel pit status + int pit_in_kernel; + /// in-kernel coalesced mmio + int coalesced_mmio; +#ifdef KVM_CAP_IRQ_ROUTING + struct kvm_irq_routing *irq_routes; + int nr_allocated_irq_routes; +#endif + void *used_gsi_bitmap; + int max_gsi; +}; + +int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory, + void **vm_mem); +int kvm_alloc_userspace_memory(kvm_context_t kvm, unsigned long memory, + void **vm_mem); + +int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes, + void **vm_mem); +int kvm_arch_run(struct kvm_run *run, kvm_context_t kvm, int vcpu); + + +void 
kvm_show_code(kvm_context_t kvm, int vcpu); + +int handle_halt(kvm_context_t kvm, int vcpu); +int handle_shutdown(kvm_context_t kvm, void *env); +void post_kvm_run(kvm_context_t kvm, void *env); +int pre_kvm_run(kvm_context_t kvm, void *env); +int handle_io_window(kvm_context_t kvm); +int handle_debug(kvm_context_t kvm, int vcpu, void *env); +int try_push_interrupts(kvm_context_t kvm); + +#endif diff --git a/qemu-kvm-ia64.c b/qemu-kvm-ia64.c index 66e4232c9..0e65cb4b0 100644 --- a/qemu-kvm-ia64.c +++ b/qemu-kvm-ia64.c @@ -5,7 +5,7 @@ #include "hw/hw.h" #include "qemu-kvm.h" -#include <libkvm.h> +#include "libkvm-all.h" #include <pthread.h> #include <sys/utsname.h> #include <sys/io.h> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 856b70c4e..bbe031212 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -15,7 +15,7 @@ #include <sys/io.h> #include "qemu-kvm.h" -#include <libkvm.h> +#include "libkvm-all.h" #include <pthread.h> #include <sys/utsname.h> #include <linux/kvm_para.h> diff --git a/qemu-kvm.c b/qemu-kvm.c index bc3d7db00..11ecbde68 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -19,7 +19,7 @@ #include "gdbstub.h" #include "qemu-kvm.h" -#include <libkvm.h> +#include "libkvm-all.h" #include <pthread.h> #include <sys/utsname.h> #include <sys/syscall.h> diff --git a/qemu-kvm.h b/qemu-kvm.h index dd045dd1c..4bd24dcb6 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -142,7 +142,7 @@ int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data); #define BITMAP_SIZE(m) (ALIGN(((m)>>TARGET_PAGE_BITS), HOST_LONG_BITS) / 8) #ifdef USE_KVM -#include "libkvm.h" +#include "libkvm-all.h" #include "sys-queue.h" extern int kvm_allowed; diff --git a/target-i386/libkvm.c b/target-i386/libkvm.c new file mode 100644 index 000000000..32d03f16b --- /dev/null +++ b/target-i386/libkvm.c @@ -0,0 +1,666 @@ +#include "libkvm-all.h" +#include "libkvm.h" +#include <errno.h> +#include <sys/ioctl.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <stdio.h> 
+#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> + +int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr) +{ +#ifdef KVM_CAP_SET_TSS_ADDR + int r; + + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); + if (r > 0) { + r = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, addr); + if (r == -1) { + fprintf(stderr, "kvm_set_tss_addr: %m\n"); + return -errno; + } + return 0; + } +#endif + return -ENOSYS; +} + +static int kvm_init_tss(kvm_context_t kvm) +{ +#ifdef KVM_CAP_SET_TSS_ADDR + int r; + + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); + if (r > 0) { + /* + * this address is 3 pages before the bios, and the bios should present + * as unavaible memory + */ + r = kvm_set_tss_addr(kvm, 0xfffbd000); + if (r < 0) { + fprintf(stderr, "kvm_init_tss: unable to set tss addr\n"); + return r; + } + + } +#endif + return 0; +} + +static int kvm_create_pit(kvm_context_t kvm) +{ +#ifdef KVM_CAP_PIT + int r; + + kvm->pit_in_kernel = 0; + if (!kvm->no_pit_creation) { + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT); + if (r > 0) { + r = ioctl(kvm->vm_fd, KVM_CREATE_PIT); + if (r >= 0) + kvm->pit_in_kernel = 1; + else { + fprintf(stderr, "Create kernel PIC irqchip failed\n"); + return r; + } + } + } +#endif + return 0; +} + +int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes, + void **vm_mem) +{ + int r = 0; + + r = kvm_init_tss(kvm); + if (r < 0) + return r; + + r = kvm_create_pit(kvm); + if (r < 0) + return r; + + r = kvm_init_coalesced_mmio(kvm); + if (r < 0) + return r; + + return 0; +} + +#ifdef KVM_EXIT_TPR_ACCESS + +static int handle_tpr_access(kvm_context_t kvm, struct kvm_run *run, int vcpu) +{ + return kvm->callbacks->tpr_access(kvm->opaque, vcpu, + run->tpr_access.rip, + run->tpr_access.is_write); +} + + +int kvm_enable_vapic(kvm_context_t kvm, int vcpu, uint64_t vapic) +{ + int r; + struct kvm_vapic_addr va = { + .vapic_addr = vapic, + }; + + r = 
ioctl(kvm->vcpu_fd[vcpu], KVM_SET_VAPIC_ADDR, &va); + if (r == -1) { + r = -errno; + perror("kvm_enable_vapic"); + return r; + } + return 0; +} + +#endif + +int kvm_arch_run(struct kvm_run *run,kvm_context_t kvm, int vcpu) +{ + int r = 0; + + switch (run->exit_reason) { +#ifdef KVM_EXIT_SET_TPR + case KVM_EXIT_SET_TPR: + break; +#endif +#ifdef KVM_EXIT_TPR_ACCESS + case KVM_EXIT_TPR_ACCESS: + r = handle_tpr_access(kvm, run, vcpu); + break; +#endif + default: + r = 1; + break; + } + + return r; +} + +#define MAX_ALIAS_SLOTS 4 +static struct { + uint64_t start; + uint64_t len; +} kvm_aliases[MAX_ALIAS_SLOTS]; + +static int get_alias_slot(uint64_t start) +{ + int i; + + for (i=0; i<MAX_ALIAS_SLOTS; i++) + if (kvm_aliases[i].start == start) + return i; + return -1; +} +static int get_free_alias_slot(void) +{ + int i; + + for (i=0; i<MAX_ALIAS_SLOTS; i++) + if (kvm_aliases[i].len == 0) + return i; + return -1; +} + +static void register_alias(int slot, uint64_t start, uint64_t len) +{ + kvm_aliases[slot].start = start; + kvm_aliases[slot].len = len; +} + +int kvm_create_memory_alias(kvm_context_t kvm, + uint64_t phys_start, + uint64_t len, + uint64_t target_phys) +{ + struct kvm_memory_alias alias = { + .flags = 0, + .guest_phys_addr = phys_start, + .memory_size = len, + .target_phys_addr = target_phys, + }; + int fd = kvm->vm_fd; + int r; + int slot; + + slot = get_alias_slot(phys_start); + if (slot < 0) + slot = get_free_alias_slot(); + if (slot < 0) + return -EBUSY; + alias.slot = slot; + + r = ioctl(fd, KVM_SET_MEMORY_ALIAS, &alias); + if (r == -1) + return -errno; + + register_alias(slot, phys_start, len); + return 0; +} + +int kvm_destroy_memory_alias(kvm_context_t kvm, uint64_t phys_start) +{ + return kvm_create_memory_alias(kvm, phys_start, 0, 0); +} + +#ifdef KVM_CAP_IRQCHIP + +int kvm_get_lapic(kvm_context_t kvm, int vcpu, struct kvm_lapic_state *s) +{ + int r; + if (!kvm->irqchip_in_kernel) + return 0; + r = ioctl(kvm->vcpu_fd[vcpu], KVM_GET_LAPIC, s); + if 
(r == -1) { + r = -errno; + perror("kvm_get_lapic"); + } + return r; +} + +int kvm_set_lapic(kvm_context_t kvm, int vcpu, struct kvm_lapic_state *s) +{ + int r; + if (!kvm->irqchip_in_kernel) + return 0; + r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_LAPIC, s); + if (r == -1) { + r = -errno; + perror("kvm_set_lapic"); + } + return r; +} + +#endif + +#ifdef KVM_CAP_PIT + +int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s) +{ + int r; + if (!kvm->pit_in_kernel) + return 0; + r = ioctl(kvm->vm_fd, KVM_GET_PIT, s); + if (r == -1) { + r = -errno; + perror("kvm_get_pit"); + } + return r; +} + +int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s) +{ + int r; + if (!kvm->pit_in_kernel) + return 0; + r = ioctl(kvm->vm_fd, KVM_SET_PIT, s); + if (r == -1) { + r = -errno; + perror("kvm_set_pit"); + } + return r; +} + +#endif + +void kvm_show_code(kvm_context_t kvm, int vcpu) +{ +#define SHOW_CODE_LEN 50 + int fd = kvm->vcpu_fd[vcpu]; + struct kvm_regs regs; + struct kvm_sregs sregs; + int r, n; + int back_offset; + unsigned char code; + char code_str[SHOW_CODE_LEN * 3 + 1]; + unsigned long rip; + + r = ioctl(fd, KVM_GET_SREGS, &sregs); + if (r == -1) { + perror("KVM_GET_SREGS"); + return; + } + r = ioctl(fd, KVM_GET_REGS, ®s); + if (r == -1) { + perror("KVM_GET_REGS"); + return; + } + rip = sregs.cs.base + regs.rip; + back_offset = regs.rip; + if (back_offset > 20) + back_offset = 20; + *code_str = 0; + for (n = -back_offset; n < SHOW_CODE_LEN-back_offset; ++n) { + if (n == 0) + strcat(code_str, " -->"); + r = kvm->callbacks->mmio_read(kvm->opaque, rip + n, &code, 1); + if (r < 0) { + strcat(code_str, " xx"); + continue; + } + sprintf(code_str + strlen(code_str), " %02x", code); + } + fprintf(stderr, "code:%s\n", code_str); +} + + +/* + * Returns available msr list. User must free. 
+ */ +struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm) +{ + struct kvm_msr_list sizer, *msrs; + int r, e; + + sizer.nmsrs = 0; + r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, &sizer); + if (r == -1 && errno != E2BIG) + return NULL; + msrs = malloc(sizeof *msrs + sizer.nmsrs * sizeof *msrs->indices); + if (!msrs) { + errno = ENOMEM; + return NULL; + } + msrs->nmsrs = sizer.nmsrs; + r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, msrs); + if (r == -1) { + e = errno; + free(msrs); + errno = e; + return NULL; + } + return msrs; +} + +int kvm_get_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs, + int n) +{ + struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs); + int r, e; + + if (!kmsrs) { + errno = ENOMEM; + return -1; + } + kmsrs->nmsrs = n; + memcpy(kmsrs->entries, msrs, n * sizeof *msrs); + r = ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MSRS, kmsrs); + e = errno; + memcpy(msrs, kmsrs->entries, n * sizeof *msrs); + free(kmsrs); + errno = e; + return r; +} + +int kvm_set_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs, + int n) +{ + struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs); + int r, e; + + if (!kmsrs) { + errno = ENOMEM; + return -1; + } + kmsrs->nmsrs = n; + memcpy(kmsrs->entries, msrs, n * sizeof *msrs); + r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MSRS, kmsrs); + e = errno; + free(kmsrs); + errno = e; + return r; +} + +static void print_seg(FILE *file, const char *name, struct kvm_segment *seg) +{ + fprintf(stderr, + "%s %04x (%08llx/%08x p %d dpl %d db %d s %d type %x l %d" + " g %d avl %d)\n", + name, seg->selector, seg->base, seg->limit, seg->present, + seg->dpl, seg->db, seg->s, seg->type, seg->l, seg->g, + seg->avl); +} + +static void print_dt(FILE *file, const char *name, struct kvm_dtable *dt) +{ + fprintf(stderr, "%s %llx/%x\n", name, dt->base, dt->limit); +} + +void kvm_show_regs(kvm_context_t kvm, int vcpu) +{ + int fd = kvm->vcpu_fd[vcpu]; + struct kvm_regs regs; + struct kvm_sregs sregs; + int r; + + r 
= ioctl(fd, KVM_GET_REGS, ®s); + if (r == -1) { + perror("KVM_GET_REGS"); + return; + } + fprintf(stderr, + "rax %016llx rbx %016llx rcx %016llx rdx %016llx\n" + "rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n" + "r8 %016llx r9 %016llx r10 %016llx r11 %016llx\n" + "r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n" + "rip %016llx rflags %08llx\n", + regs.rax, regs.rbx, regs.rcx, regs.rdx, + regs.rsi, regs.rdi, regs.rsp, regs.rbp, + regs.r8, regs.r9, regs.r10, regs.r11, + regs.r12, regs.r13, regs.r14, regs.r15, + regs.rip, regs.rflags); + r = ioctl(fd, KVM_GET_SREGS, &sregs); + if (r == -1) { + perror("KVM_GET_SREGS"); + return; + } + print_seg(stderr, "cs", &sregs.cs); + print_seg(stderr, "ds", &sregs.ds); + print_seg(stderr, "es", &sregs.es); + print_seg(stderr, "ss", &sregs.ss); + print_seg(stderr, "fs", &sregs.fs); + print_seg(stderr, "gs", &sregs.gs); + print_seg(stderr, "tr", &sregs.tr); + print_seg(stderr, "ldt", &sregs.ldt); + print_dt(stderr, "gdt", &sregs.gdt); + print_dt(stderr, "idt", &sregs.idt); + fprintf(stderr, "cr0 %llx cr2 %llx cr3 %llx cr4 %llx cr8 %llx" + " efer %llx\n", + sregs.cr0, sregs.cr2, sregs.cr3, sregs.cr4, sregs.cr8, + sregs.efer); +} + +uint64_t kvm_get_apic_base(kvm_context_t kvm, int vcpu) +{ + struct kvm_run *run = kvm->run[vcpu]; + + return run->apic_base; +} + +void kvm_set_cr8(kvm_context_t kvm, int vcpu, uint64_t cr8) +{ + struct kvm_run *run = kvm->run[vcpu]; + + run->cr8 = cr8; +} + +__u64 kvm_get_cr8(kvm_context_t kvm, int vcpu) +{ + return kvm->run[vcpu]->cr8; +} + +int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent, + struct kvm_cpuid_entry *entries) +{ + struct kvm_cpuid *cpuid; + int r; + + cpuid = malloc(sizeof(*cpuid) + nent * sizeof(*entries)); + if (!cpuid) + return -ENOMEM; + + cpuid->nent = nent; + memcpy(cpuid->entries, entries, nent * sizeof(*entries)); + r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_CPUID, cpuid); + + free(cpuid); + return r; +} + +int kvm_setup_cpuid2(kvm_context_t kvm, int vcpu, int nent, + 
struct kvm_cpuid_entry2 *entries) +{ + struct kvm_cpuid2 *cpuid; + int r; + + cpuid = malloc(sizeof(*cpuid) + nent * sizeof(*entries)); + if (!cpuid) + return -ENOMEM; + + cpuid->nent = nent; + memcpy(cpuid->entries, entries, nent * sizeof(*entries)); + r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_CPUID2, cpuid); + if (r == -1) { + fprintf(stderr, "kvm_setup_cpuid2: %m\n"); + return -errno; + } + free(cpuid); + return r; +} + +int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int nrshadow_pages) +{ +#ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL + int r; + + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, + KVM_CAP_MMU_SHADOW_CACHE_CONTROL); + if (r > 0) { + r = ioctl(kvm->vm_fd, KVM_SET_NR_MMU_PAGES, nrshadow_pages); + if (r == -1) { + fprintf(stderr, "kvm_set_shadow_pages: %m\n"); + return -errno; + } + return 0; + } +#endif + return -1; +} + +int kvm_get_shadow_pages(kvm_context_t kvm, unsigned int *nrshadow_pages) +{ +#ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL + int r; + + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, + KVM_CAP_MMU_SHADOW_CACHE_CONTROL); + if (r > 0) { + *nrshadow_pages = ioctl(kvm->vm_fd, KVM_GET_NR_MMU_PAGES); + return 0; + } +#endif + return -1; +} + +#ifdef KVM_CAP_VAPIC + +static int tpr_access_reporting(kvm_context_t kvm, int vcpu, int enabled) +{ + int r; + struct kvm_tpr_access_ctl tac = { + .enabled = enabled, + }; + + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC); + if (r == -1 || r == 0) + return -ENOSYS; + r = ioctl(kvm->vcpu_fd[vcpu], KVM_TPR_ACCESS_REPORTING, &tac); + if (r == -1) { + r = -errno; + perror("KVM_TPR_ACCESS_REPORTING"); + return r; + } + return 0; +} + +int kvm_enable_tpr_access_reporting(kvm_context_t kvm, int vcpu) +{ + return tpr_access_reporting(kvm, vcpu, 1); +} + +int kvm_disable_tpr_access_reporting(kvm_context_t kvm, int vcpu) +{ + return tpr_access_reporting(kvm, vcpu, 0); +} + +#endif + +#ifdef KVM_CAP_EXT_CPUID + +static struct kvm_cpuid2 *try_get_cpuid(kvm_context_t kvm, int max) +{ + struct kvm_cpuid2 *cpuid; + int r, 
size; + + size = sizeof(*cpuid) + max * sizeof(*cpuid->entries); + cpuid = (struct kvm_cpuid2 *)malloc(size); + cpuid->nent = max; + r = ioctl(kvm->fd, KVM_GET_SUPPORTED_CPUID, cpuid); + if (r == -1) + r = -errno; + else if (r == 0 && cpuid->nent >= max) + r = -E2BIG; + if (r < 0) { + if (r == -E2BIG) { + free(cpuid); + return NULL; + } else { + fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n", + strerror(-r)); + exit(1); + } + } + return cpuid; +} + +#define R_EAX 0 +#define R_ECX 1 +#define R_EDX 2 +#define R_EBX 3 +#define R_ESP 4 +#define R_EBP 5 +#define R_ESI 6 +#define R_EDI 7 + +uint32_t kvm_get_supported_cpuid(kvm_context_t kvm, uint32_t function, int reg) +{ + struct kvm_cpuid2 *cpuid; + int i, max; + uint32_t ret = 0; + uint32_t cpuid_1_edx; + + if (!kvm_check_extension(kvm, KVM_CAP_EXT_CPUID)) { + return -1U; + } + + max = 1; + while ((cpuid = try_get_cpuid(kvm, max)) == NULL) { + max *= 2; + } + + for (i = 0; i < cpuid->nent; ++i) { + if (cpuid->entries[i].function == function) { + switch (reg) { + case R_EAX: + ret = cpuid->entries[i].eax; + break; + case R_EBX: + ret = cpuid->entries[i].ebx; + break; + case R_ECX: + ret = cpuid->entries[i].ecx; + break; + case R_EDX: + ret = cpuid->entries[i].edx; + if (function == 1) { + /* kvm misreports the following features + */ + ret |= 1 << 12; /* MTRR */ + ret |= 1 << 16; /* PAT */ + ret |= 1 << 7; /* MCE */ + ret |= 1 << 14; /* MCA */ + } + + /* On Intel, kvm returns cpuid according to + * the Intel spec, so add missing bits + * according to the AMD spec: + */ + if (function == 0x80000001) { + cpuid_1_edx = kvm_get_supported_cpuid(kvm, 1, R_EDX); + ret |= cpuid_1_edx & 0xdfeff7ff; + } + break; + } + } + } + + free(cpuid); + + return ret; +} + +#else + +uint32_t kvm_get_supported_cpuid(kvm_context_t kvm, uint32_t function, int reg) +{ + return -1U; +} + +#endif diff --git a/target-i386/libkvm.h b/target-i386/libkvm.h new file mode 100644 index 000000000..307fbccf3 --- /dev/null +++ 
b/target-i386/libkvm.h @@ -0,0 +1,55 @@ +/* + * This header is for functions & variables that will ONLY be + * used inside libkvm for x86. + * THESE ARE NOT EXPOSED TO THE USER AND ARE ONLY FOR USE + * WITHIN LIBKVM. + * + * derived from libkvm.c + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * This work is licensed under the GNU LGPL license, version 2. + */ + +#ifndef KVM_X86_H +#define KVM_X86_H + +#include "libkvm-common.h" + +#define PAGE_SIZE 4096ul +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr); + +#ifdef KVM_CAP_VAPIC + +/*! + * \brief Enable kernel tpr access reporting + * + * When tpr access reporting is enabled, the kernel will call the + * ->tpr_access() callback every time the guest vcpu accesses the tpr. + * + * \param kvm Pointer to the current kvm_context + * \param vcpu vcpu to enable tpr access reporting on + */ +int kvm_enable_tpr_access_reporting(kvm_context_t kvm, int vcpu); + +/*! + * \brief Disable kernel tpr access reporting + * + * Undoes the effect of kvm_enable_tpr_access_reporting(). + * + * \param kvm Pointer to the current kvm_context + * \param vcpu vcpu to disable tpr access reporting on + */ +int kvm_disable_tpr_access_reporting(kvm_context_t kvm, int vcpu); + +#endif + +#define smp_wmb() asm volatile("" ::: "memory") + +#endif diff --git a/target-ia64/libkvm.c b/target-ia64/libkvm.c new file mode 100644 index 000000000..48669de6d --- /dev/null +++ b/target-ia64/libkvm.c @@ -0,0 +1,82 @@ +/* + * libkvm-ia64.c :Kernel-based Virtual Machine control library for ia64. + * + * This library provides an API to control the kvm hardware virtualization + * module. 
+ * + * Copyright (C) 2006 Qumranet + * + * Authors: + * + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * Copyright (C) 2007 Intel + * Added by : Zhang Xiantao <xiantao.zhang@intel.com> + * + * This work is licensed under the GNU LGPL license, version 2. + * + */ + +#include "libkvm-all.h" +#include "libkvm.h" +#include <errno.h> +#include <sys/ioctl.h> +#include <string.h> +#include <unistd.h> +#include <stropts.h> +#include <sys/mman.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> + +int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes, + void **vm_mem) +{ + int r; + + r = kvm_init_coalesced_mmio(kvm); + if (r < 0) + return r; + + return 0; +} + +int kvm_arch_run(struct kvm_run *run,kvm_context_t kvm, int vcpu) +{ + int r = 0; + + switch (run->exit_reason) { + default: + r = 1; + break; + } + + return r; +} + +void kvm_show_code(kvm_context_t kvm, int vcpu) +{ + fprintf(stderr, "kvm_show_code not supported yet!\n"); +} + +void kvm_show_regs(kvm_context_t kvm, int vcpu) +{ + fprintf(stderr,"kvm_show_regs not supportted today!\n"); +} + +int kvm_create_memory_alias(kvm_context_t kvm, + uint64_t phys_start, + uint64_t len, + uint64_t target_phys) +{ + return 0; +} + +int kvm_destroy_memory_alias(kvm_context_t kvm, uint64_t phys_start) +{ + return 0; +} diff --git a/target-ia64/libkvm.h b/target-ia64/libkvm.h new file mode 100644 index 000000000..f084420ee --- /dev/null +++ b/target-ia64/libkvm.h @@ -0,0 +1,31 @@ +/* + * This header is for functions & variables that will ONLY be + * used inside libkvm for x86. + * THESE ARE NOT EXPOSED TO THE USER AND ARE ONLY FOR USE + * WITHIN LIBKVM. + * + * derived from libkvm.c + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * This work is licensed under the GNU LGPL license, version 2. 
+ */ + +#ifndef KVM_IA64_H +#define KVM_IA64_H + +#include "libkvm-common.h" + +extern int kvm_page_size; + +#define PAGE_SIZE kvm_page_size +#define PAGE_MASK (~(kvm_page_size - 1)) + +#define ia64_mf() asm volatile ("mf" ::: "memory") +#define smp_wmb() ia64_mf() + +#endif diff --git a/target-ppc/libkvm.c b/target-ppc/libkvm.c new file mode 100644 index 000000000..2dfff3b79 --- /dev/null +++ b/target-ppc/libkvm.c @@ -0,0 +1,100 @@ +/* + * This file contains the powerpc specific implementation for the + * architecture dependent functions defined in kvm-common.h and + * libkvm.h + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * Copyright IBM Corp. 2007,2008 + * Authors: + * Jerone Young <jyoung5@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + * + * This work is licensed under the GNU LGPL license, version 2. + */ + +#include "libkvm-all.h" +#include "libkvm.h" +#include <errno.h> +#include <stdio.h> +#include <inttypes.h> + +int handle_dcr(struct kvm_run *run, kvm_context_t kvm, int vcpu) +{ + int ret = 0; + + if (run->dcr.is_write) + ret = kvm->callbacks->powerpc_dcr_write(vcpu, + run->dcr.dcrn, + run->dcr.data); + else + ret = kvm->callbacks->powerpc_dcr_read(vcpu, + run->dcr.dcrn, + &(run->dcr.data)); + + return ret; +} + +void kvm_show_code(kvm_context_t kvm, int vcpu) +{ + fprintf(stderr, "%s: Operation not supported\n", __FUNCTION__); +} + +void kvm_show_regs(kvm_context_t kvm, int vcpu) +{ + struct kvm_regs regs; + int i; + + if (kvm_get_regs(kvm, vcpu, ®s)) + return; + + fprintf(stderr,"guest vcpu #%d\n", vcpu); + fprintf(stderr,"pc: %016"PRIx64" msr: %016"PRIx64"\n", + regs.pc, regs.msr); + fprintf(stderr,"lr: %016"PRIx64" ctr: %016"PRIx64"\n", + regs.lr, regs.ctr); + fprintf(stderr,"srr0: %016"PRIx64" srr1: %016"PRIx64"\n", + regs.srr0, regs.srr1); + for (i=0; i<32; i+=4) + { + fprintf(stderr, "gpr%02d: %016"PRIx64" %016"PRIx64" %016"PRIx64 + " 
%016"PRIx64"\n", i, + regs.gpr[i], + regs.gpr[i+1], + regs.gpr[i+2], + regs.gpr[i+3]); + } + + fflush(stdout); +} + +int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes, + void **vm_mem) +{ + int r; + + r = kvm_init_coalesced_mmio(kvm); + if (r < 0) + return r; + + return 0; +} + +int kvm_arch_run(struct kvm_run *run, kvm_context_t kvm, int vcpu) +{ + int ret = 0; + + switch (run->exit_reason){ + case KVM_EXIT_DCR: + ret = handle_dcr(run, kvm, vcpu); + break; + default: + ret = 1; + break; + } + return ret; +} diff --git a/target-ppc/libkvm.h b/target-ppc/libkvm.h new file mode 100644 index 000000000..95c314d7b --- /dev/null +++ b/target-ppc/libkvm.h @@ -0,0 +1,36 @@ +/* + * This header is for functions & variables that will ONLY be + * used inside libkvm for powerpc. + * THESE ARE NOT EXPOSED TO THE USER AND ARE ONLY FOR USE + * WITHIN LIBKVM. + * + * Copyright (C) 2006 Qumranet, Inc. + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * + * Copyright 2007 IBM Corporation. + * Added by: Jerone Young <jyoung5@us.ibm.com> + * + * This work is licensed under the GNU LGPL license, version 2. + */ + +#ifndef KVM_POWERPC_H +#define KVM_POWERPC_H + +#include "libkvm-common.h" + +extern int kvm_page_size; + +#define PAGE_SIZE kvm_page_size +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +static inline void eieio(void) +{ + asm volatile("eieio" : : : "memory"); +} + +#define smp_wmb() eieio() + +#endif |