Browse Source

Merge tag 'vfio-v4.18-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Bind type1 task tracking to group_leader to facilitate vCPU hotplug
   in QEMU (Alex Williamson)

 - Sample mdev display drivers, including region-based host and guest
   Linux drivers and bochs compatible dmabuf device
   (Gerd Hoffmann)

 - Fix vfio-platform reset module leak (Geert Uytterhoeven)

 - vfio-platform error message consistency (Geert Uytterhoeven)

 - Global checking for mdev uuid collisions rather than per parent
   device (Alex Williamson)

 - Use match_string() helper (Yisheng Xie)

 - vfio-platform PM domain fixes (Geert Uytterhoeven)

 - Fix sample mbochs driver build dependency (Arnd Bergmann)

* tag 'vfio-v4.18-rc1' of git://github.com/awilliam/linux-vfio:
  samples: mbochs: add DMA_SHARED_BUFFER dependency
  vfio: platform: Fix using devices in PM Domains
  vfio: use match_string() helper
  vfio/mdev: Re-order sysfs attribute creation
  vfio/mdev: Check globally for duplicate devices
  vfio: platform: Make printed error messages more consistent
  vfio: platform: Fix reset module leak in error path
  sample: vfio bochs vbe display (host device for bochs-drm)
  sample: vfio mdev display - guest driver
  sample: vfio mdev display - host device
  vfio/type1: Fix task tracking for QEMU vCPU hotplug
Linus Torvalds 7 years ago
parent
commit
467590e055

+ 5 - 0
Documentation/vfio-mediated-device.txt

@@ -145,6 +145,11 @@ The functions in the mdev_parent_ops structure are as follows:
 * create: allocate basic resources in a driver for a mediated device
 * remove: free resources in a driver when a mediated device is destroyed
 
+(Note that mdev-core provides no implicit serialization of create/remove
+callbacks per mdev parent device, per mdev type, or any other categorization.
+Vendor drivers are expected to be fully asynchronous in this respect or
+provide their own internal resource protection.)
+
 The callbacks in the mdev_parent_ops structure are as follows:
 
 * open: open callback of mediated device

+ 36 - 66
drivers/vfio/mdev/mdev_core.c

@@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *mdev)
 }
 EXPORT_SYMBOL(mdev_uuid);
 
-static int _find_mdev_device(struct device *dev, void *data)
-{
-	struct mdev_device *mdev;
-
-	if (!dev_is_mdev(dev))
-		return 0;
-
-	mdev = to_mdev_device(dev);
-
-	if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
-		return 1;
-
-	return 0;
-}
-
-static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid)
-{
-	struct device *dev;
-
-	dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
-	if (dev) {
-		put_device(dev);
-		return true;
-	}
-
-	return false;
-}
-
 /* Should be called holding parent_list_lock */
 static struct mdev_parent *__find_parent_device(struct device *dev)
 {
@@ -221,7 +193,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
 	}
 
 	kref_init(&parent->ref);
-	mutex_init(&parent->lock);
 
 	parent->dev = dev;
 	parent->ops = ops;
@@ -297,6 +268,10 @@ static void mdev_device_release(struct device *dev)
 {
 	struct mdev_device *mdev = to_mdev_device(dev);
 
+	mutex_lock(&mdev_list_lock);
+	list_del(&mdev->next);
+	mutex_unlock(&mdev_list_lock);
+
 	dev_dbg(&mdev->dev, "MDEV: destroying\n");
 	kfree(mdev);
 }
@@ -304,7 +279,7 @@ static void mdev_device_release(struct device *dev)
 int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 {
 	int ret;
-	struct mdev_device *mdev;
+	struct mdev_device *mdev, *tmp;
 	struct mdev_parent *parent;
 	struct mdev_type *type = to_mdev_type(kobj);
 
@@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 	if (!parent)
 		return -EINVAL;
 
-	mutex_lock(&parent->lock);
+	mutex_lock(&mdev_list_lock);
 
 	/* Check for duplicate */
-	if (mdev_device_exist(parent, uuid)) {
-		ret = -EEXIST;
-		goto create_err;
+	list_for_each_entry(tmp, &mdev_list, next) {
+		if (!uuid_le_cmp(tmp->uuid, uuid)) {
+			mutex_unlock(&mdev_list_lock);
+			ret = -EEXIST;
+			goto mdev_fail;
+		}
 	}
 
 	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
 	if (!mdev) {
+		mutex_unlock(&mdev_list_lock);
 		ret = -ENOMEM;
-		goto create_err;
+		goto mdev_fail;
 	}
 
 	memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
+	list_add(&mdev->next, &mdev_list);
+	mutex_unlock(&mdev_list_lock);
+
 	mdev->parent = parent;
 	kref_init(&mdev->ref);
 
@@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 	ret = device_register(&mdev->dev);
 	if (ret) {
 		put_device(&mdev->dev);
-		goto create_err;
+		goto mdev_fail;
 	}
 
 	ret = mdev_device_create_ops(kobj, mdev);
 	if (ret)
-		goto create_failed;
+		goto create_fail;
 
 	ret = mdev_create_sysfs_files(&mdev->dev, type);
 	if (ret) {
 		mdev_device_remove_ops(mdev, true);
-		goto create_failed;
+		goto create_fail;
 	}
 
 	mdev->type_kobj = kobj;
+	mdev->active = true;
 	dev_dbg(&mdev->dev, "MDEV: created\n");
 
-	mutex_unlock(&parent->lock);
-
-	mutex_lock(&mdev_list_lock);
-	list_add(&mdev->next, &mdev_list);
-	mutex_unlock(&mdev_list_lock);
-
-	return ret;
+	return 0;
 
-create_failed:
+create_fail:
 	device_unregister(&mdev->dev);
-
-create_err:
-	mutex_unlock(&parent->lock);
+mdev_fail:
 	mdev_put_parent(parent);
 	return ret;
 }
@@ -377,44 +352,39 @@ int mdev_device_remove(struct device *dev, bool force_remove)
 	struct mdev_parent *parent;
 	struct mdev_type *type;
 	int ret;
-	bool found = false;
 
 	mdev = to_mdev_device(dev);
 
 	mutex_lock(&mdev_list_lock);
 	list_for_each_entry(tmp, &mdev_list, next) {
-		if (tmp == mdev) {
-			found = true;
+		if (tmp == mdev)
 			break;
-		}
 	}
 
-	if (found)
-		list_del(&mdev->next);
+	if (tmp != mdev) {
+		mutex_unlock(&mdev_list_lock);
+		return -ENODEV;
+	}
 
-	mutex_unlock(&mdev_list_lock);
+	if (!mdev->active) {
+		mutex_unlock(&mdev_list_lock);
+		return -EAGAIN;
+	}
 
-	if (!found)
-		return -ENODEV;
+	mdev->active = false;
+	mutex_unlock(&mdev_list_lock);
 
 	type = to_mdev_type(mdev->type_kobj);
 	parent = mdev->parent;
-	mutex_lock(&parent->lock);
 
 	ret = mdev_device_remove_ops(mdev, force_remove);
 	if (ret) {
-		mutex_unlock(&parent->lock);
-
-		mutex_lock(&mdev_list_lock);
-		list_add(&mdev->next, &mdev_list);
-		mutex_unlock(&mdev_list_lock);
-
+		mdev->active = true;
 		return ret;
 	}
 
 	mdev_remove_sysfs_files(dev, type);
 	device_unregister(dev);
-	mutex_unlock(&parent->lock);
 	mdev_put_parent(parent);
 
 	return 0;

+ 1 - 1
drivers/vfio/mdev/mdev_private.h

@@ -20,7 +20,6 @@ struct mdev_parent {
 	struct device *dev;
 	const struct mdev_parent_ops *ops;
 	struct kref ref;
-	struct mutex lock;
 	struct list_head next;
 	struct kset *mdev_types_kset;
 	struct list_head type_list;
@@ -34,6 +33,7 @@ struct mdev_device {
 	struct kref ref;
 	struct list_head next;
 	struct kobject *type_kobj;
+	bool active;
 };
 
 #define to_mdev_device(dev)	container_of(dev, struct mdev_device, dev)

+ 7 - 7
drivers/vfio/mdev/mdev_sysfs.c

@@ -257,24 +257,24 @@ int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
 {
 	int ret;
 
-	ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
-	if (ret)
-		return ret;
-
 	ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
 	if (ret)
-		goto device_link_failed;
+		return ret;
 
 	ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type");
 	if (ret)
 		goto type_link_failed;
 
+	ret = sysfs_create_files(&dev->kobj, mdev_device_attrs);
+	if (ret)
+		goto create_files_failed;
+
 	return ret;
 
+create_files_failed:
+	sysfs_remove_link(&dev->kobj, "mdev_type");
 type_link_failed:
 	sysfs_remove_link(type->devices_kobj, dev_name(dev));
-device_link_failed:
-	sysfs_remove_files(&dev->kobj, mdev_device_attrs);
 	return ret;
 }
 

+ 22 - 8
drivers/vfio/platform/vfio_platform_common.c

@@ -17,6 +17,7 @@
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/uaccess.h>
@@ -239,6 +240,7 @@ static void vfio_platform_release(void *device_data)
 				 ret, extra_dbg ? extra_dbg : "");
 			WARN_ON(1);
 		}
+		pm_runtime_put(vdev->device);
 		vfio_platform_regions_cleanup(vdev);
 		vfio_platform_irq_cleanup(vdev);
 	}
@@ -269,6 +271,10 @@ static int vfio_platform_open(void *device_data)
 		if (ret)
 			goto err_irq;
 
+		ret = pm_runtime_get_sync(vdev->device);
+		if (ret < 0)
+			goto err_pm;
+
 		ret = vfio_platform_call_reset(vdev, &extra_dbg);
 		if (ret && vdev->reset_required) {
 			dev_warn(vdev->device, "reset driver is required and reset call failed in open (%d) %s\n",
@@ -283,6 +289,8 @@ static int vfio_platform_open(void *device_data)
 	return 0;
 
 err_rst:
+	pm_runtime_put(vdev->device);
+err_pm:
 	vfio_platform_irq_cleanup(vdev);
 err_irq:
 	vfio_platform_regions_cleanup(vdev);
@@ -630,8 +638,7 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev,
 	ret = device_property_read_string(dev, "compatible",
 					  &vdev->compat);
 	if (ret)
-		pr_err("VFIO: cannot retrieve compat for %s\n",
-			vdev->name);
+		pr_err("VFIO: Cannot retrieve compat for %s\n", vdev->name);
 
 	return ret;
 }
@@ -673,7 +680,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
 
 	ret = vfio_platform_get_reset(vdev);
 	if (ret && vdev->reset_required) {
-		pr_err("vfio: no reset function found for device %s\n",
+		pr_err("VFIO: No reset function found for device %s\n",
 		       vdev->name);
 		return ret;
 	}
@@ -681,18 +688,24 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
 	group = vfio_iommu_group_get(dev);
 	if (!group) {
 		pr_err("VFIO: No IOMMU group for device %s\n", vdev->name);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto put_reset;
 	}
 
 	ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev);
-	if (ret) {
-		vfio_iommu_group_put(group, dev);
-		return ret;
-	}
+	if (ret)
+		goto put_iommu;
 
 	mutex_init(&vdev->igate);
 
+	pm_runtime_enable(vdev->device);
 	return 0;
+
+put_iommu:
+	vfio_iommu_group_put(group, dev);
+put_reset:
+	vfio_platform_put_reset(vdev);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
 
@@ -703,6 +716,7 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev)
 	vdev = vfio_del_group_dev(dev);
 
 	if (vdev) {
+		pm_runtime_disable(vdev->device);
 		vfio_platform_put_reset(vdev);
 		vfio_iommu_group_put(dev->iommu_group, dev);
 	}

+ 3 - 8
drivers/vfio/vfio.c

@@ -630,8 +630,6 @@ static const char * const vfio_driver_whitelist[] = { "pci-stub" };
 
 static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
 {
-	int i;
-
 	if (dev_is_pci(dev)) {
 		struct pci_dev *pdev = to_pci_dev(dev);
 
@@ -639,12 +637,9 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
 			return true;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
-		if (!strcmp(drv->name, vfio_driver_whitelist[i]))
-			return true;
-	}
-
-	return false;
+	return match_string(vfio_driver_whitelist,
+			    ARRAY_SIZE(vfio_driver_whitelist),
+			    drv->name) >= 0;
 }
 
 /*

+ 47 - 26
drivers/vfio/vfio_iommu_type1.c

@@ -83,6 +83,7 @@ struct vfio_dma {
 	size_t			size;		/* Map size (bytes) */
 	int			prot;		/* IOMMU_READ/WRITE */
 	bool			iommu_mapped;
+	bool			lock_cap;	/* capable(CAP_IPC_LOCK) */
 	struct task_struct	*task;
 	struct rb_root		pfn_list;	/* Ex-user pinned pfn list */
 };
@@ -253,29 +254,25 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn)
 	return ret;
 }
 
-static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
+static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async)
 {
 	struct mm_struct *mm;
-	bool is_current;
 	int ret;
 
 	if (!npage)
 		return 0;
 
-	is_current = (task->mm == current->mm);
-
-	mm = is_current ? task->mm : get_task_mm(task);
+	mm = async ? get_task_mm(dma->task) : dma->task->mm;
 	if (!mm)
 		return -ESRCH; /* process exited */
 
 	ret = down_write_killable(&mm->mmap_sem);
 	if (!ret) {
 		if (npage > 0) {
-			if (lock_cap ? !*lock_cap :
-			    !has_capability(task, CAP_IPC_LOCK)) {
+			if (!dma->lock_cap) {
 				unsigned long limit;
 
-				limit = task_rlimit(task,
+				limit = task_rlimit(dma->task,
 						RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 				if (mm->locked_vm + npage > limit)
@@ -289,7 +286,7 @@ static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap)
 		up_write(&mm->mmap_sem);
 	}
 
-	if (!is_current)
+	if (async)
 		mmput(mm);
 
 	return ret;
@@ -400,7 +397,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 				  long npage, unsigned long *pfn_base,
-				  bool lock_cap, unsigned long limit)
+				  unsigned long limit)
 {
 	unsigned long pfn = 0;
 	long ret, pinned = 0, lock_acct = 0;
@@ -423,7 +420,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	 * pages are already counted against the user.
 	 */
 	if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-		if (!lock_cap && current->mm->locked_vm + 1 > limit) {
+		if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
 			put_pfn(*pfn_base, dma->prot);
 			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
 					limit << PAGE_SHIFT);
@@ -449,7 +446,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 		}
 
 		if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-			if (!lock_cap &&
+			if (!dma->lock_cap &&
 			    current->mm->locked_vm + lock_acct + 1 > limit) {
 				put_pfn(pfn, dma->prot);
 				pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
@@ -462,7 +459,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	}
 
 out:
-	ret = vfio_lock_acct(current, lock_acct, &lock_cap);
+	ret = vfio_lock_acct(dma, lock_acct, false);
 
 unpin_out:
 	if (ret) {
@@ -493,7 +490,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 	}
 
 	if (do_accounting)
-		vfio_lock_acct(dma->task, locked - unlocked, NULL);
+		vfio_lock_acct(dma, locked - unlocked, true);
 
 	return unlocked;
 }
@@ -510,7 +507,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 
 	ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
 	if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
-		ret = vfio_lock_acct(dma->task, 1, NULL);
+		ret = vfio_lock_acct(dma, 1, true);
 		if (ret) {
 			put_pfn(*pfn_base, dma->prot);
 			if (ret == -ENOMEM)
@@ -537,7 +534,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
 	unlocked = vfio_iova_put_vfio_pfn(dma, vpfn);
 
 	if (do_accounting)
-		vfio_lock_acct(dma->task, -unlocked, NULL);
+		vfio_lock_acct(dma, -unlocked, true);
 
 	return unlocked;
 }
@@ -829,7 +826,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 		unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list);
 
 	if (do_accounting) {
-		vfio_lock_acct(dma->task, -unlocked, NULL);
+		vfio_lock_acct(dma, -unlocked, true);
 		return 0;
 	}
 	return unlocked;
@@ -1044,14 +1041,12 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 	size_t size = map_size;
 	long npage;
 	unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	bool lock_cap = capable(CAP_IPC_LOCK);
 	int ret = 0;
 
 	while (size) {
 		/* Pin a contiguous chunk of memory */
 		npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
-					      size >> PAGE_SHIFT, &pfn,
-					      lock_cap, limit);
+					      size >> PAGE_SHIFT, &pfn, limit);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
@@ -1126,8 +1121,36 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	dma->iova = iova;
 	dma->vaddr = vaddr;
 	dma->prot = prot;
-	get_task_struct(current);
-	dma->task = current;
+
+	/*
+	 * We need to be able to both add to a task's locked memory and test
+	 * against the locked memory limit and we need to be able to do both
+	 * outside of this call path as pinning can be asynchronous via the
+	 * external interfaces for mdev devices.  RLIMIT_MEMLOCK requires a
+	 * task_struct and VM locked pages requires an mm_struct, however
+	 * holding an indefinite mm reference is not recommended, therefore we
+	 * only hold a reference to a task.  We could hold a reference to
+	 * current, however QEMU uses this call path through vCPU threads,
+	 * which can be killed resulting in a NULL mm and failure in the unmap
+	 * path when called via a different thread.  Avoid this problem by
+	 * using the group_leader as threads within the same group require
+	 * both CLONE_THREAD and CLONE_VM and will therefore use the same
+	 * mm_struct.
+	 *
+	 * Previously we also used the task for testing CAP_IPC_LOCK at the
+	 * time of pinning and accounting, however has_capability() makes use
+	 * of real_cred, a copy-on-write field, so we can't guarantee that it
+	 * matches group_leader, or in fact that it might not change by the
+	 * time it's evaluated.  If a process were to call MAP_DMA with
+	 * CAP_IPC_LOCK but later drop it, it doesn't make sense that they
+	 * possibly see different results for an iommu_mapped vfio_dma vs
+	 * externally mapped.  Therefore track CAP_IPC_LOCK in vfio_dma at the
+	 * time of calling MAP_DMA.
+	 */
+	get_task_struct(current->group_leader);
+	dma->task = current->group_leader;
+	dma->lock_cap = capable(CAP_IPC_LOCK);
+
 	dma->pfn_list = RB_ROOT;
 
 	/* Insert zero-sized and grow as we map chunks of it */
@@ -1162,7 +1185,6 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	struct vfio_domain *d;
 	struct rb_node *n;
 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	bool lock_cap = capable(CAP_IPC_LOCK);
 	int ret;
 
 	/* Arbitrarily pick the first domain in the list for lookups */
@@ -1209,8 +1231,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 
 				npage = vfio_pin_pages_remote(dma, vaddr,
 							      n >> PAGE_SHIFT,
-							      &pfn, lock_cap,
-							      limit);
+							      &pfn, limit);
 				if (npage <= 0) {
 					WARN_ON(!npage);
 					ret = (int)npage;
@@ -1487,7 +1508,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
 			if (!is_invalid_reserved_pfn(vpfn->pfn))
 				locked++;
 		}
-		vfio_lock_acct(dma->task, locked - unlocked, NULL);
+		vfio_lock_acct(dma, locked - unlocked, true);
 	}
 }
 

+ 31 - 0
samples/Kconfig

@@ -115,6 +115,37 @@ config SAMPLE_VFIO_MDEV_MTTY
 	  Build a virtual tty sample driver for use as a VFIO
 	  mediated device
 
+config SAMPLE_VFIO_MDEV_MDPY
+	tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only"
+	depends on VFIO_MDEV_DEVICE && m
+	help
+	  Build a virtual display sample driver for use as a VFIO
+	  mediated device.  It is a simple framebuffer and supports
+	  the region display interface (VFIO_GFX_PLANE_TYPE_REGION).
+
+config SAMPLE_VFIO_MDEV_MDPY_FB
+	tristate "Build VFIO mdpy example guest fbdev driver -- loadable module only"
+	depends on FB && m
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	help
+	  Guest fbdev driver for the virtual display sample driver.
+
+config SAMPLE_VFIO_MDEV_MBOCHS
+	tristate "Build VFIO mbochs example mediated device sample code -- loadable modules only"
+	depends on VFIO_MDEV_DEVICE && m
+	select DMA_SHARED_BUFFER
+	help
+	  Build a virtual display sample driver for use as a VFIO
+	  mediated device.  It supports the dmabuf display interface
+	  (VFIO_GFX_PLANE_TYPE_DMABUF).
+	  Emulate enough of qemu stdvga to make bochs-drm.ko happy.
+	  That is basically the vram memory bar and the bochs dispi
+	  interface vbe registers in the mmio register bar.
+	  Specifically it does *not* include any legacy vga stuff.
+	  Device looks a lot like "qemu -device secondary-vga".
+
 config SAMPLE_STATX
 	bool "Build example extended-stat using code"
 	depends on BROKEN

+ 3 - 0
samples/vfio-mdev/Makefile

@@ -1 +1,4 @@
 obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY) += mdpy.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB) += mdpy-fb.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MBOCHS) += mbochs.o

+ 1406 - 0
samples/vfio-mdev/mbochs.c

@@ -0,0 +1,1406 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mediated virtual PCI display host device driver
+ *
+ * Emulate enough of qemu stdvga to make bochs-drm.ko happy.  That is
+ * basically the vram memory bar and the bochs dispi interface vbe
+ * registers in the mmio register bar.	Specifically it does *not*
+ * include any legacy vga stuff.  Device looks a lot like "qemu -device
+ * secondary-vga".
+ *
+ *   (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * based on mtty driver which is:
+ *   Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *	 Author: Neo Jia <cjia@nvidia.com>
+ *		 Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/cdev.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_rect.h>
+#include <drm/drm_modeset_lock.h>
+#include <drm/drm_property.h>
+#include <drm/drm_plane.h>
+
+
+/*
+ * Register indices of the bochs dispi (vbe) interface.  Each one indexes
+ * mdev_state->vbe[], which has VBE_DISPI_INDEX_COUNT entries.
+ */
+#define VBE_DISPI_INDEX_ID		0x0
+#define VBE_DISPI_INDEX_XRES		0x1
+#define VBE_DISPI_INDEX_YRES		0x2
+#define VBE_DISPI_INDEX_BPP		0x3
+#define VBE_DISPI_INDEX_ENABLE		0x4
+#define VBE_DISPI_INDEX_BANK		0x5
+#define VBE_DISPI_INDEX_VIRT_WIDTH	0x6
+#define VBE_DISPI_INDEX_VIRT_HEIGHT	0x7
+#define VBE_DISPI_INDEX_X_OFFSET	0x8
+#define VBE_DISPI_INDEX_Y_OFFSET	0x9
+#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
+#define VBE_DISPI_INDEX_COUNT		0xb
+
+/* Values of the VBE_DISPI_INDEX_ID register; mbochs_reset() stores ID5. */
+#define VBE_DISPI_ID0			0xB0C0
+#define VBE_DISPI_ID1			0xB0C1
+#define VBE_DISPI_ID2			0xB0C2
+#define VBE_DISPI_ID3			0xB0C3
+#define VBE_DISPI_ID4			0xB0C4
+#define VBE_DISPI_ID5			0xB0C5
+
+/*
+ * Flag values.  VBE_DISPI_ENABLED is tested against the
+ * VBE_DISPI_INDEX_ENABLE register in mbochs_check_framebuffer(); the
+ * others presumably belong to the same register -- TODO confirm.
+ */
+#define VBE_DISPI_DISABLED		0x00
+#define VBE_DISPI_ENABLED		0x01
+#define VBE_DISPI_GETCAPS		0x02
+#define VBE_DISPI_8BIT_DAC		0x20
+#define VBE_DISPI_LFB_ENABLED		0x40
+#define VBE_DISPI_NOCLEARMEM		0x80
+
+
+#define MBOCHS_NAME		  "mbochs"
+#define MBOCHS_CLASS_NAME	  "mbochs"
+
+/*
+ * Layout of the device file as decoded by mdev_access(): PCI config space
+ * starts at offset 0, the mmio bar occupies one page at PAGE_SIZE and the
+ * memory bar follows directly behind the mmio bar.
+ */
+#define MBOCHS_CONFIG_SPACE_SIZE  0xff
+#define MBOCHS_MMIO_BAR_OFFSET	  PAGE_SIZE
+#define MBOCHS_MMIO_BAR_SIZE	  PAGE_SIZE
+#define MBOCHS_MEMORY_BAR_OFFSET  (MBOCHS_MMIO_BAR_OFFSET + \
+				   MBOCHS_MMIO_BAR_SIZE)
+
+/*
+ * Store a 16 or 32 bit value at addr.  Both arguments are parenthesized so
+ * that arbitrary expressions can be passed safely.
+ *
+ * NOTE(review): despite the names no byte swapping is done, the value is
+ * stored in host byte order -- confirm this is only meant to run on
+ * little-endian hosts.
+ */
+#define STORE_LE16(addr, val)	(*(u16 *)(addr) = (val))
+#define STORE_LE32(addr, val)	(*(u32 *)(addr) = (val))
+
+
+MODULE_LICENSE("GPL v2");
+
+/*
+ * Upper bound, in megabytes, for the memory of all mbochs devices taken
+ * together; enforced in mbochs_create().  Exposed read-only as module
+ * parameter "count".  The description is attached to that same name so it
+ * shows up for the parameter that actually exists.
+ */
+static int max_mbytes = 256;
+module_param_named(count, max_mbytes, int, 0444);
+MODULE_PARM_DESC(count, "megabytes available to " MBOCHS_NAME " devices");
+
+
+/* Suffixes of the supported type names, appended to MBOCHS_CLASS_NAME "-". */
+#define MBOCHS_TYPE_1 "small"
+#define MBOCHS_TYPE_2 "medium"
+#define MBOCHS_TYPE_3 "large"
+
+/*
+ * Table of supported device types.  name is compared with the name of the
+ * type kobject in mbochs_find_type(); mbytes is the size of the device
+ * memory in megabytes and is what mbochs_create() accounts against
+ * max_mbytes.
+ */
+static const struct mbochs_type {
+	const char *name;
+	u32 mbytes;
+} mbochs_types[] = {
+	{
+		.name	= MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1,
+		.mbytes = 4,
+	}, {
+		.name	= MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2,
+		.mbytes = 16,
+	}, {
+		.name	= MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3,
+		.mbytes = 64,
+	},
+};
+
+
+static dev_t		mbochs_devt;
+static struct class	*mbochs_class;
+static struct cdev	mbochs_cdev;
+static struct device	mbochs_dev;
+/*
+ * Megabytes currently handed out to devices: increased in mbochs_create(),
+ * decreased in mbochs_remove().
+ * NOTE(review): read and modified without any locking, while mdev-core does
+ * not serialize create/remove callbacks -- confirm whether concurrent
+ * create/remove needs protection here.
+ */
+static int		mbochs_used_mbytes;
+
+/*
+ * Framebuffer layout as computed by mbochs_check_framebuffer().  Instances
+ * are compared bytewise by mbochs_modes_equal(), so the structure is always
+ * zeroed as a whole before it is filled in.
+ */
+struct mbochs_mode {
+	u32 drm_format;	/* DRM_FORMAT_* code, DRM_FORMAT_XRGB8888 for 32 bpp */
+	u32 bytepp;	/* bytes per pixel */
+	u32 width;	/* from the XRES register */
+	u32 height;	/* from the YRES register */
+	u32 stride;	/* bytes per scanline: virtual width * bytepp */
+	u32 __pad;	/* presumably explicit padding in front of the u64s */
+	u64 offset;	/* byte offset of the first pixel in device memory */
+	u64 size;	/* stride * height */
+};
+
+/*
+ * Bookkeeping for one exported dma-buf.
+ * NOTE(review): no user of this structure is visible in this part of the
+ * file; the description is inferred from the field names and types only --
+ * TODO confirm against the dmabuf code further down.
+ */
+struct mbochs_dmabuf {
+	struct mbochs_mode mode;
+	u32 id;
+	struct page **pages;
+	pgoff_t pagecount;
+	struct dma_buf *buf;
+	struct mdev_state *mdev_state;
+	struct list_head next;
+	bool unlinked;
+};
+
+/*
+ * State of each mdev device.  Allocated in mbochs_create(), attached to the
+ * mdev as driver data and freed in mbochs_remove().
+ */
+struct mdev_state {
+	u8 *vconfig;		/* emulated PCI config space */
+	u64 bar_mask[3];	/* masks answered to BAR size probes */
+	u32 memory_bar_mask;
+	struct mutex ops_lock;	/* held by mdev_access() for each access */
+	struct mdev_device *mdev;
+	struct vfio_device_info dev_info;
+
+	const struct mbochs_type *type;
+	u16 vbe[VBE_DISPI_INDEX_COUNT];	/* bochs dispi register file */
+	u64 memsize;		/* device memory size in bytes */
+	struct page **pages;	/* pagecount entries, zeroed at creation */
+	pgoff_t pagecount;	/* memsize >> PAGE_SHIFT */
+
+	struct list_head dmabufs;
+	u32 active_id;
+	u32 next_id;		/* starts at 1, see mbochs_create() */
+};
+
+/* Names of the vbe registers, indexed by register index; see vbe_name(). */
+static const char *vbe_name_list[VBE_DISPI_INDEX_COUNT] = {
+	[VBE_DISPI_INDEX_ID]               = "id",
+	[VBE_DISPI_INDEX_XRES]             = "xres",
+	[VBE_DISPI_INDEX_YRES]             = "yres",
+	[VBE_DISPI_INDEX_BPP]              = "bpp",
+	[VBE_DISPI_INDEX_ENABLE]           = "enable",
+	[VBE_DISPI_INDEX_BANK]             = "bank",
+	[VBE_DISPI_INDEX_VIRT_WIDTH]       = "virt-width",
+	[VBE_DISPI_INDEX_VIRT_HEIGHT]      = "virt-height",
+	[VBE_DISPI_INDEX_X_OFFSET]         = "x-offset",
+	[VBE_DISPI_INDEX_Y_OFFSET]         = "y-offset",
+	[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = "video-mem",
+};
+
+/*
+ * Translate a vbe register index into its name for debug messages.
+ * Indices outside of vbe_name_list[] yield "(invalid)".
+ */
+static const char *vbe_name(u32 index)
+{
+	return index < ARRAY_SIZE(vbe_name_list) ? vbe_name_list[index]
+						 : "(invalid)";
+}
+
+static struct page *mbochs_get_page(struct mdev_state *mdev_state,
+				    pgoff_t pgoff);
+
+/*
+ * Look up the mbochs_types[] entry whose name equals the name of the given
+ * kobject.  Returns NULL when no entry matches.
+ */
+static const struct mbochs_type *mbochs_find_type(struct kobject *kobj)
+{
+	const struct mbochs_type *entry;
+
+	for (entry = mbochs_types;
+	     entry < mbochs_types + ARRAY_SIZE(mbochs_types); entry++) {
+		if (!strcmp(entry->name, kobj->name))
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Fill in the emulated PCI config space (mdev_state->vconfig) and the BAR
+ * size masks.  Expects mdev_state->memsize to be set already, as it
+ * determines the mask of BAR 0.
+ */
+static void mbochs_create_config_space(struct mdev_state *mdev_state)
+{
+	/*
+	 * Vendor/device and subsystem ids -- presumably those of the qemu
+	 * stdvga device this driver emulates; TODO confirm.
+	 */
+	STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID],
+		   0x1234);
+	STORE_LE16((u16 *) &mdev_state->vconfig[PCI_DEVICE_ID],
+		   0x1111);
+	STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_VENDOR_ID],
+		   PCI_SUBVENDOR_ID_REDHAT_QUMRANET);
+	STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_ID],
+		   PCI_SUBDEVICE_ID_QEMU);
+
+	STORE_LE16((u16 *) &mdev_state->vconfig[PCI_COMMAND],
+		   PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+	STORE_LE16((u16 *) &mdev_state->vconfig[PCI_CLASS_DEVICE],
+		   PCI_CLASS_DISPLAY_OTHER);
+	mdev_state->vconfig[PCI_CLASS_REVISION] =  0x01;
+
+	/* BAR 0: prefetchable 32 bit memory bar, sized like the device memory */
+	STORE_LE32((u32 *) &mdev_state->vconfig[PCI_BASE_ADDRESS_0],
+		   PCI_BASE_ADDRESS_SPACE_MEMORY |
+		   PCI_BASE_ADDRESS_MEM_TYPE_32	 |
+		   PCI_BASE_ADDRESS_MEM_PREFETCH);
+	/* two's complement of the size; used by handle_pci_cfg_write() */
+	mdev_state->bar_mask[0] = ~(mdev_state->memsize) + 1;
+
+	/* BAR 2: 32 bit memory bar of MBOCHS_MMIO_BAR_SIZE bytes */
+	STORE_LE32((u32 *) &mdev_state->vconfig[PCI_BASE_ADDRESS_2],
+		   PCI_BASE_ADDRESS_SPACE_MEMORY |
+		   PCI_BASE_ADDRESS_MEM_TYPE_32);
+	mdev_state->bar_mask[2] = ~(MBOCHS_MMIO_BAR_SIZE) + 1;
+}
+
+/*
+ * Derive the framebuffer layout from the current vbe register values.
+ *
+ * On success *mode is filled in and 0 is returned.  If the display is not
+ * enabled, the bpp is not 32, the resolution is below 64x64 or the
+ * framebuffer does not fit into the device memory, *mode is zeroed and
+ * -EINVAL is returned.
+ *
+ * A warning is raised when ops_lock is not locked on entry.
+ */
+static int mbochs_check_framebuffer(struct mdev_state *mdev_state,
+				    struct mbochs_mode *mode)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	u16 *vbe = mdev_state->vbe;
+	u32 virt_width;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	if (!(vbe[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED))
+		goto nofb;
+
+	/* zero all bytes so that modes can be compared with memcmp() */
+	memset(mode, 0, sizeof(*mode));
+	switch (vbe[VBE_DISPI_INDEX_BPP]) {
+	case 32:
+		mode->drm_format = DRM_FORMAT_XRGB8888;
+		mode->bytepp = 4;
+		break;
+	default:
+		dev_info_ratelimited(dev, "%s: bpp %d not supported\n",
+				     __func__, vbe[VBE_DISPI_INDEX_BPP]);
+		goto nofb;
+	}
+
+	mode->width  = vbe[VBE_DISPI_INDEX_XRES];
+	mode->height = vbe[VBE_DISPI_INDEX_YRES];
+	virt_width  = vbe[VBE_DISPI_INDEX_VIRT_WIDTH];
+	/* a virtual width smaller than the visible width is ignored */
+	if (virt_width < mode->width)
+		virt_width = mode->width;
+	mode->stride = virt_width * mode->bytepp;
+	mode->size   = (u64)mode->stride * mode->height;
+	mode->offset = ((u64)vbe[VBE_DISPI_INDEX_X_OFFSET] * mode->bytepp +
+		       (u64)vbe[VBE_DISPI_INDEX_Y_OFFSET] * mode->stride);
+
+	if (mode->width < 64 || mode->height < 64) {
+		dev_info_ratelimited(dev, "%s: invalid resolution %dx%d\n",
+				     __func__, mode->width, mode->height);
+		goto nofb;
+	}
+	/* the whole framebuffer has to lie within the device memory */
+	if (mode->offset + mode->size > mdev_state->memsize) {
+		dev_info_ratelimited(dev, "%s: framebuffer memory overflow\n",
+				     __func__);
+		goto nofb;
+	}
+
+	return 0;
+
+nofb:
+	memset(mode, 0, sizeof(*mode));
+	return -EINVAL;
+}
+
+/*
+ * Bytewise comparison of two framebuffer layouts; true when every byte of
+ * both structures is the same.
+ */
+static bool mbochs_modes_equal(struct mbochs_mode *mode1,
+			       struct mbochs_mode *mode2)
+{
+	return !memcmp(mode1, mode2, sizeof(*mode1));
+}
+
+/*
+ * Handle a write to the emulated PCI config space.  Only writes to the
+ * BAR 0 and BAR 2 registers have an effect, writes to any other offset are
+ * dropped silently.  buf is read as one 32 bit value; count is not used.
+ */
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+				 char *buf, u32 count)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	/* BAR number; only meaningful for the offsets handled below */
+	int index = (offset - PCI_BASE_ADDRESS_0) / 0x04;
+	u32 cfg_addr;
+
+	switch (offset) {
+	case PCI_BASE_ADDRESS_0:
+	case PCI_BASE_ADDRESS_2:
+		cfg_addr = *(u32 *)buf;
+
+		if (cfg_addr == 0xffffffff) {
+			/* all-ones write: answer with the mask of this BAR */
+			cfg_addr = (cfg_addr & mdev_state->bar_mask[index]);
+		} else {
+			/* address assignment: keep the address bits only */
+			cfg_addr &= PCI_BASE_ADDRESS_MEM_MASK;
+			if (cfg_addr)
+				dev_info(dev, "BAR #%d @ 0x%x\n",
+					 index, cfg_addr);
+		}
+
+		/* carry over the non-address bits of the stored low byte */
+		cfg_addr |= (mdev_state->vconfig[offset] &
+			     ~PCI_BASE_ADDRESS_MEM_MASK);
+		STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
+		break;
+	}
+}
+
+/*
+ * Handle a write to the mmio bar; offset is relative to the start of the
+ * bar.  Only 2 byte writes to the bochs dispi range update state (the
+ * matching vbe[] entry); everything else is merely logged at debug level.
+ */
+static void handle_mmio_write(struct mdev_state *mdev_state, u16 offset,
+			      char *buf, u32 count)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	int index;
+	u16 reg16;
+
+	switch (offset) {
+	case 0x400 ... 0x41f: /* vga ioports remapped */
+		goto unhandled;
+	case 0x500 ... 0x515: /* bochs dispi interface */
+		if (count != 2)
+			goto unhandled;
+		/* registers are 16 bit wide, two bytes of offset each */
+		index = (offset - 0x500) / 2;
+		reg16 = *(u16 *)buf;
+		if (index < ARRAY_SIZE(mdev_state->vbe))
+			mdev_state->vbe[index] = reg16;
+		dev_dbg(dev, "%s: vbe write %d = %d (%s)\n",
+			__func__, index, reg16, vbe_name(index));
+		break;
+	case 0x600 ... 0x607: /* qemu extended regs */
+		goto unhandled;
+	default:
+unhandled:
+		dev_dbg(dev, "%s: @0x%03x, count %d (unhandled)\n",
+			__func__, offset, count);
+		break;
+	}
+}
+
+/*
+ * Handle a read from the mmio bar; offset is relative to the start of the
+ * bar.  2 byte reads from the bochs dispi range return the matching vbe[]
+ * entry (0 for an index beyond the array); every other read fills buf with
+ * count zero bytes.
+ */
+static void handle_mmio_read(struct mdev_state *mdev_state, u16 offset,
+			     char *buf, u32 count)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	u16 reg16 = 0;
+	int index;
+
+	switch (offset) {
+	case 0x500 ... 0x515: /* bochs dispi interface */
+		if (count != 2)
+			goto unhandled;
+		/* registers are 16 bit wide, two bytes of offset each */
+		index = (offset - 0x500) / 2;
+		if (index < ARRAY_SIZE(mdev_state->vbe))
+			reg16 = mdev_state->vbe[index];
+		dev_dbg(dev, "%s: vbe read %d = %d (%s)\n",
+			__func__, index, reg16, vbe_name(index));
+		*(u16 *)buf = reg16;
+		break;
+	default:
+unhandled:
+		dev_dbg(dev, "%s: @0x%03x, count %d (unhandled)\n",
+			__func__, offset, count);
+		memset(buf, 0, count);
+		break;
+	}
+}
+
+/*
+ * Common backend for reads from and writes to the device file.
+ *
+ * @mdev:     device being accessed
+ * @buf:      buffer holding the data to write or receiving the data read;
+ *            accessed with memcpy() and plain pointer dereferences
+ * @count:    number of bytes to transfer
+ * @pos:      offset into the device file; selects PCI config space, the
+ *            mmio bar or the memory bar
+ * @is_write: true for a write, false for a read
+ *
+ * Returns @count on success and -1 when the access does not fall into any
+ * of the three ranges.  ops_lock is held for the duration of the access.
+ */
+static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
+			   loff_t pos, bool is_write)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	struct device *dev = mdev_dev(mdev);
+	struct page *pg;
+	loff_t poff;
+	char *map;
+	int ret = 0;
+
+	mutex_lock(&mdev_state->ops_lock);
+
+	if (pos < MBOCHS_CONFIG_SPACE_SIZE) {
+		/*
+		 * NOTE(review): only pos is range checked here, pos + count
+		 * may reach past the end of vconfig -- confirm that callers
+		 * bound count.
+		 */
+		if (is_write)
+			handle_pci_cfg_write(mdev_state, pos, buf, count);
+		else
+			memcpy(buf, (mdev_state->vconfig + pos), count);
+
+	} else if (pos >= MBOCHS_MMIO_BAR_OFFSET &&
+		   pos + count <= MBOCHS_MEMORY_BAR_OFFSET) {
+		/* make pos relative to the start of the mmio bar */
+		pos -= MBOCHS_MMIO_BAR_OFFSET;
+		if (is_write)
+			handle_mmio_write(mdev_state, pos, buf, count);
+		else
+			handle_mmio_read(mdev_state, pos, buf, count);
+
+	} else if (pos >= MBOCHS_MEMORY_BAR_OFFSET &&
+		   pos + count <=
+		   MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) {
+		/*
+		 * Make pos relative to the start of the memory bar.  The
+		 * memory bar base has to be subtracted here, not the mmio
+		 * bar base: otherwise every access ends up one page too far
+		 * into the device memory and an access to the last page
+		 * asks for a page index equal to pagecount.
+		 */
+		pos -= MBOCHS_MEMORY_BAR_OFFSET;
+		poff = pos & ~PAGE_MASK;
+		pg = mbochs_get_page(mdev_state, pos >> PAGE_SHIFT);
+		/*
+		 * NOTE(review): a single page is mapped, so this assumes
+		 * that poff + count does not cross a page boundary and that
+		 * mbochs_get_page() cannot fail -- confirm against the
+		 * callers and against mbochs_get_page().
+		 */
+		map = kmap(pg);
+		if (is_write)
+			memcpy(map + poff, buf, count);
+		else
+			memcpy(buf, map + poff, count);
+		kunmap(pg);
+		put_page(pg);
+
+	} else {
+		dev_dbg(dev, "%s: %s @0x%llx (unhandled)\n",
+			__func__, is_write ? "WR" : "RD", pos);
+		ret = -1;
+		goto accessfailed;
+	}
+
+	ret = count;
+
+accessfailed:
+	mutex_unlock(&mdev_state->ops_lock);
+
+	return ret;
+}
+
+/*
+ * Put the vbe register file into its reset state: all registers zero,
+ * except for the id register (VBE_DISPI_ID5) and the video memory size in
+ * units of 64k.  Always returns 0.
+ */
+static int mbochs_reset(struct mdev_device *mdev)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+	u32 mem_64k_units = state->memsize / (64 * 1024);
+	u16 *vbe = state->vbe;
+
+	memset(vbe, 0, sizeof(state->vbe));
+	vbe[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;
+	vbe[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = mem_64k_units;
+
+	return 0;
+}
+
+/*
+ * Create callback: allocate the per-device state, the virtual config
+ * space and the (initially empty) page pointer array for a new mdev.
+ *
+ * Falls back to the first entry of mbochs_types when the kobject does not
+ * match a known type.  Returns 0 on success, -ENOMEM when the memory
+ * budget (max_mbytes) would be exceeded or an allocation fails.
+ */
+static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+	const struct mbochs_type *type = mbochs_find_type(kobj);
+	struct device *dev = mdev_dev(mdev);
+	struct mdev_state *state;
+
+	if (!type)
+		type = &mbochs_types[0];
+	if (type->mbytes + mbochs_used_mbytes > max_mbytes)
+		return -ENOMEM;
+
+	state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL);
+	if (!state->vconfig)
+		goto err_mem;
+
+	/* One page pointer per page of video memory; pages come later. */
+	state->memsize = type->mbytes * 1024 * 1024;
+	state->pagecount = state->memsize >> PAGE_SHIFT;
+	state->pages = kcalloc(state->pagecount, sizeof(struct page *),
+			       GFP_KERNEL);
+	if (!state->pages)
+		goto err_mem;
+
+	dev_info(dev, "%s: %s, %d MB, %ld pages\n", __func__,
+		 kobj->name, type->mbytes, state->pagecount);
+
+	mutex_init(&state->ops_lock);
+	state->mdev = mdev;
+	mdev_set_drvdata(mdev, state);
+	INIT_LIST_HEAD(&state->dmabufs);
+	state->next_id = 1;
+
+	state->type = type;
+	mbochs_create_config_space(state);
+	mbochs_reset(mdev);
+
+	mbochs_used_mbytes += type->mbytes;
+	return 0;
+
+err_mem:
+	/* vconfig may still be NULL here; kfree() accepts that. */
+	kfree(state->vconfig);
+	kfree(state);
+	return -ENOMEM;
+}
+
+/*
+ * Remove callback: give the device's memory budget back and free the
+ * per-device state allocated by mbochs_create().  Always returns 0.
+ */
+static int mbochs_remove(struct mdev_device *mdev)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+
+	mbochs_used_mbytes -= state->type->mbytes;
+	mdev_set_drvdata(mdev, NULL);
+
+	kfree(state->pages);
+	kfree(state->vconfig);
+	kfree(state);
+
+	return 0;
+}
+
+/*
+ * Read callback: split the request into 4-, 2- or 1-byte accesses
+ * (the widest one the remaining length and the current alignment allow)
+ * and forward each of them to mdev_access().
+ *
+ * Returns the number of bytes copied to userspace, or -EFAULT when an
+ * access or the copy to userspace fails.
+ */
+static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf,
+			   size_t count, loff_t *ppos)
+{
+	unsigned int done = 0;
+	int ret;
+
+	while (count) {
+		union {
+			u32 dword;
+			u16 word;
+			u8 byte;
+		} val;
+		size_t filled;
+
+		if (count >= 4 && !(*ppos % 4))
+			filled = sizeof(val.dword);
+		else if (count >= 2 && !(*ppos % 2))
+			filled = sizeof(val.word);
+		else
+			filled = sizeof(val.byte);
+
+		ret = mdev_access(mdev, (char *)&val, filled, *ppos, false);
+		if (ret <= 0)
+			return -EFAULT;
+
+		if (copy_to_user(buf, &val, filled))
+			return -EFAULT;
+
+		count -= filled;
+		done += filled;
+		*ppos += filled;
+		buf += filled;
+	}
+
+	return done;
+}
+
+/*
+ * Write callback: split the request into 4-, 2- or 1-byte accesses
+ * (the widest one the remaining length and the current alignment allow),
+ * fetch each chunk from userspace and forward it to mdev_access().
+ *
+ * Returns the number of bytes consumed, or -EFAULT when the copy from
+ * userspace or an access fails.
+ */
+static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	unsigned int done = 0;
+	int ret;
+
+	while (count) {
+		union {
+			u32 dword;
+			u16 word;
+			u8 byte;
+		} val;
+		size_t filled;
+
+		if (count >= 4 && !(*ppos % 4))
+			filled = sizeof(val.dword);
+		else if (count >= 2 && !(*ppos % 2))
+			filled = sizeof(val.word);
+		else
+			filled = sizeof(val.byte);
+
+		if (copy_from_user(&val, buf, filled))
+			return -EFAULT;
+
+		ret = mdev_access(mdev, (char *)&val, filled, *ppos, true);
+		if (ret <= 0)
+			return -EFAULT;
+
+		count -= filled;
+		done += filled;
+		*ppos += filled;
+		buf += filled;
+	}
+
+	return done;
+}
+
+/*
+ * Look up (allocating on first use) the page backing page offset @pgoff
+ * and take a reference on it.  Warns if ops_lock is not held.
+ * Returns NULL when the page cannot be allocated.
+ */
+static struct page *__mbochs_get_page(struct mdev_state *mdev_state,
+				      pgoff_t pgoff)
+{
+	struct page **slot = &mdev_state->pages[pgoff];
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	if (*slot == NULL) {
+		*slot = alloc_pages(GFP_HIGHUSER | __GFP_ZERO, 0);
+		if (*slot == NULL)
+			return NULL;
+	}
+
+	get_page(*slot);
+	return *slot;
+}
+
+/*
+ * Locked wrapper around __mbochs_get_page(): validates @pgoff against
+ * the page count (warning and returning NULL when out of range) and
+ * takes ops_lock around the lookup.
+ */
+static struct page *mbochs_get_page(struct mdev_state *mdev_state,
+				    pgoff_t pgoff)
+{
+	struct page *pg;
+
+	if (WARN_ON(pgoff >= mdev_state->pagecount))
+		return NULL;
+
+	mutex_lock(&mdev_state->ops_lock);
+	pg = __mbochs_get_page(mdev_state, pgoff);
+	mutex_unlock(&mdev_state->ops_lock);
+	return pg;
+}
+
+/*
+ * Drop one reference on every page currently present in the page array
+ * and clear the slots.  Warns if ops_lock is not held.
+ */
+static void mbochs_put_pages(struct mdev_state *mdev_state)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	int released = 0;
+	int idx;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	for (idx = 0; idx < mdev_state->pagecount; idx++) {
+		if (mdev_state->pages[idx]) {
+			put_page(mdev_state->pages[idx]);
+			mdev_state->pages[idx] = NULL;
+			released++;
+		}
+	}
+	dev_dbg(dev, "%s: %d pages released\n", __func__, released);
+}
+
+/*
+ * Fault handler for mappings of the memory bar: resolve the faulting
+ * address to a page of the device and hand it to the VM.  Returns
+ * VM_FAULT_SIGBUS when the offset is out of range or no page is available.
+ */
+static int mbochs_region_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct mdev_state *state = vma->vm_private_data;
+	pgoff_t pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+
+	if (pgoff >= state->pagecount)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = mbochs_get_page(state, pgoff);
+	return vmf->page ? 0 : VM_FAULT_SIGBUS;
+}
+
+/* VM operations installed by mbochs_mmap() on memory bar mappings. */
+static const struct vm_operations_struct mbochs_region_vm_ops = {
+	.fault = mbochs_region_vm_fault,
+};
+
+/*
+ * mmap callback: only shared mappings that start at the memory bar
+ * offset and do not exceed the device memory size are accepted.
+ * Pages are provided lazily by mbochs_region_vm_fault().
+ */
+static int mbochs_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+
+	if (vma->vm_pgoff != MBOCHS_MEMORY_BAR_OFFSET >> PAGE_SHIFT)
+		return -EINVAL;
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+	if (vma->vm_end - vma->vm_start > state->memsize)
+		return -EINVAL;
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_private_data = state;
+	vma->vm_ops = &mbochs_region_vm_ops;
+	return 0;
+}
+
+/*
+ * Fault handler for mappings of an exported dmabuf: return the page at
+ * the faulting page offset with an extra reference held.  Warns and
+ * returns VM_FAULT_SIGBUS when the offset lies outside the dmabuf.
+ */
+static int mbochs_dmabuf_vm_fault(struct vm_fault *vmf)
+{
+	struct mbochs_dmabuf *dmabuf = vmf->vma->vm_private_data;
+	struct page *pg;
+
+	if (WARN_ON(vmf->pgoff >= dmabuf->pagecount))
+		return VM_FAULT_SIGBUS;
+
+	pg = dmabuf->pages[vmf->pgoff];
+	vmf->page = pg;
+	get_page(pg);
+	return 0;
+}
+
+/* VM operations installed by mbochs_mmap_dmabuf() on dmabuf mappings. */
+static const struct vm_operations_struct mbochs_dmabuf_vm_ops = {
+	.fault = mbochs_dmabuf_vm_fault,
+};
+
+/*
+ * dma-buf mmap callback: accept shared mappings only and let
+ * mbochs_dmabuf_vm_fault() supply the pages on demand.
+ */
+static int mbochs_mmap_dmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
+{
+	struct mbochs_dmabuf *dmabuf = buf->priv;
+	struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	vma->vm_private_data = dmabuf;
+	vma->vm_ops = &mbochs_dmabuf_vm_ops;
+	return 0;
+}
+
+/*
+ * Emit one debug line describing @dmabuf, tagged with @prefix: id,
+ * fourcc, geometry, stride, offset, size and page count.
+ */
+static void mbochs_print_dmabuf(struct mbochs_dmabuf *dmabuf,
+				const char *prefix)
+{
+	struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+	u32 fourcc = dmabuf->mode.drm_format;
+
+	/* Print the fourcc as four characters, or "----" when it is zero. */
+	dev_dbg(dev, "%s/%d: %c%c%c%c, %dx%d, stride %d, off 0x%llx, size 0x%llx, pages %ld\n",
+		prefix, dmabuf->id,
+		fourcc ? ((fourcc >>  0) & 0xff) : '-',
+		fourcc ? ((fourcc >>  8) & 0xff) : '-',
+		fourcc ? ((fourcc >> 16) & 0xff) : '-',
+		fourcc ? ((fourcc >> 24) & 0xff) : '-',
+		dmabuf->mode.width, dmabuf->mode.height, dmabuf->mode.stride,
+		dmabuf->mode.offset, dmabuf->mode.size, dmabuf->pagecount);
+}
+
+/*
+ * dma-buf map callback: build a scatter/gather table covering the
+ * dmabuf's pages and map it for the attached device.
+ *
+ * Returns the table on success or ERR_PTR(-ENOMEM) when any step fails.
+ */
+static struct sg_table *mbochs_map_dmabuf(struct dma_buf_attachment *at,
+					  enum dma_data_direction direction)
+{
+	struct mbochs_dmabuf *dmabuf = at->dmabuf->priv;
+	struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+	struct sg_table *table;
+
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	if (sg_alloc_table_from_pages(table, dmabuf->pages, dmabuf->pagecount,
+				      0, dmabuf->mode.size, GFP_KERNEL) < 0) {
+		kfree(table);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!dma_map_sg(at->dev, table->sgl, table->nents, direction)) {
+		sg_free_table(table);
+		kfree(table);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return table;
+}
+
+/*
+ * dma-buf unmap callback: undo mbochs_map_dmabuf().
+ * @at:        attachment the table was mapped for
+ * @sg:        table returned by mbochs_map_dmabuf()
+ * @direction: DMA direction used for the mapping
+ */
+static void mbochs_unmap_dmabuf(struct dma_buf_attachment *at,
+				struct sg_table *sg,
+				enum dma_data_direction direction)
+{
+	struct mbochs_dmabuf *dmabuf = at->dmabuf->priv;
+	struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+	/* Tear down the DMA mapping with the same arguments used to map. */
+	dma_unmap_sg(at->dev, sg->sgl, sg->nents, direction);
+	sg_free_table(sg);
+	kfree(sg);
+}
+
+/*
+ * dma-buf release callback: drop the page references held for the
+ * exported buffer and mark the descriptor as no longer exported.
+ *
+ * If mbochs_close() already unlinked the descriptor from the device
+ * list, this is the last user and the descriptor is freed here,
+ * together with its page pointer array.
+ */
+static void mbochs_release_dmabuf(struct dma_buf *buf)
+{
+	struct mbochs_dmabuf *dmabuf = buf->priv;
+	struct mdev_state *mdev_state = dmabuf->mdev_state;
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	pgoff_t pg;
+
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+	for (pg = 0; pg < dmabuf->pagecount; pg++)
+		put_page(dmabuf->pages[pg]);
+
+	mutex_lock(&mdev_state->ops_lock);
+	dmabuf->buf = NULL;
+	if (dmabuf->unlinked) {
+		/* The array was allocated separately; free it as well. */
+		kfree(dmabuf->pages);
+		kfree(dmabuf);
+	}
+	mutex_unlock(&mdev_state->ops_lock);
+}
+
+/* dma-buf map_atomic callback: atomically map page @page_num of @buf. */
+static void *mbochs_kmap_atomic_dmabuf(struct dma_buf *buf,
+				       unsigned long page_num)
+{
+	struct mbochs_dmabuf *dmabuf = buf->priv;
+
+	return kmap_atomic(dmabuf->pages[page_num]);
+}
+
+/* dma-buf map callback: map page @page_num of @buf with kmap(). */
+static void *mbochs_kmap_dmabuf(struct dma_buf *buf, unsigned long page_num)
+{
+	struct mbochs_dmabuf *dmabuf = buf->priv;
+
+	return kmap(dmabuf->pages[page_num]);
+}
+
+/* dma-buf operations used for buffers exported by mbochs_dmabuf_export(). */
+static struct dma_buf_ops mbochs_dmabuf_ops = {
+	.map_dma_buf	  = mbochs_map_dmabuf,
+	.unmap_dma_buf	  = mbochs_unmap_dmabuf,
+	.release	  = mbochs_release_dmabuf,
+	.map_atomic	  = mbochs_kmap_atomic_dmabuf,
+	.map		  = mbochs_kmap_dmabuf,
+	.mmap		  = mbochs_mmap_dmabuf,
+};
+
+/*
+ * Allocate a dmabuf descriptor for @mode, take a reference on every
+ * device page the framebuffer covers and add the descriptor to the
+ * device's dmabuf list.  Warns if ops_lock is not held.
+ *
+ * Returns the new descriptor, or NULL when an allocation fails.
+ */
+static struct mbochs_dmabuf *mbochs_dmabuf_alloc(struct mdev_state *mdev_state,
+						 struct mbochs_mode *mode)
+{
+	struct mbochs_dmabuf *dmabuf;
+	pgoff_t page_offset, pg;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	dmabuf = kzalloc(sizeof(struct mbochs_dmabuf), GFP_KERNEL);
+	if (!dmabuf)
+		return NULL;
+
+	dmabuf->mode = *mode;
+	dmabuf->id = mdev_state->next_id++;
+	dmabuf->pagecount = DIV_ROUND_UP(mode->size, PAGE_SIZE);
+	dmabuf->pages = kcalloc(dmabuf->pagecount, sizeof(struct page *),
+				GFP_KERNEL);
+	if (!dmabuf->pages)
+		goto err_free_dmabuf;
+
+	/* Collect (and reference) the device pages backing the framebuffer. */
+	page_offset = dmabuf->mode.offset >> PAGE_SHIFT;
+	for (pg = 0; pg < dmabuf->pagecount; pg++) {
+		dmabuf->pages[pg] = __mbochs_get_page(mdev_state,
+						      page_offset + pg);
+		if (!dmabuf->pages[pg])
+			goto err_free_pages;
+	}
+
+	dmabuf->mdev_state = mdev_state;
+	list_add(&dmabuf->next, &mdev_state->dmabufs);
+
+	mbochs_print_dmabuf(dmabuf, __func__);
+	return dmabuf;
+
+err_free_pages:
+	/* Drop only the references taken so far (entries below pg). */
+	while (pg > 0)
+		put_page(dmabuf->pages[--pg]);
+	kfree(dmabuf->pages);
+err_free_dmabuf:
+	kfree(dmabuf);
+	return NULL;
+}
+
+/*
+ * Return the first dmabuf on the device list whose mode equals @mode,
+ * or NULL when there is none.  Warns if ops_lock is not held.
+ */
+static struct mbochs_dmabuf *
+mbochs_dmabuf_find_by_mode(struct mdev_state *mdev_state,
+			   struct mbochs_mode *mode)
+{
+	struct mbochs_dmabuf *cur;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	list_for_each_entry(cur, &mdev_state->dmabufs, next) {
+		if (!mbochs_modes_equal(&cur->mode, mode))
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the dmabuf on the device list with the given @id, or NULL when
+ * there is none.  Warns if ops_lock is not held.
+ */
+static struct mbochs_dmabuf *
+mbochs_dmabuf_find_by_id(struct mdev_state *mdev_state, u32 id)
+{
+	struct mbochs_dmabuf *cur;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	list_for_each_entry(cur, &mdev_state->dmabufs, next) {
+		if (cur->id != id)
+			continue;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Export @dmabuf through the dma-buf framework and remember the
+ * resulting buffer in dmabuf->buf.  Warns if ops_lock is not held.
+ *
+ * Returns 0 on success, -EINVAL when the framebuffer offset is not
+ * page aligned, or the error reported by dma_buf_export().
+ */
+static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf)
+{
+	struct mdev_state *mdev_state = dmabuf->mdev_state;
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct dma_buf *exported;
+
+	WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+	if (!IS_ALIGNED(dmabuf->mode.offset, PAGE_SIZE)) {
+		dev_info_ratelimited(dev, "%s: framebuffer not page-aligned\n",
+				     __func__);
+		return -EINVAL;
+	}
+
+	exp_info.priv = dmabuf;
+	exp_info.size = dmabuf->mode.size;
+	exp_info.ops = &mbochs_dmabuf_ops;
+
+	exported = dma_buf_export(&exp_info);
+	if (IS_ERR(exported)) {
+		dev_info_ratelimited(dev, "%s: dma_buf_export failed: %ld\n",
+				     __func__, PTR_ERR(exported));
+		return PTR_ERR(exported);
+	}
+
+	dmabuf->buf = exported;
+	dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+	return 0;
+}
+
+/*
+ * Fill @region_info (offset, size, flags) for the region selected by
+ * region_info->index.  Regions other than config space, bar 0 (memory)
+ * and bar 2 (mmio) are reported as empty.
+ *
+ * Returns 0 on success, -EINVAL when the device has no state or the
+ * index is out of range.  @cap_type_id and @cap_type are not used.
+ */
+static int mbochs_get_region_info(struct mdev_device *mdev,
+				  struct vfio_region_info *region_info,
+				  u16 *cap_type_id, void **cap_type)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+
+	if (!mdev_state)
+		return -EINVAL;
+
+	if (region_info->index >= VFIO_PCI_NUM_REGIONS)
+		return -EINVAL;
+
+	switch (region_info->index) {
+	case VFIO_PCI_CONFIG_REGION_INDEX:
+		region_info->offset = 0;
+		region_info->size   = MBOCHS_CONFIG_SPACE_SIZE;
+		region_info->flags  = VFIO_REGION_INFO_FLAG_READ |
+				      VFIO_REGION_INFO_FLAG_WRITE;
+		break;
+	case VFIO_PCI_BAR0_REGION_INDEX:
+		region_info->offset = MBOCHS_MEMORY_BAR_OFFSET;
+		region_info->size   = mdev_state->memsize;
+		region_info->flags  = VFIO_REGION_INFO_FLAG_READ  |
+				      VFIO_REGION_INFO_FLAG_WRITE |
+				      VFIO_REGION_INFO_FLAG_MMAP;
+		break;
+	case VFIO_PCI_BAR2_REGION_INDEX:
+		region_info->offset = MBOCHS_MMIO_BAR_OFFSET;
+		region_info->size   = MBOCHS_MMIO_BAR_SIZE;
+		region_info->flags  = VFIO_REGION_INFO_FLAG_READ |
+				      VFIO_REGION_INFO_FLAG_WRITE;
+		break;
+	default:
+		region_info->offset = 0;
+		region_info->size   = 0;
+		region_info->flags  = 0;
+		break;
+	}
+
+	return 0;
+}
+
+/* Report zero interrupts for every irq index.  Always returns 0. */
+static int mbochs_get_irq_info(struct mdev_device *mdev,
+			       struct vfio_irq_info *irq_info)
+{
+	irq_info->count = 0;
+	return 0;
+}
+
+/*
+ * Describe the device as a vfio-pci device with the standard number of
+ * regions and irqs.  Always returns 0.
+ */
+static int mbochs_get_device_info(struct mdev_device *mdev,
+				  struct vfio_device_info *dev_info)
+{
+	dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+	dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+	dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+	return 0;
+}
+
+/*
+ * VFIO_DEVICE_QUERY_GFX_PLANE backend.
+ * @mdev:  mediated device being queried
+ * @plane: request/response structure, updated in place
+ *
+ * A probe request succeeds only for the dmabuf plane type.  For a real
+ * query the current framebuffer configuration is checked; when it is
+ * valid, a dmabuf descriptor matching the mode is looked up (or created)
+ * and its parameters and id are reported.  When it is not valid (or the
+ * plane is not the primary plane) an all-zero plane is reported.
+ *
+ * Return: 0 on success, -EINVAL for unsupported flags, -ENOMEM when no
+ * dmabuf descriptor could be allocated.
+ */
+static int mbochs_query_gfx_plane(struct mdev_device *mdev,
+				  struct vfio_device_gfx_plane_info *plane)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	struct device *dev = mdev_dev(mdev);
+	struct mbochs_dmabuf *dmabuf;
+	struct mbochs_mode mode;
+	int ret;
+
+	if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) {
+		if (plane->flags == (VFIO_GFX_PLANE_TYPE_PROBE |
+				     VFIO_GFX_PLANE_TYPE_DMABUF))
+			return 0;
+		return -EINVAL;
+	}
+
+	if (plane->flags != VFIO_GFX_PLANE_TYPE_DMABUF)
+		return -EINVAL;
+
+	plane->drm_format_mod = 0;
+	plane->x_pos	      = 0;
+	plane->y_pos	      = 0;
+	plane->x_hot	      = 0;
+	plane->y_hot	      = 0;
+
+	mutex_lock(&mdev_state->ops_lock);
+
+	ret = -EINVAL;
+	if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY)
+		ret = mbochs_check_framebuffer(mdev_state, &mode);
+	if (ret < 0) {
+		plane->drm_format     = 0;
+		plane->width	      = 0;
+		plane->height	      = 0;
+		plane->stride	      = 0;
+		plane->size	      = 0;
+		plane->dmabuf_id      = 0;
+		goto done;
+	}
+
+	dmabuf = mbochs_dmabuf_find_by_mode(mdev_state, &mode);
+	if (!dmabuf)
+		/* Keep the result: without it a fresh mode always failed. */
+		dmabuf = mbochs_dmabuf_alloc(mdev_state, &mode);
+	if (!dmabuf) {
+		mutex_unlock(&mdev_state->ops_lock);
+		return -ENOMEM;
+	}
+
+	plane->drm_format     = dmabuf->mode.drm_format;
+	plane->width	      = dmabuf->mode.width;
+	plane->height	      = dmabuf->mode.height;
+	plane->stride	      = dmabuf->mode.stride;
+	plane->size	      = dmabuf->mode.size;
+	plane->dmabuf_id      = dmabuf->id;
+
+done:
+	/* Track (and log) which dmabuf is currently the primary plane. */
+	if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY &&
+	    mdev_state->active_id != plane->dmabuf_id) {
+		dev_dbg(dev, "%s: primary: %d => %d\n", __func__,
+			mdev_state->active_id, plane->dmabuf_id);
+		mdev_state->active_id = plane->dmabuf_id;
+	}
+	mutex_unlock(&mdev_state->ops_lock);
+	return 0;
+}
+
+/*
+ * VFIO_DEVICE_GET_GFX_DMABUF backend: look up the dmabuf with the given
+ * @id, export it if that has not happened yet and return a new file
+ * descriptor for it.
+ *
+ * Returns the result of dma_buf_fd(), -ENOENT when the id is unknown or
+ * -EINVAL when the dmabuf has no exported buffer.
+ */
+static int mbochs_get_gfx_dmabuf(struct mdev_device *mdev,
+				 u32 id)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	struct mbochs_dmabuf *dmabuf;
+
+	mutex_lock(&mdev_state->ops_lock);
+	dmabuf = mbochs_dmabuf_find_by_id(mdev_state, id);
+	if (dmabuf && !dmabuf->buf)
+		mbochs_dmabuf_export(dmabuf);
+	mutex_unlock(&mdev_state->ops_lock);
+
+	if (!dmabuf)
+		return -ENOENT;
+	if (!dmabuf->buf)
+		return -EINVAL;
+
+	return dma_buf_fd(dmabuf->buf, 0);
+}
+
+/*
+ * ioctl callback: dispatch the VFIO device ioctls this device handles.
+ *
+ * The info-style requests copy the fixed-size head of the user structure
+ * in, check argsz against that size, fill the structure via the matching
+ * helper and copy the same number of bytes back.
+ *
+ * Returns 0 or a helper-specific value on success, -EFAULT when a user
+ * copy fails, -EINVAL for malformed requests and -ENOTTY for commands
+ * that are not handled here.
+ */
+static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
+			unsigned long arg)
+{
+	int ret = 0;
+	unsigned long minsz;
+	struct mdev_state *mdev_state;
+
+	mdev_state = mdev_get_drvdata(mdev);
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = mbochs_get_device_info(mdev, &info);
+		if (ret)
+			return ret;
+
+		/* Cached copy; VFIO_DEVICE_GET_IRQ_INFO reads num_irqs from it. */
+		memcpy(&mdev_state->dev_info, &info, sizeof(info));
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+		u16 cap_type_id = 0;
+		void *cap_type = NULL;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = mbochs_get_region_info(mdev, &info, &cap_type_id,
+					   &cap_type);
+		if (ret)
+			return ret;
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		/* The index limit comes from the copy cached by GET_INFO. */
+		if ((info.argsz < minsz) ||
+		    (info.index >= mdev_state->dev_info.num_irqs))
+			return -EINVAL;
+
+		ret = mbochs_get_irq_info(mdev, &info);
+		if (ret)
+			return ret;
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_QUERY_GFX_PLANE:
+	{
+		struct vfio_device_gfx_plane_info plane;
+
+		minsz = offsetofend(struct vfio_device_gfx_plane_info,
+				    region_index);
+
+		if (copy_from_user(&plane, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (plane.argsz < minsz)
+			return -EINVAL;
+
+		ret = mbochs_query_gfx_plane(mdev, &plane);
+		if (ret)
+			return ret;
+
+		if (copy_to_user((void __user *)arg, &plane, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_GET_GFX_DMABUF:
+	{
+		u32 dmabuf_id;
+
+		if (get_user(dmabuf_id, (__u32 __user *)arg))
+			return -EFAULT;
+
+		return mbochs_get_gfx_dmabuf(mdev, dmabuf_id);
+	}
+
+	case VFIO_DEVICE_SET_IRQS:
+		return -EINVAL;
+
+	case VFIO_DEVICE_RESET:
+		return mbochs_reset(mdev);
+	}
+	return -ENOTTY;
+}
+
+/*
+ * Open callback: pin this module while the device is open.
+ * Returns 0 on success, -ENODEV when the module reference cannot be taken.
+ */
+static int mbochs_open(struct mdev_device *mdev)
+{
+	return try_module_get(THIS_MODULE) ? 0 : -ENODEV;
+}
+
+/*
+ * Release callback: unlink every dmabuf descriptor from the device,
+ * drop the device's page references and release the module reference
+ * taken in mbochs_open().
+ *
+ * Descriptors that are still exported are only marked as unlinked and
+ * are freed later by mbochs_release_dmabuf(); all others are freed here,
+ * including their page pointer array.
+ */
+static void mbochs_close(struct mdev_device *mdev)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	struct mbochs_dmabuf *dmabuf, *tmp;
+
+	mutex_lock(&mdev_state->ops_lock);
+
+	list_for_each_entry_safe(dmabuf, tmp, &mdev_state->dmabufs, next) {
+		list_del(&dmabuf->next);
+		if (dmabuf->buf) {
+			/* free in mbochs_release_dmabuf() */
+			dmabuf->unlinked = true;
+		} else {
+			/* The array is a separate allocation; free it too. */
+			kfree(dmabuf->pages);
+			kfree(dmabuf);
+		}
+	}
+	mbochs_put_pages(mdev_state);
+
+	mutex_unlock(&mdev_state->ops_lock);
+	module_put(THIS_MODULE);
+}
+
+/* sysfs "memory" attribute: video memory size of the device's type. */
+static ssize_t
+memory_show(struct device *dev, struct device_attribute *attr,
+	    char *buf)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev_from_dev(dev));
+
+	return sprintf(buf, "%d MB\n", state->type->mbytes);
+}
+static DEVICE_ATTR_RO(memory);
+
+/* Per-device sysfs attributes, exposed in the "vendor" group. */
+static struct attribute *mdev_dev_attrs[] = {
+	&dev_attr_memory.attr,
+	NULL,
+};
+
+static const struct attribute_group mdev_dev_group = {
+	.name  = "vendor",
+	.attrs = mdev_dev_attrs,
+};
+
+/* NULL-terminated group list referenced from mdev_fops.mdev_attr_groups. */
+const struct attribute_group *mdev_dev_groups[] = {
+	&mdev_dev_group,
+	NULL,
+};
+
+/* mdev type "name" attribute: prints the name of the type's kobject. */
+static ssize_t
+name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	return sprintf(buf, "%s\n", kobj->name);
+}
+MDEV_TYPE_ATTR_RO(name);
+
+/*
+ * mdev type "description" attribute: prints the amount of video memory
+ * of the type, or 0 when the kobject matches no known type.
+ */
+static ssize_t
+description_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const struct mbochs_type *type = mbochs_find_type(kobj);
+	int mbytes = 0;
+
+	if (type)
+		mbytes = type->mbytes;
+
+	return sprintf(buf, "virtual display, %d MB video memory\n", mbytes);
+}
+MDEV_TYPE_ATTR_RO(description);
+
+/*
+ * mdev type "available_instances" attribute: how many more devices of
+ * this type fit into the remaining memory budget.  Prints 0 when the
+ * kobject matches no known type, mirroring the NULL handling in
+ * description_show().
+ */
+static ssize_t
+available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const struct mbochs_type *type = mbochs_find_type(kobj);
+	int count = 0;
+
+	if (type)
+		count = (max_mbytes - mbochs_used_mbytes) / type->mbytes;
+
+	return sprintf(buf, "%d\n", count);
+}
+MDEV_TYPE_ATTR_RO(available_instances);
+
+/* mdev type "device_api" attribute: prints the vfio-pci API string. */
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+			       char *buf)
+{
+	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
+}
+MDEV_TYPE_ATTR_RO(device_api);
+
+/* Attributes shared by every supported mdev type. */
+static struct attribute *mdev_types_attrs[] = {
+	&mdev_type_attr_name.attr,
+	&mdev_type_attr_description.attr,
+	&mdev_type_attr_device_api.attr,
+	&mdev_type_attr_available_instances.attr,
+	NULL,
+};
+
+/* One attribute group per type; they differ only in the group name. */
+static struct attribute_group mdev_type_group1 = {
+	.name  = MBOCHS_TYPE_1,
+	.attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group2 = {
+	.name  = MBOCHS_TYPE_2,
+	.attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group3 = {
+	.name  = MBOCHS_TYPE_3,
+	.attrs = mdev_types_attrs,
+};
+
+/* NULL-terminated list referenced from mdev_fops.supported_type_groups. */
+static struct attribute_group *mdev_type_groups[] = {
+	&mdev_type_group1,
+	&mdev_type_group2,
+	&mdev_type_group3,
+	NULL,
+};
+
+/* Parent ops passed to mdev_register_device() in mbochs_dev_init(). */
+static const struct mdev_parent_ops mdev_fops = {
+	.owner			= THIS_MODULE,
+	.mdev_attr_groups	= mdev_dev_groups,
+	.supported_type_groups	= mdev_type_groups,
+	.create			= mbochs_create,
+	.remove			= mbochs_remove,
+	.open			= mbochs_open,
+	.release		= mbochs_close,
+	.read			= mbochs_read,
+	.write			= mbochs_write,
+	.ioctl			= mbochs_ioctl,
+	.mmap			= mbochs_mmap,
+};
+
+/* File operations for the char device; only the owner is set. */
+static const struct file_operations vd_fops = {
+	.owner		= THIS_MODULE,
+};
+
+/* Release callback installed on mbochs_dev in mbochs_dev_init(). */
+static void mbochs_device_release(struct device *dev)
+{
+	/* nothing */
+}
+
+/*
+ * Module init: reserve a char device region, add the cdev, create the
+ * device class, register the parent device and finally register it
+ * with the mdev core.
+ *
+ * Return: 0 on success, otherwise the error of the step that failed,
+ * with every earlier step undone.
+ */
+static int __init mbochs_dev_init(void)
+{
+	int ret = 0;
+
+	ret = alloc_chrdev_region(&mbochs_devt, 0, MINORMASK, MBOCHS_NAME);
+	if (ret < 0) {
+		pr_err("Error: failed to register mbochs_dev, err: %d\n", ret);
+		return ret;
+	}
+	cdev_init(&mbochs_cdev, &vd_fops);
+	ret = cdev_add(&mbochs_cdev, mbochs_devt, MINORMASK);
+	if (ret < 0) {
+		pr_err("Error: failed to add mbochs_dev cdev, err: %d\n", ret);
+		goto failed0;
+	}
+	pr_info("%s: major %d\n", __func__, MAJOR(mbochs_devt));
+
+	mbochs_class = class_create(THIS_MODULE, MBOCHS_CLASS_NAME);
+	if (IS_ERR(mbochs_class)) {
+		pr_err("Error: failed to register mbochs_dev class\n");
+		ret = PTR_ERR(mbochs_class);
+		goto failed1;
+	}
+	mbochs_dev.class = mbochs_class;
+	mbochs_dev.release = mbochs_device_release;
+	dev_set_name(&mbochs_dev, "%s", MBOCHS_NAME);
+
+	ret = device_register(&mbochs_dev);
+	if (ret) {
+		/* A failed device_register() still needs put_device(). */
+		put_device(&mbochs_dev);
+		goto failed2;
+	}
+
+	ret = mdev_register_device(&mbochs_dev, &mdev_fops);
+	if (ret)
+		goto failed3;
+
+	return 0;
+
+failed3:
+	device_unregister(&mbochs_dev);
+failed2:
+	class_destroy(mbochs_class);
+failed1:
+	cdev_del(&mbochs_cdev);
+failed0:
+	unregister_chrdev_region(mbochs_devt, MINORMASK);
+	return ret;
+}
+
+/*
+ * Module exit: unregister from the mdev core, then remove the parent
+ * device, the cdev, the char device region and the class.
+ */
+static void __exit mbochs_dev_exit(void)
+{
+	mbochs_dev.bus = NULL;
+	mdev_unregister_device(&mbochs_dev);
+
+	device_unregister(&mbochs_dev);
+	cdev_del(&mbochs_cdev);
+	unregister_chrdev_region(mbochs_devt, MINORMASK);
+	class_destroy(mbochs_class);
+	mbochs_class = NULL;
+}
+
+module_init(mbochs_dev_init)
+module_exit(mbochs_dev_exit)

+ 22 - 0
samples/vfio-mdev/mdpy-defs.h

@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Simple pci display device.
+ *
+ * Framebuffer memory is pci bar 0.
+ * Configuration (read-only) is in pci config space.
+ * Format field uses drm fourcc codes.
+ * ATM only DRM_FORMAT_XRGB8888 is supported.
+ */
+
+/* pci ids */
+#define MDPY_PCI_VENDOR_ID	0x1b36 /* redhat */
+#define MDPY_PCI_DEVICE_ID	0x000f
+#define MDPY_PCI_SUBVENDOR_ID	PCI_SUBVENDOR_ID_REDHAT_QUMRANET
+#define MDPY_PCI_SUBDEVICE_ID	PCI_SUBDEVICE_ID_QEMU
+
+/* pci cfg space offsets for fb config (dword) */
+#define MDPY_VENDORCAP_OFFSET   0x40
+#define MDPY_VENDORCAP_SIZE     0x10
+#define MDPY_FORMAT_OFFSET	(MDPY_VENDORCAP_OFFSET + 0x04)
+#define MDPY_WIDTH_OFFSET	(MDPY_VENDORCAP_OFFSET + 0x08)
+#define MDPY_HEIGHT_OFFSET	(MDPY_VENDORCAP_OFFSET + 0x0c)

+ 232 - 0
samples/vfio-mdev/mdpy-fb.c

@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Framebuffer driver for mdpy (mediated virtual pci display device).
+ *
+ * See mdpy-defs.h for device specs
+ *
+ *   (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * Using some code snippets from simplefb and cirrusfb.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/errno.h>
+#include <linux/fb.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <drm/drm_fourcc.h>
+#include "mdpy-defs.h"
+
+/* Template for the fixed screen info; mdpy_fb_probe() fills in the rest. */
+static const struct fb_fix_screeninfo mdpy_fb_fix = {
+	.id		= "mdpy-fb",
+	.type		= FB_TYPE_PACKED_PIXELS,
+	.visual		= FB_VISUAL_TRUECOLOR,
+	.accel		= FB_ACCEL_NONE,
+};
+
+/*
+ * Template for the variable screen info: 32 bits per pixel with 8-bit
+ * transp/red/green/blue fields at bit offsets 24/16/8/0.  The resolution
+ * is filled in by mdpy_fb_probe().
+ */
+static const struct fb_var_screeninfo mdpy_fb_var = {
+	.height		= -1,
+	.width		= -1,
+	.activate	= FB_ACTIVATE_NOW,
+	.vmode		= FB_VMODE_NONINTERLACED,
+
+	.bits_per_pixel = 32,
+	.transp.offset	= 24,
+	.red.offset	= 16,
+	.green.offset	= 8,
+	.blue.offset	= 0,
+	.transp.length	= 8,
+	.red.length	= 8,
+	.green.length	= 8,
+	.blue.length	= 8,
+};
+
+/* Number of entries in the pseudo palette written by mdpy_fb_setcolreg(). */
+#define PSEUDO_PALETTE_SIZE 16
+
+/* Driver-private data allocated together with the fb_info. */
+struct mdpy_fb_par {
+	u32 palette[PSEUDO_PALETTE_SIZE];
+};
+
+/*
+ * fb_setcolreg callback: store one pseudo palette entry.
+ *
+ * The 16-bit colour components are scaled down to the field lengths of
+ * the current mode and packed at the field offsets; when the mode has a
+ * transp field, all of its bits are set.  Returns -EINVAL for a register
+ * number outside the pseudo palette, 0 otherwise.
+ */
+static int mdpy_fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
+			      u_int transp, struct fb_info *info)
+{
+	const struct fb_var_screeninfo *var = &info->var;
+	u32 *palette = info->pseudo_palette;
+	u32 pixel;
+
+	if (regno >= PSEUDO_PALETTE_SIZE)
+		return -EINVAL;
+
+	pixel  = (u32)(red >> (16 - var->red.length)) << var->red.offset;
+	pixel |= (u32)(green >> (16 - var->green.length)) << var->green.offset;
+	pixel |= (u32)(blue >> (16 - var->blue.length)) << var->blue.offset;
+
+	if (var->transp.length > 0) {
+		u32 alpha = (1 << var->transp.length) - 1;
+
+		pixel |= alpha << var->transp.offset;
+	}
+
+	palette[regno] = pixel;
+	return 0;
+}
+
+/* fb_destroy callback: unmap the framebuffer if it is mapped. */
+static void mdpy_fb_destroy(struct fb_info *info)
+{
+	if (!info->screen_base)
+		return;
+
+	iounmap(info->screen_base);
+}
+
+/* Framebuffer operations; drawing uses the generic cfb_* helpers. */
+static struct fb_ops mdpy_fb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_destroy	= mdpy_fb_destroy,
+	.fb_setcolreg	= mdpy_fb_setcolreg,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+};
+
+/*
+ * mdpy_fb_probe() - bind to an mdpy display device.
+ * @pdev: PCI device being probed
+ * @ent:  matching entry of the id table (unused)
+ *
+ * Reads format, width and height from the device's config space,
+ * validates them, maps bar 0 as the framebuffer and registers an fbdev
+ * device for it.
+ *
+ * Return: 0 on success or a negative error code; on failure every
+ * resource acquired so far is released again and the device is disabled.
+ */
+static int mdpy_fb_probe(struct pci_dev *pdev,
+			 const struct pci_device_id *ent)
+{
+	struct fb_info *info;
+	struct mdpy_fb_par *par;
+	u32 format, width, height;
+	int ret;
+
+	ret = pci_enable_device(pdev);
+	if (ret < 0)
+		return ret;
+
+	ret = pci_request_regions(pdev, "mdpy-fb");
+	if (ret < 0)
+		goto err_disable_dev;
+
+	pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format);
+	pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET,	&width);
+	pci_read_config_dword(pdev, MDPY_HEIGHT_OFFSET, &height);
+	if (format != DRM_FORMAT_XRGB8888) {
+		pci_err(pdev, "format mismatch (0x%x != 0x%x)\n",
+			format, DRM_FORMAT_XRGB8888);
+		ret = -EINVAL;
+		goto err_release_regions;
+	}
+	if (width < 100	 || width > 10000) {
+		pci_err(pdev, "width (%d) out of range\n", width);
+		ret = -EINVAL;
+		goto err_release_regions;
+	}
+	if (height < 100 || height > 10000) {
+		pci_err(pdev, "height (%d) out of range\n", height);
+		ret = -EINVAL;
+		goto err_release_regions;
+	}
+	pci_info(pdev, "mdpy found: %dx%d framebuffer\n",
+		 width, height);
+
+	info = framebuffer_alloc(sizeof(struct mdpy_fb_par), &pdev->dev);
+	if (!info) {
+		/* ret is still 0 here; report the failure explicitly. */
+		ret = -ENOMEM;
+		goto err_release_regions;
+	}
+	pci_set_drvdata(pdev, info);
+	par = info->par;
+
+	info->fix = mdpy_fb_fix;
+	info->fix.smem_start = pci_resource_start(pdev, 0);
+	info->fix.smem_len = pci_resource_len(pdev, 0);
+	info->fix.line_length = width * 4;
+
+	info->var = mdpy_fb_var;
+	info->var.xres = width;
+	info->var.yres = height;
+	info->var.xres_virtual = width;
+	info->var.yres_virtual = height;
+
+	info->screen_size = info->fix.smem_len;
+	info->screen_base = ioremap(info->fix.smem_start,
+				    info->screen_size);
+	if (!info->screen_base) {
+		pci_err(pdev, "ioremap(pcibar) failed\n");
+		ret = -EIO;
+		goto err_release_fb;
+	}
+
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures) {
+		ret = -ENOMEM;
+		goto err_unmap;
+	}
+	info->apertures->ranges[0].base = info->fix.smem_start;
+	info->apertures->ranges[0].size = info->fix.smem_len;
+
+	info->fbops = &mdpy_fb_ops;
+	info->flags = FBINFO_DEFAULT;
+	info->pseudo_palette = par->palette;
+
+	ret = register_framebuffer(info);
+	if (ret < 0) {
+		pci_err(pdev, "mdpy-fb device register failed: %d\n", ret);
+		goto err_unmap;
+	}
+
+	pci_info(pdev, "fb%d registered\n", info->node);
+	return 0;
+
+err_unmap:
+	iounmap(info->screen_base);
+
+err_release_fb:
+	framebuffer_release(info);
+
+err_release_regions:
+	pci_release_regions(pdev);
+
+err_disable_dev:
+	pci_disable_device(pdev);
+
+	return ret;
+}
+
+/*
+ * Remove callback: unregister and free the framebuffer, then give back
+ * the PCI regions requested in mdpy_fb_probe() and disable the device.
+ */
+static void mdpy_fb_remove(struct pci_dev *pdev)
+{
+	struct fb_info *info = pci_get_drvdata(pdev);
+
+	unregister_framebuffer(info);
+	framebuffer_release(info);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+/* PCI ids this driver binds to (see mdpy-defs.h), zero-terminated. */
+static struct pci_device_id mdpy_fb_pci_table[] = {
+	{
+		.vendor	   = MDPY_PCI_VENDOR_ID,
+		.device	   = MDPY_PCI_DEVICE_ID,
+		.subvendor = MDPY_PCI_SUBVENDOR_ID,
+		.subdevice = MDPY_PCI_SUBDEVICE_ID,
+	}, {
+		/* end of list */
+	}
+};
+
+/* PCI driver description registered by mdpy_fb_init(). */
+static struct pci_driver mdpy_fb_pci_driver = {
+	.name		= "mdpy-fb",
+	.id_table	= mdpy_fb_pci_table,
+	.probe		= mdpy_fb_probe,
+	.remove		= mdpy_fb_remove,
+};
+
+/* Module init: register the PCI driver and pass its result through. */
+static int __init mdpy_fb_init(void)
+{
+	return pci_register_driver(&mdpy_fb_pci_driver);
+}
+
+module_init(mdpy_fb_init);
+
+MODULE_DEVICE_TABLE(pci, mdpy_fb_pci_table);
+MODULE_LICENSE("GPL v2");

+ 807 - 0
samples/vfio-mdev/mdpy.c

@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mediated virtual PCI display host device driver
+ *
+ * See mdpy-defs.h for device specs
+ *
+ *   (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * based on mtty driver which is:
+ *   Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *	 Author: Neo Jia <cjia@nvidia.com>
+ *		 Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/cdev.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <drm/drm_fourcc.h>
+#include "mdpy-defs.h"
+
+#define MDPY_NAME		"mdpy"
+#define MDPY_CLASS_NAME		"mdpy"
+
+/* size of the emulated config space buffer and of the config region */
+#define MDPY_CONFIG_SPACE_SIZE	0xff
+/* file offset at which the framebuffer BAR starts */
+#define MDPY_MEMORY_BAR_OFFSET	PAGE_SIZE
+/* region index reported for the framebuffer in gfx plane queries */
+#define MDPY_DISPLAY_REGION	16
+
+/*
+ * Store a 16/32-bit value at addr.  Both arguments are parenthesized so
+ * that arbitrary expressions can be passed safely.
+ * NOTE(review): the value is stored in host byte order despite the "LE"
+ * in the name -- confirm whether big-endian hosts need cpu_to_le*().
+ */
+#define STORE_LE16(addr, val)	(*(u16 *)(addr) = (val))
+#define STORE_LE32(addr, val)	(*(u32 *)(addr) = (val))
+
+
+MODULE_LICENSE("GPL v2");
+
+/* upper bound on existing mdpy devices; exposed as module parameter "count" */
+static int max_devices = 4;
+module_param_named(count, max_devices, int, 0444);
+MODULE_PARM_DESC(count, "number of " MDPY_NAME " devices");
+
+
+/* type name suffixes; used for the mdpy_types[] names and the sysfs type groups */
+#define MDPY_TYPE_1 "vga"
+#define MDPY_TYPE_2 "xga"
+#define MDPY_TYPE_3 "hd"
+
+/*
+ * Supported display types.  All entries use DRM_FORMAT_XRGB8888 with four
+ * bytes per pixel; only the resolution differs.
+ */
+static const struct mdpy_type {
+	const char *name;	/* compared against the type kobject name */
+	u32 format;		/* DRM fourcc code */
+	u32 bytepp;		/* bytes per pixel */
+	u32 width;		/* horizontal resolution */
+	u32 height;		/* vertical resolution */
+} mdpy_types[] = {
+	{
+		.name	= MDPY_CLASS_NAME "-" MDPY_TYPE_1,
+		.format = DRM_FORMAT_XRGB8888,
+		.bytepp = 4,
+		.width	= 640,
+		.height = 480,
+	}, {
+		.name	= MDPY_CLASS_NAME "-" MDPY_TYPE_2,
+		.format = DRM_FORMAT_XRGB8888,
+		.bytepp = 4,
+		.width	= 1024,
+		.height = 768,
+	}, {
+		.name	= MDPY_CLASS_NAME "-" MDPY_TYPE_3,
+		.format = DRM_FORMAT_XRGB8888,
+		.bytepp = 4,
+		.width	= 1920,
+		.height = 1080,
+	},
+};
+
+/* module-wide state, set up in mdpy_dev_init() */
+static dev_t		mdpy_devt;	/* allocated char device region */
+static struct class	*mdpy_class;
+static struct cdev	mdpy_cdev;
+static struct device	mdpy_dev;	/* device registered with the mdev core */
+static u32		mdpy_count;	/* number of currently existing mdevs */
+
+/* State of each mdev device */
+struct mdev_state {
+	u8 *vconfig;		/* emulated config space, MDPY_CONFIG_SPACE_SIZE bytes */
+	u32 bar_mask;		/* mask applied when BAR0 is written with all ones */
+	struct mutex ops_lock;	/* held across mdev_access() */
+	struct mdev_device *mdev;
+	struct vfio_device_info dev_info; /* copy of the last VFIO_DEVICE_GET_INFO reply */
+
+	const struct mdpy_type *type;
+	u32 memsize;		/* framebuffer size in bytes (power of two) */
+	void *memblk;		/* framebuffer memory from vmalloc_user() */
+};
+
+/*
+ * Look up the display type whose name equals kobj->name.
+ *
+ * Returns a pointer into mdpy_types[], or NULL when nothing matches.
+ */
+static const struct mdpy_type *mdpy_find_type(struct kobject *kobj)
+{
+	const struct mdpy_type *entry = mdpy_types;
+	const struct mdpy_type *end = mdpy_types + ARRAY_SIZE(mdpy_types);
+
+	while (entry != end) {
+		if (!strcmp(entry->name, kobj->name))
+			return entry;
+		entry++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Fill in the emulated PCI config space of a freshly created device.
+ *
+ * Expects mdev_state->vconfig, ->type and ->memsize to be set up already.
+ * Also computes ->bar_mask from ->memsize.
+ */
+static void mdpy_create_config_space(struct mdev_state *mdev_state)
+{
+	const struct mdpy_type *type = mdev_state->type;
+	u8 *cfg = mdev_state->vconfig;
+
+	/* device identification */
+	STORE_LE16((u16 *) &cfg[PCI_VENDOR_ID], MDPY_PCI_VENDOR_ID);
+	STORE_LE16((u16 *) &cfg[PCI_DEVICE_ID], MDPY_PCI_DEVICE_ID);
+	STORE_LE16((u16 *) &cfg[PCI_SUBSYSTEM_VENDOR_ID],
+		   MDPY_PCI_SUBVENDOR_ID);
+	STORE_LE16((u16 *) &cfg[PCI_SUBSYSTEM_ID], MDPY_PCI_SUBDEVICE_ID);
+
+	/* command, status, class and revision */
+	STORE_LE16((u16 *) &cfg[PCI_COMMAND],
+		   PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+	STORE_LE16((u16 *) &cfg[PCI_STATUS], PCI_STATUS_CAP_LIST);
+	STORE_LE16((u16 *) &cfg[PCI_CLASS_DEVICE], PCI_CLASS_DISPLAY_OTHER);
+	cfg[PCI_CLASS_REVISION] = 0x01;
+
+	/* BAR0: 32-bit prefetchable memory, sized by the framebuffer */
+	STORE_LE32((u32 *) &cfg[PCI_BASE_ADDRESS_0],
+		   PCI_BASE_ADDRESS_SPACE_MEMORY |
+		   PCI_BASE_ADDRESS_MEM_TYPE_32	 |
+		   PCI_BASE_ADDRESS_MEM_PREFETCH);
+	mdev_state->bar_mask = ~(mdev_state->memsize) + 1;
+
+	/* vendor specific capability for the config registers */
+	cfg[PCI_CAPABILITY_LIST]       = MDPY_VENDORCAP_OFFSET;
+	cfg[MDPY_VENDORCAP_OFFSET + 0] = 0x09; /* vendor cap */
+	cfg[MDPY_VENDORCAP_OFFSET + 1] = 0x00; /* next ptr */
+	cfg[MDPY_VENDORCAP_OFFSET + 2] = MDPY_VENDORCAP_SIZE;
+	STORE_LE32((u32 *) &cfg[MDPY_FORMAT_OFFSET], type->format);
+	STORE_LE32((u32 *) &cfg[MDPY_WIDTH_OFFSET], type->width);
+	STORE_LE32((u32 *) &cfg[MDPY_HEIGHT_OFFSET], type->height);
+}
+
+/*
+ * Handle a write to the emulated PCI config space.
+ *
+ * @mdev_state: device state
+ * @offset:     config space offset being written
+ * @buf:        kernel buffer holding @count bytes of data
+ * @count:      number of valid bytes in @buf
+ *
+ * Only BAR0 is writable; writes to any other offset are ignored.
+ */
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+				 char *buf, u32 count)
+{
+	struct device *dev = mdev_dev(mdev_state->mdev);
+	u32 cfg_addr;
+
+	switch (offset) {
+	case PCI_BASE_ADDRESS_0:
+		/*
+		 * The BAR is handled as a whole dword.  Callers may hand in
+		 * a 1- or 2-byte buffer, which must not be read as a u32.
+		 */
+		if (count < sizeof(cfg_addr))
+			break;
+		memcpy(&cfg_addr, buf, sizeof(cfg_addr));
+
+		if (cfg_addr == 0xffffffff) {
+			/* all ones written: answer with the size mask */
+			cfg_addr = (cfg_addr & mdev_state->bar_mask);
+		} else {
+			cfg_addr &= PCI_BASE_ADDRESS_MEM_MASK;
+			if (cfg_addr)
+				dev_info(dev, "BAR0 @ 0x%x\n", cfg_addr);
+		}
+
+		/* keep the flag bits stored in the low byte of the BAR */
+		cfg_addr |= (mdev_state->vconfig[offset] &
+			     ~PCI_BASE_ADDRESS_MEM_MASK);
+		STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Perform one access to the device's config space or framebuffer BAR.
+ *
+ * @mdev:     device being accessed
+ * @buf:      kernel buffer to read into / write from
+ * @count:    number of bytes to transfer
+ * @pos:      offset within the device file
+ * @is_write: true for writes, false for reads
+ *
+ * Returns @count on success and -1 when the range is not handled.
+ * The whole access runs under mdev_state->ops_lock.
+ */
+static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
+			   loff_t pos, bool is_write)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	struct device *dev = mdev_dev(mdev);
+	int ret = 0;
+
+	mutex_lock(&mdev_state->ops_lock);
+
+	/* the whole access must fit into the vconfig buffer */
+	if (pos >= 0 && pos + count <= MDPY_CONFIG_SPACE_SIZE) {
+		if (is_write)
+			handle_pci_cfg_write(mdev_state, pos, buf, count);
+		else
+			memcpy(buf, (mdev_state->vconfig + pos), count);
+
+	} else if ((pos >= MDPY_MEMORY_BAR_OFFSET) &&
+		   (pos + count <=
+		    MDPY_MEMORY_BAR_OFFSET + mdev_state->memsize)) {
+		/* translate the file offset into a framebuffer offset */
+		pos -= MDPY_MEMORY_BAR_OFFSET;
+		if (is_write)
+			memcpy(mdev_state->memblk + pos, buf, count);
+		else
+			memcpy(buf, mdev_state->memblk + pos, count);
+
+	} else {
+		dev_info(dev, "%s: %s @0x%llx (unhandled)\n",
+			 __func__, is_write ? "WR" : "RD", pos);
+		ret = -1;
+		goto accessfailed;
+	}
+
+	ret = count;
+
+
+accessfailed:
+	mutex_unlock(&mdev_state->ops_lock);
+
+	return ret;
+}
+
+/*
+ * Reset the device: repaint the framebuffer with a vertical gray
+ * gradient, one memset() per scanline.  Always returns 0.
+ */
+static int mdpy_reset(struct mdev_device *mdev)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	const struct mdpy_type *type = mdev_state->type;
+	u32 stride = type->width * type->bytepp;
+	void *line = mdev_state->memblk;
+	u32 row;
+
+	for (row = 0; row < type->height; row++) {
+		/* byte value grows from 0 towards 255 with the row number */
+		memset(line, row * 255 / type->height, stride);
+		line += stride;
+	}
+
+	return 0;
+}
+
+/*
+ * mdev create callback: allocate and initialize the state of a new device.
+ *
+ * The display type is derived from @kobj; an unknown name falls back to
+ * the first entry of mdpy_types[].  Returns 0 on success and -ENOMEM when
+ * the device limit is reached or an allocation fails.
+ *
+ * Note that mdpy_count is read and updated here without any locking.
+ */
+static int mdpy_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+	const struct mdpy_type *type = mdpy_find_type(kobj);
+	struct device *dev = mdev_dev(mdev);
+	struct mdev_state *state;
+	u32 fbsize;
+
+	if (mdpy_count >= max_devices)
+		return -ENOMEM;
+
+	state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->vconfig = kzalloc(MDPY_CONFIG_SPACE_SIZE, GFP_KERNEL);
+	if (!state->vconfig)
+		goto err_free_state;
+
+	if (!type)
+		type = &mdpy_types[0];
+	/* the BAR size has to be a power of two */
+	fbsize = roundup_pow_of_two(type->width * type->height * type->bytepp);
+
+	state->memblk = vmalloc_user(fbsize);
+	if (!state->memblk)
+		goto err_free_vconfig;
+
+	dev_info(dev, "%s: %s (%dx%d)\n",
+		 __func__, kobj->name, type->width, type->height);
+
+	mutex_init(&state->ops_lock);
+	state->mdev = mdev;
+	mdev_set_drvdata(mdev, state);
+
+	state->type    = type;
+	state->memsize = fbsize;
+	mdpy_create_config_space(state);
+	mdpy_reset(mdev);
+
+	mdpy_count++;
+	return 0;
+
+err_free_vconfig:
+	kfree(state->vconfig);
+err_free_state:
+	kfree(state);
+	return -ENOMEM;
+}
+
+/*
+ * mdev remove callback: detach the state from @mdev and free the
+ * framebuffer, the config space and the state itself.  Always returns 0.
+ */
+static int mdpy_remove(struct mdev_device *mdev)
+{
+	struct mdev_state *state = mdev_get_drvdata(mdev);
+
+	dev_info(mdev_dev(mdev), "%s\n", __func__);
+
+	mdev_set_drvdata(mdev, NULL);
+
+	vfree(state->memblk);
+	kfree(state->vconfig);
+	kfree(state);
+
+	mdpy_count--;
+
+	return 0;
+}
+
+/*
+ * mdev read callback: copy @count bytes starting at *@ppos to user space.
+ *
+ * The request is split into naturally aligned 4-, 2- or 1-byte accesses,
+ * each going through mdev_access().  Returns the number of bytes read,
+ * or -EFAULT when an access or the copy to user space fails.
+ */
+static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	unsigned int done = 0;
+
+	while (count) {
+		size_t chunk;
+		u32 val;
+
+		/* pick the widest access that is aligned and still fits */
+		if (count >= 4 && !(*ppos % 4))
+			chunk = 4;
+		else if (count >= 2 && !(*ppos % 2))
+			chunk = 2;
+		else
+			chunk = 1;
+
+		if (mdev_access(mdev, (char *)&val, chunk, *ppos, false) <= 0)
+			return -EFAULT;
+
+		if (copy_to_user(buf, &val, chunk))
+			return -EFAULT;
+
+		count -= chunk;
+		done += chunk;
+		*ppos += chunk;
+		buf += chunk;
+	}
+
+	return done;
+}
+
+/*
+ * mdev write callback: copy @count bytes from user space to the device,
+ * starting at *@ppos.
+ *
+ * The request is split into naturally aligned 4-, 2- or 1-byte accesses,
+ * each going through mdev_access().  Every access uses a zero-initialized
+ * 4-byte bounce buffer, so code further down that looks at a full dword
+ * never reads past the buffer on short writes.
+ *
+ * Returns the number of bytes written, or -EFAULT when the copy from user
+ * space or an access fails.
+ */
+static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	unsigned int done = 0;
+
+	while (count) {
+		size_t chunk;
+		u32 val = 0;
+
+		/* pick the widest access that is aligned and still fits */
+		if (count >= 4 && !(*ppos % 4))
+			chunk = 4;
+		else if (count >= 2 && !(*ppos % 2))
+			chunk = 2;
+		else
+			chunk = 1;
+
+		if (copy_from_user(&val, buf, chunk))
+			return -EFAULT;
+
+		if (mdev_access(mdev, (char *)&val, chunk, *ppos, true) <= 0)
+			return -EFAULT;
+
+		count -= chunk;
+		done += chunk;
+		*ppos += chunk;
+		buf += chunk;
+	}
+
+	return done;
+}
+
+/*
+ * mdev mmap callback: map the framebuffer into user space.
+ *
+ * The mapping must start at the BAR offset, be shared and must not be
+ * larger than the framebuffer; anything else is rejected with -EINVAL.
+ */
+static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	unsigned long len = vma->vm_end - vma->vm_start;
+
+	if (vma->vm_pgoff != MDPY_MEMORY_BAR_OFFSET >> PAGE_SHIFT ||
+	    vma->vm_end < vma->vm_start)
+		return -EINVAL;
+
+	if (len > mdev_state->memsize || !(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	return remap_vmalloc_range_partial(vma, vma->vm_start,
+					   mdev_state->memblk, len);
+}
+
+/*
+ * Fill in offset, size and flags for the region selected by
+ * region_info->index.
+ *
+ * The config region and the framebuffer (as BAR0 and as
+ * MDPY_DISPLAY_REGION) are described; other standard PCI region indexes
+ * are reported as empty.  Returns -EINVAL when there is no device state
+ * or the index is neither a standard PCI region nor the display region.
+ * @cap_type_id and @cap_type are not used.
+ */
+static int mdpy_get_region_info(struct mdev_device *mdev,
+				struct vfio_region_info *region_info,
+				u16 *cap_type_id, void **cap_type)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	u32 index;
+
+	if (!mdev_state)
+		return -EINVAL;
+
+	index = region_info->index;
+	if (index >= VFIO_PCI_NUM_REGIONS && index != MDPY_DISPLAY_REGION)
+		return -EINVAL;
+
+	if (index == VFIO_PCI_CONFIG_REGION_INDEX) {
+		region_info->offset = 0;
+		region_info->size   = MDPY_CONFIG_SPACE_SIZE;
+		region_info->flags  = (VFIO_REGION_INFO_FLAG_READ |
+				       VFIO_REGION_INFO_FLAG_WRITE);
+	} else if (index == VFIO_PCI_BAR0_REGION_INDEX ||
+		   index == MDPY_DISPLAY_REGION) {
+		region_info->offset = MDPY_MEMORY_BAR_OFFSET;
+		region_info->size   = mdev_state->memsize;
+		region_info->flags  = (VFIO_REGION_INFO_FLAG_READ  |
+				       VFIO_REGION_INFO_FLAG_WRITE |
+				       VFIO_REGION_INFO_FLAG_MMAP);
+	} else {
+		/* remaining regions do not exist on this device */
+		region_info->size   = 0;
+		region_info->offset = 0;
+		region_info->flags  = 0;
+	}
+
+	return 0;
+}
+
+/* Report interrupt info: every index has a count of zero.  Always returns 0. */
+static int mdpy_get_irq_info(struct mdev_device *mdev,
+			     struct vfio_irq_info *irq_info)
+{
+	irq_info->count = 0;
+	return 0;
+}
+
+/*
+ * Fill in the VFIO device info: a PCI device with no usable interrupts.
+ *
+ * num_regions has to cover MDPY_DISPLAY_REGION, because that index is
+ * accepted by mdpy_get_region_info() and handed out by
+ * mdpy_query_gfx_plane(); it lies beyond VFIO_PCI_NUM_REGIONS.
+ * Always returns 0.
+ */
+static int mdpy_get_device_info(struct mdev_device *mdev,
+				struct vfio_device_info *dev_info)
+{
+	dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+	dev_info->num_regions = MDPY_DISPLAY_REGION + 1;
+	dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+	return 0;
+}
+
+/*
+ * Answer a VFIO_DEVICE_QUERY_GFX_PLANE request.
+ *
+ * Only region-type planes are supported.  A probe succeeds when the flags
+ * are exactly PROBE | REGION; a real query needs exactly the REGION flag
+ * and describes the framebuffer.  Everything else yields -EINVAL.
+ */
+static int mdpy_query_gfx_plane(struct mdev_device *mdev,
+				struct vfio_device_gfx_plane_info *plane)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev);
+	const struct mdpy_type *type;
+
+	if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) {
+		if (plane->flags != (VFIO_GFX_PLANE_TYPE_PROBE |
+				     VFIO_GFX_PLANE_TYPE_REGION))
+			return -EINVAL;
+		return 0;
+	}
+
+	if (plane->flags != VFIO_GFX_PLANE_TYPE_REGION)
+		return -EINVAL;
+
+	type = mdev_state->type;
+
+	plane->drm_format     = type->format;
+	plane->width	      = type->width;
+	plane->height	      = type->height;
+	plane->stride	      = (type->width * type->bytepp);
+	plane->size	      = mdev_state->memsize;
+	plane->region_index   = MDPY_DISPLAY_REGION;
+
+	/* unused */
+	plane->drm_format_mod = 0;
+	plane->x_pos	      = 0;
+	plane->y_pos	      = 0;
+	plane->x_hot	      = 0;
+	plane->y_hot	      = 0;
+
+	return 0;
+}
+
+/*
+ * mdev ioctl callback.
+ *
+ * Handles VFIO_DEVICE_GET_INFO, _GET_REGION_INFO, _GET_IRQ_INFO,
+ * _QUERY_GFX_PLANE and _RESET; VFIO_DEVICE_SET_IRQS is rejected with
+ * -EINVAL.  For the "get" style requests the fixed part of the argument
+ * structure is copied in, checked against argsz, filled in and copied
+ * back to user space.
+ *
+ * Returns 0 on success, -EFAULT when user memory cannot be accessed,
+ * -EINVAL for malformed requests and -ENOTTY for unknown commands.
+ */
+static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd,
+		       unsigned long arg)
+{
+	int ret = 0;
+	unsigned long minsz;
+	struct mdev_state *mdev_state;
+
+	mdev_state = mdev_get_drvdata(mdev);
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = mdpy_get_device_info(mdev, &info);
+		if (ret)
+			return ret;
+
+		memcpy(&mdev_state->dev_info, &info, sizeof(info));
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+		u16 cap_type_id = 0;
+		void *cap_type = NULL;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		ret = mdpy_get_region_info(mdev, &info, &cap_type_id,
+					   &cap_type);
+		if (ret)
+			return ret;
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		/*
+		 * Check against the constant that mdpy_get_device_info()
+		 * reports, not against the cached dev_info: the cache is
+		 * still zeroed until VFIO_DEVICE_GET_INFO has been issued.
+		 */
+		if ((info.argsz < minsz) ||
+		    (info.index >= VFIO_PCI_NUM_IRQS))
+			return -EINVAL;
+
+		ret = mdpy_get_irq_info(mdev, &info);
+		if (ret)
+			return ret;
+
+		if (copy_to_user((void __user *)arg, &info, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_QUERY_GFX_PLANE:
+	{
+		struct vfio_device_gfx_plane_info plane;
+
+		minsz = offsetofend(struct vfio_device_gfx_plane_info,
+				    region_index);
+
+		if (copy_from_user(&plane, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (plane.argsz < minsz)
+			return -EINVAL;
+
+		ret = mdpy_query_gfx_plane(mdev, &plane);
+		if (ret)
+			return ret;
+
+		if (copy_to_user((void __user *)arg, &plane, minsz))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	case VFIO_DEVICE_SET_IRQS:
+		/* the device has no interrupts to configure */
+		return -EINVAL;
+
+	case VFIO_DEVICE_RESET:
+		return mdpy_reset(mdev);
+	}
+	return -ENOTTY;
+}
+
+/*
+ * mdev open callback: take a reference on this module.
+ * Returns 0 on success, -ENODEV when the reference cannot be taken.
+ */
+static int mdpy_open(struct mdev_device *mdev)
+{
+	return try_module_get(THIS_MODULE) ? 0 : -ENODEV;
+}
+
+/* mdev release callback: drop the module reference taken in mdpy_open(). */
+static void mdpy_close(struct mdev_device *mdev)
+{
+	module_put(THIS_MODULE);
+}
+
+/* sysfs "resolution" attribute: prints the device's "<width>x<height>". */
+static ssize_t
+resolution_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct mdev_state *mdev_state = mdev_get_drvdata(mdev_from_dev(dev));
+	const struct mdpy_type *type = mdev_state->type;
+
+	return sprintf(buf, "%dx%d\n", type->width, type->height);
+}
+static DEVICE_ATTR_RO(resolution);
+
+/* per-device sysfs attributes, grouped in a "vendor" directory */
+static struct attribute *mdev_dev_attrs[] = {
+	&dev_attr_resolution.attr,
+	NULL,
+};
+
+static const struct attribute_group mdev_dev_group = {
+	.name  = "vendor",
+	.attrs = mdev_dev_attrs,
+};
+
+/* NOTE(review): not static although only referenced from mdev_fops here -- confirm */
+const struct attribute_group *mdev_dev_groups[] = {
+	&mdev_dev_group,
+	NULL,
+};
+
+/* type attribute "name": prints the name of the type kobject */
+static ssize_t
+name_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	return sprintf(buf, "%s\n", kobj->name);
+}
+MDEV_TYPE_ATTR_RO(name);
+
+/*
+ * type attribute "description": prints the framebuffer resolution of the
+ * type, or 0x0 when the kobject name matches no known type.
+ */
+static ssize_t
+description_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	const struct mdpy_type *type = mdpy_find_type(kobj);
+	u32 width = 0;
+	u32 height = 0;
+
+	if (type) {
+		width = type->width;
+		height = type->height;
+	}
+
+	return sprintf(buf, "virtual display, %dx%d framebuffer\n",
+		       width, height);
+}
+MDEV_TYPE_ATTR_RO(description);
+
+/* type attribute "available_instances": device limit minus existing devices */
+static ssize_t
+available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+	return sprintf(buf, "%d\n", max_devices - mdpy_count);
+}
+MDEV_TYPE_ATTR_RO(available_instances);
+
+/* type attribute "device_api": prints VFIO_DEVICE_API_PCI_STRING */
+static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
+			       char *buf)
+{
+	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
+}
+MDEV_TYPE_ATTR_RO(device_api);
+
+/* attributes shared by all supported types */
+static struct attribute *mdev_types_attrs[] = {
+	&mdev_type_attr_name.attr,
+	&mdev_type_attr_description.attr,
+	&mdev_type_attr_device_api.attr,
+	&mdev_type_attr_available_instances.attr,
+	NULL,
+};
+
+/* one attribute group per display type, all using the same attribute set */
+static struct attribute_group mdev_type_group1 = {
+	.name  = MDPY_TYPE_1,
+	.attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group2 = {
+	.name  = MDPY_TYPE_2,
+	.attrs = mdev_types_attrs,
+};
+
+static struct attribute_group mdev_type_group3 = {
+	.name  = MDPY_TYPE_3,
+	.attrs = mdev_types_attrs,
+};
+
+static struct attribute_group *mdev_type_groups[] = {
+	&mdev_type_group1,
+	&mdev_type_group2,
+	&mdev_type_group3,
+	NULL,
+};
+
+/* callbacks and sysfs groups handed to mdev_register_device() */
+static const struct mdev_parent_ops mdev_fops = {
+	.owner			= THIS_MODULE,
+	.mdev_attr_groups	= mdev_dev_groups,
+	.supported_type_groups	= mdev_type_groups,
+	.create			= mdpy_create,
+	.remove			= mdpy_remove,
+	.open			= mdpy_open,
+	.release		= mdpy_close,
+	.read			= mdpy_read,
+	.write			= mdpy_write,
+	.ioctl			= mdpy_ioctl,
+	.mmap			= mdpy_mmap,
+};
+
+/* file operations for mdpy_cdev; no callbacks are provided */
+static const struct file_operations vd_fops = {
+	.owner		= THIS_MODULE,
+};
+
+/* release callback for mdpy_dev, which is a static object: nothing to free */
+static void mdpy_device_release(struct device *dev)
+{
+	/* nothing */
+}
+
+/*
+ * Module init: allocate the char device region, add the cdev, create the
+ * class, register the parent device and register it with the mdev core.
+ *
+ * Returns 0 on success or a negative errno; on failure every step already
+ * completed is undone in reverse order.
+ */
+static int __init mdpy_dev_init(void)
+{
+	int ret = 0;
+
+	ret = alloc_chrdev_region(&mdpy_devt, 0, MINORMASK, MDPY_NAME);
+	if (ret < 0) {
+		pr_err("Error: failed to register mdpy_dev, err: %d\n", ret);
+		return ret;
+	}
+	cdev_init(&mdpy_cdev, &vd_fops);
+	/* cdev_add() can fail; do not carry on with a half set up cdev */
+	ret = cdev_add(&mdpy_cdev, mdpy_devt, MINORMASK);
+	if (ret < 0) {
+		pr_err("Error: failed to add mdpy_dev cdev, err: %d\n", ret);
+		goto failed0;
+	}
+	pr_info("%s: major %d\n", __func__, MAJOR(mdpy_devt));
+
+	mdpy_class = class_create(THIS_MODULE, MDPY_CLASS_NAME);
+	if (IS_ERR(mdpy_class)) {
+		pr_err("Error: failed to register mdpy_dev class\n");
+		ret = PTR_ERR(mdpy_class);
+		goto failed1;
+	}
+	mdpy_dev.class = mdpy_class;
+	mdpy_dev.release = mdpy_device_release;
+	dev_set_name(&mdpy_dev, "%s", MDPY_NAME);
+
+	ret = device_register(&mdpy_dev);
+	if (ret)
+		goto failed2;
+
+	ret = mdev_register_device(&mdpy_dev, &mdev_fops);
+	if (ret)
+		goto failed3;
+
+	return 0;
+
+failed3:
+	device_unregister(&mdpy_dev);
+failed2:
+	class_destroy(mdpy_class);
+failed1:
+	cdev_del(&mdpy_cdev);
+failed0:
+	unregister_chrdev_region(mdpy_devt, MINORMASK);
+	return ret;
+}
+
+/* Module exit: undo everything mdpy_dev_init() set up. */
+static void __exit mdpy_dev_exit(void)
+{
+	/* NOTE(review): reason for clearing .bus before unregistering is not visible here -- confirm */
+	mdpy_dev.bus = NULL;
+	mdev_unregister_device(&mdpy_dev);
+
+	device_unregister(&mdpy_dev);
+	cdev_del(&mdpy_cdev);
+	unregister_chrdev_region(mdpy_devt, MINORMASK);
+	class_destroy(mdpy_class);
+	mdpy_class = NULL;
+}
+
+module_init(mdpy_dev_init)
+module_exit(mdpy_dev_exit)