
Merge tag 'stable/for-linus-3.7-x86-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

Pull Xen update from Konrad Rzeszutek Wilk:
 "Features:
   - When hotplugging PCI devices in a PV guest we can allocate
     Xen-SWIOTLB later.
   - Cleanup Xen SWIOTLB.
   - Support paged-out grants from HVM domains in the backends.
   - Support wild cards in xen-pciback.hide=(BDF) arguments.
   - Update grant status updates with upstream hypervisor.
   - Boot PV guests with more than 128GB.
   - Cleanup Xen MMU code/add comments.
   - Obtain XENVERS using a preferred method.
   - Lay out generic changes to support Xen ARM.
   - Allow privcmd ioctl for HVM (used to do only PV).
   - Do v2 of mmap_batch for privcmd ioctls.
   - If hypervisor saves the LED keyboard light - we will now instruct
     the kernel about its state.
  Fixes:
   - More fixes to Xen PCI backend for various calls/FLR/etc.
   - With more than 4GB in a 64-bit PV guest disable native SWIOTLB.
   - Fix up smatch warnings.
   - Fix up various return values in privcmd and mm."

* tag 'stable/for-linus-3.7-x86-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: (48 commits)
  xen/pciback: Restore the PCI config space after an FLR.
  xen-pciback: properly clean up after calling pcistub_device_find()
  xen/vga: add the xen EFI video mode support
  xen/x86: retrieve keyboard shift status flags from hypervisor.
  xen/gndev: Xen backend support for paged out grant targets V4.
  xen-pciback: support wild cards in slot specifications
  xen/swiotlb: Fix compile warnings when using plain integer instead of NULL pointer.
  xen/swiotlb: Remove functions not needed anymore.
  xen/pcifront: Use Xen-SWIOTLB when initting if required.
  xen/swiotlb: For early initialization, return zero on success.
  xen/swiotlb: Use the swiotlb_late_init_with_tbl to init Xen-SWIOTLB late when PV PCI is used.
  xen/swiotlb: Move the error strings to its own function.
  xen/swiotlb: Move the nr_tbl determination in its own function.
  xen/arm: compile and run xenbus
  xen: resynchronise grant table status codes with upstream
  xen/privcmd: return -EFAULT on error
  xen/privcmd: Fix mmap batch ioctl error status copy back.
  xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
  xen/mm: return more precise error from xen_remap_domain_range()
  xen/mmu: If the revector fails, don't attempt to revector anything else.
  ...
Linus Torvalds, 13 years ago
parent
commit 56d92aa5cf
40 files changed, 916 insertions(+), 249 deletions(-)
  1. arch/ia64/include/asm/xen/interface.h (+6, -1)
  2. arch/x86/include/asm/xen/interface.h (+7, -0)
  3. arch/x86/include/asm/xen/swiotlb-xen.h (+2, -0)
  4. arch/x86/xen/apic.c (+2, -1)
  5. arch/x86/xen/enlighten.c (+11, -4)
  6. arch/x86/xen/mmu.c (+153, -37)
  7. arch/x86/xen/p2m.c (+85, -7)
  8. arch/x86/xen/pci-swiotlb-xen.c (+45, -7)
  9. arch/x86/xen/platform-pci-unplug.c (+1, -0)
  10. arch/x86/xen/setup.c (+18, -0)
  11. arch/x86/xen/vga.c (+7, -0)
  12. arch/x86/xen/xen-head.S (+54, -2)
  13. arch/x86/xen/xen-ops.h (+2, -1)
  14. drivers/net/xen-netback/netback.c (+3, -8)
  15. drivers/pci/xen-pcifront.c (+10, -5)
  16. drivers/tty/hvc/hvc_xen.c (+2, -0)
  17. drivers/xen/events.c (+15, -3)
  18. drivers/xen/gntdev.c (+1, -1)
  19. drivers/xen/grant-table.c (+60, -7)
  20. drivers/xen/privcmd.c (+102, -33)
  21. drivers/xen/swiotlb-xen.c (+76, -43)
  22. drivers/xen/sys-hypervisor.c (+12, -1)
  23. drivers/xen/tmem.c (+1, -0)
  24. drivers/xen/xen-pciback/pci_stub.c (+106, -30)
  25. drivers/xen/xenbus/xenbus_client.c (+2, -4)
  26. drivers/xen/xenbus/xenbus_comms.c (+1, -1)
  27. drivers/xen/xenbus/xenbus_dev_backend.c (+1, -1)
  28. drivers/xen/xenbus/xenbus_probe.c (+40, -16)
  29. drivers/xen/xenbus/xenbus_probe_frontend.c (+1, -0)
  30. drivers/xen/xenbus/xenbus_xs.c (+2, -1)
  31. include/linux/swiotlb.h (+1, -0)
  32. include/xen/grant_table.h (+12, -0)
  33. include/xen/interface/grant_table.h (+8, -4)
  34. include/xen/interface/memory.h (+6, -3)
  35. include/xen/interface/platform.h (+5, -2)
  36. include/xen/interface/version.h (+3, -0)
  37. include/xen/interface/xen.h (+4, -4)
  38. include/xen/privcmd.h (+24, -3)
  39. include/xen/swiotlb-xen.h (+1, -10)
  40. lib/swiotlb.c (+24, -9)

+ 6 - 1
arch/ia64/include/asm/xen/interface.h

@@ -67,6 +67,10 @@
 #define set_xen_guest_handle(hnd, val)	do { (hnd).p = val; } while (0)
 
 #ifndef __ASSEMBLY__
+/* Explicitly size integers that represent pfns in the public interface
+ * with Xen so that we could have one ABI that works for 32 and 64 bit
+ * guests. */
+typedef unsigned long xen_pfn_t;
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint, unsigned int);
@@ -79,7 +83,6 @@ DEFINE_GUEST_HANDLE(void);
 DEFINE_GUEST_HANDLE(uint64_t);
 DEFINE_GUEST_HANDLE(uint32_t);
 
-typedef unsigned long xen_pfn_t;
 DEFINE_GUEST_HANDLE(xen_pfn_t);
 #define PRI_xen_pfn	"lx"
 #endif
@@ -265,6 +268,8 @@ typedef struct xen_callback xen_callback_t;
 
 #endif /* !__ASSEMBLY__ */
 
+#include <asm/pvclock-abi.h>
+
 /* Size of the shared_info area (this is not related to page size).  */
 #define XSI_SHIFT			14
 #define XSI_SIZE			(1 << XSI_SHIFT)

+ 7 - 0
arch/x86/include/asm/xen/interface.h

@@ -47,6 +47,10 @@
 #endif
 
 #ifndef __ASSEMBLY__
+/* Explicitly size integers that represent pfns in the public interface
+ * with Xen so that on ARM we can have one ABI that works for 32 and 64
+ * bit guests. */
+typedef unsigned long xen_pfn_t;
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint,  unsigned int);
@@ -57,6 +61,7 @@ DEFINE_GUEST_HANDLE(long);
 DEFINE_GUEST_HANDLE(void);
 DEFINE_GUEST_HANDLE(uint64_t);
 DEFINE_GUEST_HANDLE(uint32_t);
+DEFINE_GUEST_HANDLE(xen_pfn_t);
 #endif
 
 #ifndef HYPERVISOR_VIRT_START
@@ -121,6 +126,8 @@ struct arch_shared_info {
 #include "interface_64.h"
 #endif
 
+#include <asm/pvclock-abi.h>
+
 #ifndef __ASSEMBLY__
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled

+ 2 - 0
arch/x86/include/asm/xen/swiotlb-xen.h

@@ -5,10 +5,12 @@
 extern int xen_swiotlb;
 extern int __init pci_xen_swiotlb_detect(void);
 extern void __init pci_xen_swiotlb_init(void);
+extern int pci_xen_swiotlb_init_late(void);
 #else
 #define xen_swiotlb (0)
 static inline int __init pci_xen_swiotlb_detect(void) { return 0; }
 static inline void __init pci_xen_swiotlb_init(void) { }
+static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
 #endif
 
 #endif /* _ASM_X86_SWIOTLB_XEN_H */

+ 2 - 1
arch/x86/xen/apic.c

@@ -6,8 +6,9 @@
 
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
+#include "xen-ops.h"
 
-unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
+static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
 {
 	struct physdev_apic apic_op;
 	int ret;

+ 11 - 4
arch/x86/xen/enlighten.c

@@ -80,6 +80,8 @@
 #include "smp.h"
 #include "multicalls.h"
 
+#include <xen/events.h>
+
 EXPORT_SYMBOL_GPL(hypercall_page);
 
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
@@ -1288,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
 	int rc;
-	pgd_t *pgd;
 
 	if (!xen_start_info)
 		return;
@@ -1380,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void)
 	acpi_numa = -1;
 #endif
 
-	pgd = (pgd_t *)xen_start_info->pt_base;
-
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1390,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void)
 	early_boot_irqs_disabled = true;
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
-	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
 
 	/* Allocate and initialize top and mid mfn levels for p2m structure */
 	xen_build_mfn_list_list();
@@ -1441,11 +1440,19 @@ asmlinkage void __init xen_start_kernel(void)
 		const struct dom0_vga_console_info *info =
 			(void *)((char *)xen_start_info +
 				 xen_start_info->console.dom0.info_off);
+		struct xen_platform_op op = {
+			.cmd = XENPF_firmware_info,
+			.interface_version = XENPF_INTERFACE_VERSION,
+			.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
+		};
 
 		xen_init_vga(info, xen_start_info->console.dom0.info_size);
 		xen_start_info->console.domU.mfn = 0;
 		xen_start_info->console.domU.evtchn = 0;
 
+		if (HYPERVISOR_dom0_op(&op) == 0)
+			boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;
+
 		xen_init_apic();
 
 		/* Make sure ACS will be enabled */

+ 153 - 37
arch/x86/xen/mmu.c

@@ -84,6 +84,7 @@
  */
 DEFINE_SPINLOCK(xen_reservation_lock);
 
+#ifdef CONFIG_X86_32
 /*
  * Identity map, in addition to plain kernel map.  This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
  */
 #define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
 static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-
+#endif
 #ifdef CONFIG_X86_64
 /* l3 pud for userspace vsyscall mapping */
 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
@@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
 
 static void xen_post_allocator_init(void);
 
-static void __init xen_pagetable_init(void)
-{
-	paging_init();
-	xen_setup_shared_info();
-	xen_post_allocator_init();
-}
-
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 {
 	/* reserve the range used */
@@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 	}
 }
 
+#ifdef CONFIG_X86_64
+static void __init xen_cleanhighmap(unsigned long vaddr,
+				    unsigned long vaddr_end)
+{
+	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
+	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
+
+	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
+	 * We include the PMD passed in on _both_ boundaries. */
+	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
+			pmd++, vaddr += PMD_SIZE) {
+		if (pmd_none(*pmd))
+			continue;
+		if (vaddr < (unsigned long) _text || vaddr > kernel_end)
+			set_pmd(pmd, __pmd(0));
+	}
+	/* In case we did something silly, we should crash in this function
+	 * instead of somewhere later and be confusing. */
+	xen_mc_flush();
+}
+#endif
+static void __init xen_pagetable_init(void)
+{
+#ifdef CONFIG_X86_64
+	unsigned long size;
+	unsigned long addr;
+#endif
+	paging_init();
+	xen_setup_shared_info();
+#ifdef CONFIG_X86_64
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		unsigned long new_mfn_list;
+
+		size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+
+		/* On 32-bit, we get zero so this never gets executed. */
+		new_mfn_list = xen_revector_p2m_tree();
+		if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
+			/* using __ka address and sticking INVALID_P2M_ENTRY! */
+			memset((void *)xen_start_info->mfn_list, 0xff, size);
+
+			/* We should be in __ka space. */
+			BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
+			addr = xen_start_info->mfn_list;
+			/* We roundup to the PMD, which means that if anybody at this stage is
+			 * using the __ka address of xen_start_info or xen_start_info->shared_info
+			 * they are in going to crash. Fortunatly we have already revectored
+			 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
+			size = roundup(size, PMD_SIZE);
+			xen_cleanhighmap(addr, addr + size);
+
+			size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+			memblock_free(__pa(xen_start_info->mfn_list), size);
+			/* And revector! Bye bye old array */
+			xen_start_info->mfn_list = new_mfn_list;
+		} else
+			goto skip;
+	}
+	/* At this stage, cleanup_highmap has already cleaned __ka space
+	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
+	 * the ramdisk). We continue on, erasing PMD entries that point to page
+	 * tables - do note that they are accessible at this stage via __va.
+	 * For good measure we also round up to the PMD - which means that if
+	 * anybody is using __ka address to the initial boot-stack - and try
+	 * to use it - they are going to crash. The xen_start_info has been
+	 * taken care of already in xen_setup_kernel_pagetable. */
+	addr = xen_start_info->pt_base;
+	size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+
+	xen_cleanhighmap(addr, addr + size);
+	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
+#ifdef DEBUG
+	/* This is superflous and is not neccessary, but you know what
+	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
+	 * anything at this stage. */
+	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
+#endif
+skip:
+#endif
+	xen_post_allocator_init();
+}
 static void xen_write_cr2(unsigned long cr2)
 {
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
 }
-
+#ifdef CONFIG_X86_32
 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
@@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 
 	set_page_prot(pmd, PAGE_KERNEL_RO);
 }
-
+#endif
 void __init xen_setup_machphys_mapping(void)
 {
 	struct xen_machphys_mapping mapping;
@@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v)
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		pte[i] = xen_make_pte(pte[i].pte);
 }
-
+static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
+				 unsigned long addr)
+{
+	if (*pt_base == PFN_DOWN(__pa(addr))) {
+		set_page_prot((void *)addr, PAGE_KERNEL);
+		clear_page((void *)addr);
+		(*pt_base)++;
+	}
+	if (*pt_end == PFN_DOWN(__pa(addr))) {
+		set_page_prot((void *)addr, PAGE_KERNEL);
+		clear_page((void *)addr);
+		(*pt_end)--;
+	}
+}
 /*
  * Set up the initial kernel pagetable.
  *
@@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
-					 unsigned long max_pfn)
+void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pud_t *l3;
 	pmd_t *l2;
+	unsigned long addr[3];
+	unsigned long pt_base, pt_end;
+	unsigned i;
 
 	/* max_pfn_mapped is the last pfn mapped in the initial memory
 	 * mappings. Considering that on Xen after the kernel mappings we
@@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	 * set max_pfn_mapped to the last real pfn mapped. */
 	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
 
+	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
+	pt_end = pt_base + xen_start_info->nr_pt_frames;
+
 	/* Zap identity mapping */
 	init_level4_pgt[0] = __pgd(0);
 
 	/* Pre-constructed entries are in pfn, so convert to mfn */
+	/* L4[272] -> level3_ident_pgt
+	 * L4[511] -> level3_kernel_pgt */
 	convert_pfn_mfn(init_level4_pgt);
+
+	/* L3_i[0] -> level2_ident_pgt */
 	convert_pfn_mfn(level3_ident_pgt);
+	/* L3_k[510] -> level2_kernel_pgt
+	 * L3_i[511] -> level2_fixmap_pgt */
 	convert_pfn_mfn(level3_kernel_pgt);
 
+	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
 
-	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
+	addr[0] = (unsigned long)pgd;
+	addr[1] = (unsigned long)l3;
+	addr[2] = (unsigned long)l2;
+	/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
+	 * Both L4[272][0] and L4[511][511] have entries that point to the same
+	 * L2 (PMD) tables. Meaning that if you modify it in __va space
+	 * it will be also modified in the __ka space! (But if you just
+	 * modify the PMD table to point to other PTE's or none, then you
+	 * are OK - which is what cleanup_highmap does) */
+	copy_page(level2_ident_pgt, l2);
+	/* Graft it onto L4[511][511] */
+	copy_page(level2_kernel_pgt, l2);
+
+	/* Get [511][510] and graft that in level2_fixmap_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
-	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
-
-	/* Set up identity map */
-	xen_map_identity_early(level2_ident_pgt, max_pfn);
+	copy_page(level2_fixmap_pgt, l2);
+	/* Note that we don't do anything with level1_fixmap_pgt which
+	 * we don't need. */
 
 	/* Make pagetable pieces RO */
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
@@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-	/* Switch over */
-	pgd = init_level4_pgt;
-
 	/*
 	 * At this stage there can be no user pgd, and no page
 	 * structure to attach it to, so make sure we just set kernel
 	 * pgd.
 	 */
 	xen_mc_batch();
-	__xen_write_cr3(true, __pa(pgd));
+	__xen_write_cr3(true, __pa(init_level4_pgt));
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
-	memblock_reserve(__pa(xen_start_info->pt_base),
-			 xen_start_info->nr_pt_frames * PAGE_SIZE);
+	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
+	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ...  for
+	 * the initial domain. For guests using the toolstack, they are in:
+	 * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
+	 * rip out the [L4] (pgd), but for guests we shave off three pages.
+	 */
+	for (i = 0; i < ARRAY_SIZE(addr); i++)
+		check_pt_base(&pt_base, &pt_end, addr[i]);
 
-	return pgd;
+	/* Our (by three pages) smaller Xen pagetable that we are using */
+	memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
+	/* Revector the xen_start_info */
+	xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
 }
 #else	/* !CONFIG_X86_64 */
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
@@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	 */
 	swapper_kernel_pmd =
 		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-	memcpy(swapper_kernel_pmd, initial_kernel_pmd,
-	       sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(swapper_kernel_pmd, initial_kernel_pmd);
 	swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
 		__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
 	set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
@@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
-					 unsigned long max_pfn)
+void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
 
@@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 				  512*1024);
 
 	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-	memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(initial_kernel_pmd, kernel_pmd);
 
 	xen_map_identity_early(initial_kernel_pmd, max_pfn);
 
-	memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	copy_page(initial_page_table, pgd);
 	initial_page_table[KERNEL_PGD_BOUNDARY] =
 		__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
 
@@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 
 	memblock_reserve(__pa(xen_start_info->pt_base),
 			 xen_start_info->nr_pt_frames * PAGE_SIZE);
-
-	return initial_page_table;
 }
 #endif	/* CONFIG_X86_64 */
 
@@ -2333,6 +2446,9 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 	unsigned long range;
 	int err = 0;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -EINVAL;
+
 	prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
 
 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
@@ -2351,8 +2467,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 		if (err)
 			goto out;
 
-		err = -EFAULT;
-		if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
+		err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
+		if (err < 0)
 			goto out;
 
 		nr -= batch;

+ 85 - 7
arch/x86/xen/p2m.c

@@ -22,7 +22,7 @@
  *
  * P2M_PER_PAGE depends on the architecture, as a mfn is always
  * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
- * 512 and 1024 entries respectively. 
+ * 512 and 1024 entries respectively.
  *
  * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
  *
@@ -139,11 +139,11 @@
  *      /    | ~0, ~0, ....  |
  *     |     \---------------/
  *     |
- *     p2m_missing             p2m_missing
- * /------------------\     /------------\
- * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
- * | [p2m_mid_missing]+---->| ..., ~0    |
- * \------------------/     \------------/
+ *   p2m_mid_missing           p2m_missing
+ * /-----------------\     /------------\
+ * | [p2m_missing]   +---->| ~0, ~0, ~0 |
+ * | [p2m_missing]   +---->| ..., ~0    |
+ * \-----------------/     \------------/
  *
  * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
  */
@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void)
 
 	m2p_override_init();
 }
+#ifdef CONFIG_X86_64
+#include <linux/bootmem.h>
+unsigned long __init xen_revector_p2m_tree(void)
+{
+	unsigned long va_start;
+	unsigned long va_end;
+	unsigned long pfn;
+	unsigned long pfn_free = 0;
+	unsigned long *mfn_list = NULL;
+	unsigned long size;
+
+	va_start = xen_start_info->mfn_list;
+	/*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
+	 * so make sure it is rounded up to that */
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+	va_end = va_start + size;
+
+	/* If we were revectored already, don't do it again. */
+	if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
+		return 0;
+
+	mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
+	if (!mfn_list) {
+		pr_warn("Could not allocate space for a new P2M tree!\n");
+		return xen_start_info->mfn_list;
+	}
+	/* Fill it out with INVALID_P2M_ENTRY value */
+	memset(mfn_list, 0xFF, size);
+
+	for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx;
+		unsigned long *mid_p;
+
+		if (!p2m_top[topidx])
+			continue;
+
+		if (p2m_top[topidx] == p2m_mid_missing)
+			continue;
+
+		mididx = p2m_mid_index(pfn);
+		mid_p = p2m_top[topidx][mididx];
+		if (!mid_p)
+			continue;
+		if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
+			continue;
+
+		if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
+			continue;
+
+		/* The old va. Rebase it on mfn_list */
+		if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
+			unsigned long *new;
+
+			if (pfn_free  > (size / sizeof(unsigned long))) {
+				WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
+				     size / sizeof(unsigned long), pfn_free);
+				return 0;
+			}
+			new = &mfn_list[pfn_free];
+
+			copy_page(new, mid_p);
+			p2m_top[topidx][mididx] = &mfn_list[pfn_free];
+			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
+
+			pfn_free += P2M_PER_PAGE;
 
+		}
+		/* This should be the leafs allocated for identity from _brk. */
+	}
+	return (unsigned long)mfn_list;
+
+}
+#else
+unsigned long __init xen_revector_p2m_tree(void)
+{
+	return 0;
+}
+#endif
 unsigned long get_phys_to_machine(unsigned long pfn)
 {
 	unsigned topidx, mididx, idx;
@@ -430,7 +508,7 @@ static void free_p2m_page(void *p)
 	free_page((unsigned long)p);
 }
 
-/* 
+/*
  * Fully allocate the p2m structure for a given pfn.  We need to check
  * that both the top and mid levels are allocated, and make sure the
  * parallel mfn tree is kept in sync.  We may race with other cpus, so

+ 45 - 7
arch/x86/xen/pci-swiotlb-xen.c

@@ -8,6 +8,14 @@
 #include <xen/xen.h>
 #include <asm/iommu_table.h>
 
+
+#include <asm/xen/swiotlb-xen.h>
+#ifdef CONFIG_X86_64
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#endif
+#include <linux/export.h>
+
 int xen_swiotlb __read_mostly;
 
 static struct dma_map_ops xen_swiotlb_dma_ops = {
@@ -34,34 +42,64 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
 int __init pci_xen_swiotlb_detect(void)
 {
 
+	if (!xen_pv_domain())
+		return 0;
+
 	/* If running as PV guest, either iommu=soft, or swiotlb=force will
 	 * activate this IOMMU. If running as PV privileged, activate it
 	 * irregardless.
 	 */
-	if ((xen_initial_domain() || swiotlb || swiotlb_force) &&
-	    (xen_pv_domain()))
+	if ((xen_initial_domain() || swiotlb || swiotlb_force))
 		xen_swiotlb = 1;
 
 	/* If we are running under Xen, we MUST disable the native SWIOTLB.
 	 * Don't worry about swiotlb_force flag activating the native, as
 	 * the 'swiotlb' flag is the only one turning it on. */
-	if (xen_pv_domain())
-		swiotlb = 0;
+	swiotlb = 0;
 
+#ifdef CONFIG_X86_64
+	/* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0
+	 * (so no iommu=X command line over-writes).
+	 * Considering that PV guests do not want the *native SWIOTLB* but
+	 * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here.
+	 */
+	if (max_pfn > MAX_DMA32_PFN)
+		no_iommu = 1;
+#endif
 	return xen_swiotlb;
 }
 
 void __init pci_xen_swiotlb_init(void)
 {
 	if (xen_swiotlb) {
-		xen_swiotlb_init(1);
+		xen_swiotlb_init(1, true /* early */);
 		dma_ops = &xen_swiotlb_dma_ops;
 
 		/* Make sure ACS will be enabled */
 		pci_request_acs();
 	}
 }
+
+int pci_xen_swiotlb_init_late(void)
+{
+	int rc;
+
+	if (xen_swiotlb)
+		return 0;
+
+	rc = xen_swiotlb_init(1, false /* late */);
+	if (rc)
+		return rc;
+
+	dma_ops = &xen_swiotlb_dma_ops;
+	/* Make sure ACS will be enabled */
+	pci_request_acs();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
+
 IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
-		  0,
+		  NULL,
 		  pci_xen_swiotlb_init,
-		  0);
+		  NULL);
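
The new pci_xen_swiotlb_init_late() export is what lets a PV guest that booted without PCI devices bring up the Xen-SWIOTLB only when a device is hotplugged. A minimal sketch of the intended caller pattern, mirroring the xen-pcifront change further down; the frontend function and device names here are illustrative only:

/* Sketch only: a hypothetical PV frontend that learns about PCI devices
 * after boot. It sets up the Xen-SWIOTLB lazily, and only if no software
 * IO TLB has been allocated yet (swiotlb_nr_tbl() == 0). */
#include <linux/swiotlb.h>
#include <asm/xen/swiotlb-xen.h>
#include <xen/xenbus.h>

static int example_frontend_connect(struct xenbus_device *xdev)
{
	int err = 0;

	if (!swiotlb_nr_tbl()) {
		err = pci_xen_swiotlb_init_late();
		if (err)
			dev_err(&xdev->dev, "Could not setup SWIOTLB!\n");
	}
	return err;
}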

+ 1 - 0
arch/x86/xen/platform-pci-unplug.c

@@ -24,6 +24,7 @@
 #include <linux/module.h>
 
 #include <xen/platform_pci.h>
+#include "xen-ops.h"
 
 #define XEN_PLATFORM_ERR_MAGIC -1
 #define XEN_PLATFORM_ERR_PROTOCOL -2

+ 18 - 0
arch/x86/xen/setup.c

@@ -432,6 +432,24 @@ char * __init xen_memory_setup(void)
 	 *  - mfn_list
 	 *  - xen_start_info
 	 * See comment above "struct start_info" in <xen/interface/xen.h>
+	 * We tried to make the the memblock_reserve more selective so
+	 * that it would be clear what region is reserved. Sadly we ran
+	 * in the problem wherein on a 64-bit hypervisor with a 32-bit
+	 * initial domain, the pt_base has the cr3 value which is not
+	 * neccessarily where the pagetable starts! As Jan put it: "
+	 * Actually, the adjustment turns out to be correct: The page
+	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
+	 * "first L2", "first L3", so the offset to the page table base is
+	 * indeed 2. When reading xen/include/public/xen.h's comment
+	 * very strictly, this is not a violation (since there nothing is said
+	 * that the first thing in the page table space is pointed to by
+	 * pt_base; I admit that this seems to be implied though, namely
+	 * do I think that it is implied that the page table space is the
+	 * range [pt_base, pt_base + nt_pt_frames), whereas that
+	 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
+	 * which - without a priori knowledge - the kernel would have
+	 * difficulty to figure out)." - so lets just fall back to the
+	 * easy way and reserve the whole region.
 	 */
 	memblock_reserve(__pa(xen_start_info->mfn_list),
 			 xen_start_info->pt_base - xen_start_info->mfn_list);

+ 7 - 0
arch/x86/xen/vga.c

@@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
 			info->u.text_mode_3.font_height;
 		break;
 
+	case XEN_VGATYPE_EFI_LFB:
 	case XEN_VGATYPE_VESA_LFB:
 		if (size < offsetof(struct dom0_vga_console_info,
 				    u.vesa_lfb.gbl_caps))
@@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
 		screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
 		screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
 		screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+
+		if (info->video_type == XEN_VGATYPE_EFI_LFB) {
+			screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
+			break;
+		}
+
 		if (size >= offsetof(struct dom0_vga_console_info,
 				     u.vesa_lfb.gbl_caps)
 		    + sizeof(info->u.vesa_lfb.gbl_caps))

+ 54 - 2
arch/x86/xen/xen-head.S

@@ -28,9 +28,61 @@ ENTRY(startup_xen)
 	__FINIT
 
 .pushsection .text
-	.align PAGE_SIZE
+	.balign PAGE_SIZE
 ENTRY(hypercall_page)
-	.skip PAGE_SIZE
+#define NEXT_HYPERCALL(x) \
+	ENTRY(xen_hypercall_##x) \
+	.skip 32
+
+NEXT_HYPERCALL(set_trap_table)
+NEXT_HYPERCALL(mmu_update)
+NEXT_HYPERCALL(set_gdt)
+NEXT_HYPERCALL(stack_switch)
+NEXT_HYPERCALL(set_callbacks)
+NEXT_HYPERCALL(fpu_taskswitch)
+NEXT_HYPERCALL(sched_op_compat)
+NEXT_HYPERCALL(platform_op)
+NEXT_HYPERCALL(set_debugreg)
+NEXT_HYPERCALL(get_debugreg)
+NEXT_HYPERCALL(update_descriptor)
+NEXT_HYPERCALL(ni)
+NEXT_HYPERCALL(memory_op)
+NEXT_HYPERCALL(multicall)
+NEXT_HYPERCALL(update_va_mapping)
+NEXT_HYPERCALL(set_timer_op)
+NEXT_HYPERCALL(event_channel_op_compat)
+NEXT_HYPERCALL(xen_version)
+NEXT_HYPERCALL(console_io)
+NEXT_HYPERCALL(physdev_op_compat)
+NEXT_HYPERCALL(grant_table_op)
+NEXT_HYPERCALL(vm_assist)
+NEXT_HYPERCALL(update_va_mapping_otherdomain)
+NEXT_HYPERCALL(iret)
+NEXT_HYPERCALL(vcpu_op)
+NEXT_HYPERCALL(set_segment_base)
+NEXT_HYPERCALL(mmuext_op)
+NEXT_HYPERCALL(xsm_op)
+NEXT_HYPERCALL(nmi_op)
+NEXT_HYPERCALL(sched_op)
+NEXT_HYPERCALL(callback_op)
+NEXT_HYPERCALL(xenoprof_op)
+NEXT_HYPERCALL(event_channel_op)
+NEXT_HYPERCALL(physdev_op)
+NEXT_HYPERCALL(hvm_op)
+NEXT_HYPERCALL(sysctl)
+NEXT_HYPERCALL(domctl)
+NEXT_HYPERCALL(kexec_op)
+NEXT_HYPERCALL(tmem_op) /* 38 */
+ENTRY(xen_hypercall_rsvr)
+	.skip 320
+NEXT_HYPERCALL(mca) /* 48 */
+NEXT_HYPERCALL(arch_1)
+NEXT_HYPERCALL(arch_2)
+NEXT_HYPERCALL(arch_3)
+NEXT_HYPERCALL(arch_4)
+NEXT_HYPERCALL(arch_5)
+NEXT_HYPERCALL(arch_6)
+	.balign PAGE_SIZE
 .popsection
 
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")

+ 2 - 1
arch/x86/xen/xen-ops.h

@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
 void xen_setup_shared_info(void);
 void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
-pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
+void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_reserve_top(void);
 extern unsigned long xen_max_p2m_pfn;
 
@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
+unsigned long __init xen_revector_p2m_tree(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);

+ 3 - 8
drivers/net/xen-netback/netback.c

@@ -635,9 +635,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
 		return;
 
 	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
-	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
-					npo.copy_prod);
-	BUG_ON(ret != 0);
+	gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
 
 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
 		sco = (struct skb_cb_overlay *)skb->cb;
@@ -1460,18 +1458,15 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
 static void xen_netbk_tx_action(struct xen_netbk *netbk)
 {
 	unsigned nr_gops;
-	int ret;
 
 	nr_gops = xen_netbk_tx_build_gops(netbk);
 
 	if (nr_gops == 0)
 		return;
-	ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
-					netbk->tx_copy_ops, nr_gops);
-	BUG_ON(ret);
 
-	xen_netbk_tx_submit(netbk);
+	gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
 
+	xen_netbk_tx_submit(netbk);
 }
 
 static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx)

+ 10 - 5
drivers/pci/xen-pcifront.c

@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/time.h>
 
+#include <asm/xen/swiotlb-xen.h>
 #define INVALID_GRANT_REF (0)
 #define INVALID_EVTCHN    (-1)
 
@@ -236,7 +237,7 @@ static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
 	return errno_to_pcibios_err(do_pci_op(pdev, &op));
 }
 
-struct pci_ops pcifront_bus_ops = {
+static struct pci_ops pcifront_bus_ops = {
 	.read = pcifront_bus_read,
 	.write = pcifront_bus_write,
 };
@@ -668,7 +669,7 @@ static irqreturn_t pcifront_handler_aer(int irq, void *dev)
 	schedule_pcifront_aer_op(pdev);
 	return IRQ_HANDLED;
 }
-static int pcifront_connect(struct pcifront_device *pdev)
+static int pcifront_connect_and_init_dma(struct pcifront_device *pdev)
 {
 	int err = 0;
 
@@ -681,9 +682,13 @@ static int pcifront_connect(struct pcifront_device *pdev)
 		dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
 		err = -EEXIST;
 	}
-
 	spin_unlock(&pcifront_dev_lock);
 
+	if (!err && !swiotlb_nr_tbl()) {
+		err = pci_xen_swiotlb_init_late();
+		if (err)
+			dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
+	}
 	return err;
 }
 
@@ -842,10 +847,10 @@ static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
 	    XenbusStateInitialised)
 		goto out;
 
-	err = pcifront_connect(pdev);
+	err = pcifront_connect_and_init_dma(pdev);
 	if (err) {
 		xenbus_dev_fatal(pdev->xdev, err,
-				 "Error connecting PCI Frontend");
+				 "Error setting up PCI Frontend");
 		goto out;
 	}
 

+ 2 - 0
drivers/tty/hvc/hvc_xen.c

@@ -21,6 +21,7 @@
 #include <linux/console.h>
 #include <linux/delay.h>
 #include <linux/err.h>
+#include <linux/irq.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/list.h>
@@ -35,6 +36,7 @@
 #include <xen/page.h>
 #include <xen/events.h>
 #include <xen/interface/io/console.h>
+#include <xen/interface/sched.h>
 #include <xen/hvc-console.h>
 #include <xen/xenbus.h>
 

+ 15 - 3
drivers/xen/events.c

@@ -373,11 +373,22 @@ static void unmask_evtchn(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	unsigned int cpu = get_cpu();
+	int do_hypercall = 0, evtchn_pending = 0;
 
 	BUG_ON(!irqs_disabled());
 
-	/* Slow path (hypercall) if this is a non-local port. */
-	if (unlikely(cpu != cpu_from_evtchn(port))) {
+	if (unlikely((cpu != cpu_from_evtchn(port))))
+		do_hypercall = 1;
+	else
+		evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]);
+
+	if (unlikely(evtchn_pending && xen_hvm_domain()))
+		do_hypercall = 1;
+
+	/* Slow path (hypercall) if this is a non-local port or if this is
+	 * an hvm domain and an event is pending (hvm domains don't have
+	 * their own implementation of irq_enable). */
+	if (do_hypercall) {
 		struct evtchn_unmask unmask = { .port = port };
 		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
 	} else {
@@ -390,7 +401,7 @@ static void unmask_evtchn(int port)
 		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
 		 * the interrupt edge' if the channel is masked.
 		 */
-		if (sync_test_bit(port, &s->evtchn_pending[0]) &&
+		if (evtchn_pending &&
 		    !sync_test_and_set_bit(port / BITS_PER_LONG,
 					   &vcpu_info->evtchn_pending_sel))
 			vcpu_info->evtchn_upcall_pending = 1;
@@ -831,6 +842,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 		struct irq_info *info = info_for_irq(irq);
 		WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
 	}
+	irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
 
 out:
 	mutex_unlock(&irq_mapping_update_lock);

+ 1 - 1
drivers/xen/gntdev.c

@@ -446,7 +446,7 @@ static void mn_release(struct mmu_notifier *mn,
 	spin_unlock(&priv->lock);
 }
 
-struct mmu_notifier_ops gntdev_mmu_ops = {
+static struct mmu_notifier_ops gntdev_mmu_ops = {
 	.release                = mn_release,
 	.invalidate_page        = mn_invl_page,
 	.invalidate_range_start = mn_invl_range_start,

+ 60 - 7
drivers/xen/grant-table.c

@@ -38,6 +38,7 @@
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/delay.h>
 #include <linux/hardirq.h>
 
 #include <xen/xen.h>
@@ -47,6 +48,7 @@
 #include <xen/interface/memory.h>
 #include <xen/hvc-console.h>
 #include <asm/xen/hypercall.h>
+#include <asm/xen/interface.h>
 
 #include <asm/pgtable.h>
 #include <asm/sync_bitops.h>
@@ -285,10 +287,9 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
 
-void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
-				    unsigned long frame, int flags,
-				    unsigned page_off,
-				    unsigned length)
+static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
+					   unsigned long frame, int flags,
+					   unsigned page_off, unsigned length)
 {
 	gnttab_shared.v2[ref].sub_page.frame = frame;
 	gnttab_shared.v2[ref].sub_page.page_off = page_off;
@@ -345,9 +346,9 @@ bool gnttab_subpage_grants_available(void)
 }
 EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
 
-void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
-				  int flags, domid_t trans_domid,
-				  grant_ref_t trans_gref)
+static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
+					 int flags, domid_t trans_domid,
+					 grant_ref_t trans_gref)
 {
 	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
 	gnttab_shared.v2[ref].transitive.gref = trans_gref;
@@ -823,6 +824,52 @@ unsigned int gnttab_max_grant_frames(void)
 }
 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
+/* Handling of paged out grant targets (GNTST_eagain) */
+#define MAX_DELAY 256
+static inline void
+gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
+						const char *func)
+{
+	unsigned delay = 1;
+
+	do {
+		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
+		if (*status == GNTST_eagain)
+			msleep(delay++);
+	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
+
+	if (delay >= MAX_DELAY) {
+		printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm);
+		*status = GNTST_bad_page;
+	}
+}
+
+void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
+{
+	struct gnttab_map_grant_ref *op;
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
+		BUG();
+	for (op = batch; op < batch + count; op++)
+		if (op->status == GNTST_eagain)
+			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
+						&op->status, __func__);
+}
+EXPORT_SYMBOL_GPL(gnttab_batch_map);
+
+void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
+{
+	struct gnttab_copy *op;
+
+	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
+		BUG();
+	for (op = batch; op < batch + count; op++)
+		if (op->status == GNTST_eagain)
+			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
+						&op->status, __func__);
+}
+EXPORT_SYMBOL_GPL(gnttab_batch_copy);
+
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count)
@@ -836,6 +883,12 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 	if (ret)
 		return ret;
 
+	/* Retry eagain maps */
+	for (i = 0; i < count; i++)
+		if (map_ops[i].status == GNTST_eagain)
+			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
+						&map_ops[i].status, __func__);
+
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return ret;
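
The two batch helpers added above centralise the GNTST_eagain (paged-out grant) retry loop that backends otherwise had to open-code; the xen-netback hunk earlier in this series is the real in-tree conversion. For illustration, a minimal sketch of how a hypothetical backend would use gnttab_batch_map(); only the helper and the per-op status handling come from this patch set, the surrounding driver structure is made up:

/* Sketch only: hypothetical backend mapping a batch of grants with the new
 * helper. Paged-out grants are retried inside gnttab_batch_map(); a grant
 * that never comes back is downgraded to GNTST_bad_page, so the caller only
 * has to check the final per-op status. */
#include <linux/errno.h>
#include <xen/grant_table.h>

static int example_map_grants(struct gnttab_map_grant_ref *ops,
			      unsigned int count)
{
	unsigned int i;

	gnttab_batch_map(ops, count);

	for (i = 0; i < count; i++) {
		if (ops[i].status != GNTST_okay)
			return -EINVAL;
	}
	return 0;
}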
 

+ 102 - 33
drivers/xen/privcmd.c

@@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
  */
 static int gather_array(struct list_head *pagelist,
 			unsigned nelem, size_t size,
-			void __user *data)
+			const void __user *data)
 {
 	unsigned pageidx;
 	void *pagedata;
@@ -246,61 +246,117 @@ struct mmap_batch_state {
 	domid_t domain;
 	unsigned long va;
 	struct vm_area_struct *vma;
-	int err;
-
-	xen_pfn_t __user *user;
+	/* A tristate:
+	 *      0 for no errors
+	 *      1 if at least one error has happened (and no
+	 *          -ENOENT errors have happened)
+	 *      -ENOENT if at least 1 -ENOENT has happened.
+	 */
+	int global_error;
+	/* An array for individual errors */
+	int *err;
+
+	/* User-space mfn array to store errors in the second pass for V1. */
+	xen_pfn_t __user *user_mfn;
 };
 
 static int mmap_batch_fn(void *data, void *state)
 {
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
+	int ret;
+
+	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+					 st->vma->vm_page_prot, st->domain);
 
-	if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-				       st->vma->vm_page_prot, st->domain) < 0) {
-		*mfnp |= 0xf0000000U;
-		st->err++;
+	/* Store error code for second pass. */
+	*(st->err++) = ret;
+
+	/* And see if it affects the global_error. */
+	if (ret < 0) {
+		if (ret == -ENOENT)
+			st->global_error = -ENOENT;
+		else {
+			/* Record that at least one error has happened. */
+			if (st->global_error == 0)
+				st->global_error = 1;
+		}
 	}
 	st->va += PAGE_SIZE;
 
 	return 0;
 }
 
-static int mmap_return_errors(void *data, void *state)
+static int mmap_return_errors_v1(void *data, void *state)
 {
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
-
-	return put_user(*mfnp, st->user++);
+	int err = *(st->err++);
+
+	/*
+	 * V1 encodes the error codes in the 32bit top nibble of the
+	 * mfn (with its known limitations vis-a-vis 64 bit callers).
+	 */
+	*mfnp |= (err == -ENOENT) ?
+				PRIVCMD_MMAPBATCH_PAGED_ERROR :
+				PRIVCMD_MMAPBATCH_MFN_ERROR;
+	return __put_user(*mfnp, st->user_mfn++);
 }
 
 static struct vm_operations_struct privcmd_vm_ops;
 
-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 {
 	int ret;
-	struct privcmd_mmapbatch m;
+	struct privcmd_mmapbatch_v2 m;
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long nr_pages;
 	LIST_HEAD(pagelist);
+	int *err_array = NULL;
 	struct mmap_batch_state state;
 
 	if (!xen_initial_domain())
 		return -EPERM;
 
-	if (copy_from_user(&m, udata, sizeof(m)))
-		return -EFAULT;
+	switch (version) {
+	case 1:
+		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+			return -EFAULT;
+		/* Returns per-frame error in m.arr. */
+		m.err = NULL;
+		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+			return -EFAULT;
+		break;
+	case 2:
+		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+			return -EFAULT;
+		/* Returns per-frame error code in m.err. */
+		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+			return -EFAULT;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	nr_pages = m.num;
 	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
 		return -EINVAL;
 
-	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
-			   m.arr);
+	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
 
-	if (ret || list_empty(&pagelist))
+	if (ret)
 		goto out;
+	if (list_empty(&pagelist)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
+	if (err_array == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	down_write(&mm->mmap_sem);
 
@@ -315,24 +371,37 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
 		goto out;
 	}
 
-	state.domain = m.dom;
-	state.vma = vma;
-	state.va = m.addr;
-	state.err = 0;
+	state.domain        = m.dom;
+	state.vma           = vma;
+	state.va            = m.addr;
+	state.global_error  = 0;
+	state.err           = err_array;
 
-	ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-			     &pagelist, mmap_batch_fn, &state);
+	/* mmap_batch_fn guarantees ret == 0 */
+	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
+			     &pagelist, mmap_batch_fn, &state));
 
 	up_write(&mm->mmap_sem);
 
-	if (state.err > 0) {
-		state.user = m.arr;
+	if (state.global_error && (version == 1)) {
+		/* Write back errors in second pass. */
+		state.user_mfn = (xen_pfn_t *)m.arr;
+		state.err      = err_array;
 		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-			       &pagelist,
-			       mmap_return_errors, &state);
+				     &pagelist, mmap_return_errors_v1, &state);
+	} else if (version == 2) {
+		ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
+		if (ret)
+			ret = -EFAULT;
 	}
 
+	/* If we have not had any EFAULT-like global errors then set the global
+	 * error to -ENOENT if necessary. */
+	if ((ret == 0) && (state.global_error == -ENOENT))
+		ret = -ENOENT;
+
 out:
+	kfree(err_array);
 	free_page_list(&pagelist);
 
 	return ret;
@@ -354,7 +423,11 @@ static long privcmd_ioctl(struct file *file,
 		break;
 
 	case IOCTL_PRIVCMD_MMAPBATCH:
-		ret = privcmd_ioctl_mmap_batch(udata);
+		ret = privcmd_ioctl_mmap_batch(udata, 1);
+		break;
+
+	case IOCTL_PRIVCMD_MMAPBATCH_V2:
+		ret = privcmd_ioctl_mmap_batch(udata, 2);
 		break;
 
 	default:
@@ -380,10 +453,6 @@ static struct vm_operations_struct privcmd_vm_ops = {
 
 static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	/* Unsupported for auto-translate guests. */
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return -ENOSYS;
-
 	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
 	 * how to recreate these mappings */
 	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
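
The hunks above only show the kernel side of the new batch interface, so here is a minimal user-space sketch of calling IOCTL_PRIVCMD_MMAPBATCH_V2. The field names (num, dom, addr, arr, err) and the -ENOENT / per-frame error convention follow the code above and the include/xen/privcmd.h addition in this series; the helper name, the assumption that the privcmd UAPI header is usable from user space, and the error handling are illustrative only:

/* Sketch only: user-space caller of IOCTL_PRIVCMD_MMAPBATCH_V2. The target
 * VMA must come from mmap() on the privcmd node itself; on -ENOENT at least
 * one frame was paged out and the per-frame result is found in errs[]. */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/privcmd.h>	/* struct privcmd_mmapbatch_v2, IOCTL_PRIVCMD_MMAPBATCH_V2 */

static int map_foreign_frames(int privcmd_fd, unsigned int dom,
			      const xen_pfn_t *mfns, int *errs,
			      unsigned int num, unsigned long page_size)
{
	void *addr = mmap(NULL, (size_t)num * page_size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, privcmd_fd, 0);
	struct privcmd_mmapbatch_v2 batch = {
		.num  = num,
		.dom  = dom,
		.addr = (unsigned long)addr,
		.arr  = mfns,
		.err  = errs,	/* new in V2: one error code per frame */
	};

	if (addr == MAP_FAILED)
		return -1;

	return ioctl(privcmd_fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch);
}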

+ 76 - 43
drivers/xen/swiotlb-xen.c

@@ -52,7 +52,7 @@ static unsigned long xen_io_tlb_nslabs;
  * Quick lookup value of the bus address of the IOTLB.
  */
 
-u64 start_dma_addr;
+static u64 start_dma_addr;
 
 static dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
 {
@@ -144,31 +144,72 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 	} while (i < nslabs);
 	return 0;
 }
+static unsigned long xen_set_nslabs(unsigned long nr_tbl)
+{
+	if (!nr_tbl) {
+		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
+		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
+	} else
+		xen_io_tlb_nslabs = nr_tbl;
 
-void __init xen_swiotlb_init(int verbose)
+	return xen_io_tlb_nslabs << IO_TLB_SHIFT;
+}
+
+enum xen_swiotlb_err {
+	XEN_SWIOTLB_UNKNOWN = 0,
+	XEN_SWIOTLB_ENOMEM,
+	XEN_SWIOTLB_EFIXUP
+};
+
+static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
+{
+	switch (err) {
+	case XEN_SWIOTLB_ENOMEM:
+		return "Cannot allocate Xen-SWIOTLB buffer\n";
+	case XEN_SWIOTLB_EFIXUP:
+		return "Failed to get contiguous memory for DMA from Xen!\n"\
+		    "You either: don't have the permissions, do not have"\
+		    " enough free memory under 4GB, or the hypervisor memory"\
+		    " is too fragmented!";
+	default:
+		break;
+	}
+	return "";
+}
+int __ref xen_swiotlb_init(int verbose, bool early)
 {
-	unsigned long bytes;
+	unsigned long bytes, order;
 	int rc = -ENOMEM;
-	unsigned long nr_tbl;
-	char *m = NULL;
+	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
 	unsigned int repeat = 3;
 
-	nr_tbl = swiotlb_nr_tbl();
-	if (nr_tbl)
-		xen_io_tlb_nslabs = nr_tbl;
-	else {
-		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
-		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
+	xen_io_tlb_nslabs = swiotlb_nr_tbl();
 retry:
-	bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
-
+	bytes = xen_set_nslabs(xen_io_tlb_nslabs);
+	order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
 	/*
 	 * Get IO TLB memory from any location.
 	 */
-	xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
+	if (early)
+		xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
+	else {
+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+			xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order);
+			if (xen_io_tlb_start)
+				break;
+			order--;
+		}
+		if (order != get_order(bytes)) {
+			pr_warn("Warning: only able to allocate %ld MB "
+				"for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+			xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
+			bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
+		}
+	}
 	if (!xen_io_tlb_start) {
-		m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+		m_ret = XEN_SWIOTLB_ENOMEM;
 		goto error;
 	}
 	xen_io_tlb_end = xen_io_tlb_start + bytes;
@@ -179,17 +220,22 @@ retry:
 			       bytes,
 			       xen_io_tlb_nslabs);
 	if (rc) {
-		free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
-		m = "Failed to get contiguous memory for DMA from Xen!\n"\
-		    "You either: don't have the permissions, do not have"\
-		    " enough free memory under 4GB, or the hypervisor memory"\
-		    "is too fragmented!";
+		if (early)
+			free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
+		else {
+			free_pages((unsigned long)xen_io_tlb_start, order);
+			xen_io_tlb_start = NULL;
+		}
+		m_ret = XEN_SWIOTLB_EFIXUP;
 		goto error;
 	}
 	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
-	swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
-
-	return;
+	if (early) {
+		swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
+		rc = 0;
+	} else
+		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
+	return rc;
 error:
 	if (repeat--) {
 		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
@@ -198,10 +244,13 @@ error:
 		      (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
 		goto retry;
 	}
-	xen_raw_printk("%s (rc:%d)", m, rc);
-	panic("%s (rc:%d)", m, rc);
+	pr_err("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
+	if (early)
+		panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
+	else
+		free_pages((unsigned long)xen_io_tlb_start, order);
+	return rc;
 }
-
 void *
 xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t flags,
@@ -466,14 +515,6 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs);
 
-int
-xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		   enum dma_data_direction dir)
-{
-	return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
-}
-EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg);
-
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
  * concerning calls here are the same as for swiotlb_unmap_page() above.
@@ -494,14 +535,6 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
 
-void
-xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		     enum dma_data_direction dir)
-{
-	return xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
-}
-EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg);
-
 /*
  * Make physical memory consistent for a set of streaming mode DMA translations
  * after a transfer.

+ 12 - 1
drivers/xen/sys-hypervisor.c

@@ -114,7 +114,7 @@ static void xen_sysfs_version_destroy(void)
 
 /* UUID */
 
-static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
+static ssize_t uuid_show_fallback(struct hyp_sysfs_attr *attr, char *buffer)
 {
 	char *vm, *val;
 	int ret;
@@ -135,6 +135,17 @@ static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
 	return ret;
 }
 
+static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+	xen_domain_handle_t uuid;
+	int ret;
+	ret = HYPERVISOR_xen_version(XENVER_guest_handle, uuid);
+	if (ret)
+		return uuid_show_fallback(attr, buffer);
+	ret = sprintf(buffer, "%pU\n", uuid);
+	return ret;
+}
+
 HYPERVISOR_ATTR_RO(uuid);
 
 static int __init xen_sysfs_uuid_init(void)

+ 1 - 0
drivers/xen/tmem.c

@@ -21,6 +21,7 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/tmem.h>
 
 #define TMEM_CONTROL               0
 #define TMEM_NEW_POOL              1

+ 106 - 30
drivers/xen/xen-pciback/pci_stub.c

@@ -362,6 +362,7 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
 	else {
 		dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n");
 		__pci_reset_function_locked(dev);
+		pci_restore_state(dev);
 	}
 	/* Now disable the device (this also ensures some private device
 	 * data is setup before we export)
@@ -681,14 +682,14 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
 		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
 			" by HVM, kill it\n");
 		kill_domain_by_device(psdev);
-		goto release;
+		goto end;
 	}
 
 	if (!test_bit(_XEN_PCIB_AERHANDLER,
 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 		dev_err(&dev->dev,
 			"guest with no AER driver should have been killed\n");
-		goto release;
+		goto end;
 	}
 	result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
 
@@ -698,9 +699,9 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev)
 			"No AER slot_reset service or disconnected!\n");
 		kill_domain_by_device(psdev);
 	}
-release:
-	pcistub_device_put(psdev);
 end:
+	if (psdev)
+		pcistub_device_put(psdev);
 	up_write(&pcistub_sem);
 	return result;
 
@@ -739,14 +740,14 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
 		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
 			" by HVM, kill it\n");
 		kill_domain_by_device(psdev);
-		goto release;
+		goto end;
 	}
 
 	if (!test_bit(_XEN_PCIB_AERHANDLER,
 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 		dev_err(&dev->dev,
 			"guest with no AER driver should have been killed\n");
-		goto release;
+		goto end;
 	}
 	result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
 
@@ -756,9 +757,9 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev)
 			"No AER mmio_enabled service or disconnected!\n");
 		kill_domain_by_device(psdev);
 	}
-release:
-	pcistub_device_put(psdev);
 end:
+	if (psdev)
+		pcistub_device_put(psdev);
 	up_write(&pcistub_sem);
 	return result;
 }
@@ -797,7 +798,7 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
 		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
 			" by HVM, kill it\n");
 		kill_domain_by_device(psdev);
-		goto release;
+		goto end;
 	}
 
 	/*Guest owns the device yet no aer handler regiested, kill guest*/
@@ -805,7 +806,7 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 		dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
 		kill_domain_by_device(psdev);
-		goto release;
+		goto end;
 	}
 	result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
 
@@ -815,9 +816,9 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev,
 			"No AER error_detected service or disconnected!\n");
 		kill_domain_by_device(psdev);
 	}
-release:
-	pcistub_device_put(psdev);
 end:
+	if (psdev)
+		pcistub_device_put(psdev);
 	up_write(&pcistub_sem);
 	return result;
 }
@@ -851,7 +852,7 @@ static void xen_pcibk_error_resume(struct pci_dev *dev)
 		dev_err(&dev->dev, DRV_NAME " device is not connected or owned"
 			" by HVM, kill it\n");
 		kill_domain_by_device(psdev);
-		goto release;
+		goto end;
 	}
 
 	if (!test_bit(_XEN_PCIB_AERHANDLER,
@@ -859,13 +860,13 @@ static void xen_pcibk_error_resume(struct pci_dev *dev)
 		dev_err(&dev->dev,
 			"guest with no AER driver should have been killed\n");
 		kill_domain_by_device(psdev);
-		goto release;
+		goto end;
 	}
 	common_process(psdev, 1, XEN_PCI_OP_aer_resume,
 		       PCI_ERS_RESULT_RECOVERED);
-release:
-	pcistub_device_put(psdev);
 end:
+	if (psdev)
+		pcistub_device_put(psdev);
 	up_write(&pcistub_sem);
 	return;
 }
@@ -897,17 +898,41 @@ static inline int str_to_slot(const char *buf, int *domain, int *bus,
 			      int *slot, int *func)
 {
 	int err;
+	char wc = '*';
 
 	err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);
-	if (err == 4)
+	switch (err) {
+	case 3:
+		*func = -1;
+		err = sscanf(buf, " %x:%x:%x.%c", domain, bus, slot, &wc);
+		break;
+	case 2:
+		*slot = *func = -1;
+		err = sscanf(buf, " %x:%x:*.%c", domain, bus, &wc);
+		if (err >= 2)
+			++err;
+		break;
+	}
+	if (err == 4 && wc == '*')
 		return 0;
 	else if (err < 0)
 		return -EINVAL;
 
 	/* try again without domain */
 	*domain = 0;
+	wc = '*';
 	err = sscanf(buf, " %x:%x.%x", bus, slot, func);
-	if (err == 3)
+	switch (err) {
+	case 2:
+		*func = -1;
+		err = sscanf(buf, " %x:%x.%c", bus, slot, &wc);
+		break;
+	case 1:
+		*slot = *func = -1;
+		err = sscanf(buf, " %x:*.%c", bus, &wc) + 1;
+		break;
+	}
+	if (err == 3 && wc == '*')
 		return 0;
 
 	return -EINVAL;
@@ -930,6 +955,19 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func)
 {
 	struct pcistub_device_id *pci_dev_id;
 	unsigned long flags;
+	int rc = 0;
+
+	if (slot < 0) {
+		for (slot = 0; !rc && slot < 32; ++slot)
+			rc = pcistub_device_id_add(domain, bus, slot, func);
+		return rc;
+	}
+
+	if (func < 0) {
+		for (func = 0; !rc && func < 8; ++func)
+			rc = pcistub_device_id_add(domain, bus, slot, func);
+		return rc;
+	}
 
 	pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
 	if (!pci_dev_id)
@@ -952,15 +990,15 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func)
 static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
 {
 	struct pcistub_device_id *pci_dev_id, *t;
-	int devfn = PCI_DEVFN(slot, func);
 	int err = -ENOENT;
 	unsigned long flags;
 
 	spin_lock_irqsave(&device_ids_lock, flags);
 	list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids,
 				 slot_list) {
-		if (pci_dev_id->domain == domain
-		    && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
+		if (pci_dev_id->domain == domain && pci_dev_id->bus == bus
+		    && (slot < 0 || PCI_SLOT(pci_dev_id->devfn) == slot)
+		    && (func < 0 || PCI_FUNC(pci_dev_id->devfn) == func)) {
 			/* Don't break; here because it's possible the same
 			 * slot could be in the list more than once
 			 */
@@ -987,7 +1025,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
 	struct config_field *field;
 
 	psdev = pcistub_device_find(domain, bus, slot, func);
-	if (!psdev || !psdev->dev) {
+	if (!psdev) {
 		err = -ENODEV;
 		goto out;
 	}
@@ -1011,6 +1049,8 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
 	if (err)
 		kfree(field);
 out:
+	if (psdev)
+		pcistub_device_put(psdev);
 	return err;
 }
 
@@ -1115,10 +1155,9 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
 
 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
 	if (err)
-		goto out;
+		return err;
 
 	psdev = pcistub_device_find(domain, bus, slot, func);
-
 	if (!psdev)
 		goto out;
 
@@ -1134,6 +1173,8 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
 	if (dev_data->isr_on)
 		dev_data->ack_intr = 1;
 out:
+	if (psdev)
+		pcistub_device_put(psdev);
 	if (!err)
 		err = count;
 	return err;
@@ -1216,15 +1257,16 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf,
 	err = str_to_slot(buf, &domain, &bus, &slot, &func);
 	if (err)
 		goto out;
+	if (slot < 0 || func < 0) {
+		err = -EINVAL;
+		goto out;
+	}
 	psdev = pcistub_device_find(domain, bus, slot, func);
 	if (!psdev) {
 		err = -ENODEV;
 		goto out;
 	}
-	if (!psdev->dev) {
-		err = -ENODEV;
-		goto release;
-	}
+
 	dev_data = pci_get_drvdata(psdev->dev);
 	/* the driver data for a device should never be null at this point */
 	if (!dev_data) {
@@ -1297,17 +1339,51 @@ static int __init pcistub_init(void)
 
 	if (pci_devs_to_hide && *pci_devs_to_hide) {
 		do {
+			char wc = '*';
+
 			parsed = 0;
 
 			err = sscanf(pci_devs_to_hide + pos,
 				     " (%x:%x:%x.%x) %n",
 				     &domain, &bus, &slot, &func, &parsed);
-			if (err != 4) {
+			switch (err) {
+			case 3:
+				func = -1;
+				err = sscanf(pci_devs_to_hide + pos,
+					     " (%x:%x:%x.%c) %n",
+					     &domain, &bus, &slot, &wc,
+					     &parsed);
+				break;
+			case 2:
+				slot = func = -1;
+				err = sscanf(pci_devs_to_hide + pos,
+					     " (%x:%x:*.%c) %n",
+					     &domain, &bus, &wc, &parsed) + 1;
+				break;
+			}
+
+			if (err != 4 || wc != '*') {
 				domain = 0;
+				wc = '*';
 				err = sscanf(pci_devs_to_hide + pos,
 					     " (%x:%x.%x) %n",
 					     &bus, &slot, &func, &parsed);
-				if (err != 3)
+				switch (err) {
+				case 2:
+					func = -1;
+					err = sscanf(pci_devs_to_hide + pos,
+						     " (%x:%x.%c) %n",
+						     &bus, &slot, &wc,
+						     &parsed);
+					break;
+				case 1:
+					slot = func = -1;
+					err = sscanf(pci_devs_to_hide + pos,
+						     " (%x:*.%c) %n",
+						     &bus, &wc, &parsed) + 1;
+					break;
+				}
+				if (err != 3 || wc != '*')
 					goto parse_error;
 			}
 

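With the hunks above, both the sysfs slot strings parsed by str_to_slot() and the pciback.hide= boot parameter parsed in pcistub_init() accept a '*' wild card in the slot and/or function position; a wild-carded slot is expanded over all 32 slots and a wild-carded function over all 8 functions by the new recursion in pcistub_device_id_add(). A few illustrative specifications (the bus/slot numbers are placeholders, not taken from this series):

	xen-pciback.hide=(0000:03:00.0)	/* one function, fully qualified           */
	xen-pciback.hide=(03:1c.*)	/* every function of slot 1c on bus 03     */
	xen-pciback.hide=(0000:03:*.*)	/* every slot and function on bus 03       */
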
+ 2 - 4
drivers/xen/xenbus/xenbus_client.c

@@ -490,8 +490,7 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
 
 	op.host_addr = arbitrary_virt_to_machine(pte).maddr;
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-		BUG();
+	gnttab_batch_map(&op, 1);
 
 	if (op.status != GNTST_okay) {
 		free_vm_area(area);
@@ -572,8 +571,7 @@ int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
 	gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref,
 			  dev->otherend_id);
 
-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-		BUG();
+	gnttab_batch_map(&op, 1);
 
 	if (op.status != GNTST_okay) {
 		xenbus_dev_fatal(dev, op.status,

+ 1 - 1
drivers/xen/xenbus/xenbus_comms.c

@@ -224,7 +224,7 @@ int xb_init_comms(void)
 		int err;
 		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
 						0, "xenbus", &xb_waitq);
-		if (err <= 0) {
+		if (err < 0) {
 			printk(KERN_ERR "XENBUS request irq failed %i\n", err);
 			return err;
 		}

+ 1 - 1
drivers/xen/xenbus/xenbus_dev_backend.c

@@ -107,7 +107,7 @@ static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-const struct file_operations xenbus_backend_fops = {
+static const struct file_operations xenbus_backend_fops = {
 	.open = xenbus_backend_open,
 	.mmap = xenbus_backend_mmap,
 	.unlocked_ioctl = xenbus_backend_ioctl,

+ 40 - 16
drivers/xen/xenbus/xenbus_probe.c

@@ -324,8 +324,8 @@ static int cmp_dev(struct device *dev, void *data)
 	return 0;
 }
 
-struct xenbus_device *xenbus_device_find(const char *nodename,
-					 struct bus_type *bus)
+static struct xenbus_device *xenbus_device_find(const char *nodename,
+						struct bus_type *bus)
 {
 	struct xb_find_info info = { .dev = NULL, .nodename = nodename };
 
@@ -719,17 +719,47 @@ static int __init xenstored_local_init(void)
 	return err;
 }
 
+enum xenstore_init {
+	UNKNOWN,
+	PV,
+	HVM,
+	LOCAL,
+};
 static int __init xenbus_init(void)
 {
 	int err = 0;
+	enum xenstore_init usage = UNKNOWN;
+	uint64_t v = 0;
 
 	if (!xen_domain())
 		return -ENODEV;
 
 	xenbus_ring_ops_init();
 
-	if (xen_hvm_domain()) {
-		uint64_t v = 0;
+	if (xen_pv_domain())
+		usage = PV;
+	if (xen_hvm_domain())
+		usage = HVM;
+	if (xen_hvm_domain() && xen_initial_domain())
+		usage = LOCAL;
+	if (xen_pv_domain() && !xen_start_info->store_evtchn)
+		usage = LOCAL;
+	if (xen_pv_domain() && xen_start_info->store_evtchn)
+		xenstored_ready = 1;
+
+	switch (usage) {
+	case LOCAL:
+		err = xenstored_local_init();
+		if (err)
+			goto out_error;
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
+		break;
+	case PV:
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
+		break;
+	case HVM:
 		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
 		if (err)
 			goto out_error;
@@ -738,18 +768,12 @@ static int __init xenbus_init(void)
 		if (err)
 			goto out_error;
 		xen_store_mfn = (unsigned long)v;
-		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
-	} else {
-		xen_store_evtchn = xen_start_info->store_evtchn;
-		xen_store_mfn = xen_start_info->store_mfn;
-		if (xen_store_evtchn)
-			xenstored_ready = 1;
-		else {
-			err = xenstored_local_init();
-			if (err)
-				goto out_error;
-		}
-		xen_store_interface = mfn_to_virt(xen_store_mfn);
+		xen_store_interface =
+			ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
+		break;
+	default:
+		pr_warn("Xenstore state unknown\n");
+		break;
 	}
 
 	/* Initialize the interface to xenstore. */

+ 1 - 0
drivers/xen/xenbus/xenbus_probe_frontend.c

@@ -21,6 +21,7 @@
 #include <xen/xenbus.h>
 #include <xen/events.h>
 #include <xen/page.h>
+#include <xen/xen.h>
 
 #include <xen/platform_pci.h>
 

+ 2 - 1
drivers/xen/xenbus/xenbus_xs.c

@@ -44,6 +44,7 @@
 #include <linux/rwsem.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <asm/xen/hypervisor.h>
 #include <xen/xenbus.h>
 #include <xen/xen.h>
 #include "xenbus_comms.h"
@@ -622,7 +623,7 @@ static void xs_reset_watches(void)
 {
 	int err, supported = 0;
 
-	if (!xen_hvm_domain())
+	if (!xen_hvm_domain() || xen_initial_domain())
 		return;
 
 	err = xenbus_scanf(XBT_NIL, "control",

+ 1 - 0
include/linux/swiotlb.h

@@ -25,6 +25,7 @@ extern int swiotlb_force;
 extern void swiotlb_init(int verbose);
 extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
 extern unsigned long swiotlb_nr_tbl(void);
+extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
 
 /*
  * Enumeration for sync targets

+ 12 - 0
include/xen/grant_table.h

@@ -190,4 +190,16 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		      struct gnttab_map_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count);
 
+/* Perform a batch of grant map/copy operations. Retry every batch slot
+ * for which the hypervisor returns GNTST_eagain. This is typically due
+ * to paged out target frames.
+ *
+ * Will retry for 1, 2, ... 255 ms, i.e. 256 times during 32 seconds.
+ *
+ * The return value in each and every status field of the batch is
+ * guaranteed not to be GNTST_eagain.
+ */
+void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count);
+void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count);
+
 #endif /* __ASM_GNTTAB_H__ */
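
The xenbus conversion earlier in this section shows the intended calling pattern for the new helper: build the map op as before, then let gnttab_batch_map() issue the hypercall and retry any GNTST_eagain slots. A minimal sketch (vaddr, gnt_ref and otherend_id are placeholder locals, not names from this patch):

	struct gnttab_map_grant_ref op;

	gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
			  gnt_ref, otherend_id);
	gnttab_batch_map(&op, 1);	/* absorbs transient GNTST_eagain */

	if (op.status != GNTST_okay)	/* whatever is left is a real error */
		return op.status;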

+ 8 - 4
include/xen/interface/grant_table.h

@@ -338,7 +338,7 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
 #define GNTTABOP_transfer                4
 struct gnttab_transfer {
     /* IN parameters. */
-    unsigned long mfn;
+    xen_pfn_t mfn;
     domid_t       domid;
     grant_ref_t   ref;
     /* OUT parameters. */
@@ -375,7 +375,7 @@ struct gnttab_copy {
 	struct {
 		union {
 			grant_ref_t ref;
-			unsigned long   gmfn;
+			xen_pfn_t   gmfn;
 		} u;
 		domid_t  domid;
 		uint16_t offset;
@@ -519,7 +519,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
 #define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
 #define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
 #define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
-#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary */
+#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
+#define GNTST_address_too_big (-11) /* transfer page address too large.      */
+#define GNTST_eagain          (-12) /* Operation not done; try again.        */
 
 #define GNTTABOP_error_msgs {                   \
     "okay",                                     \
@@ -532,7 +534,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
     "no spare translation slot in the I/O MMU", \
     "permission denied",                        \
     "bad page",                                 \
-    "copy arguments cross page boundary"        \
+    "copy arguments cross page boundary",       \
+    "page address size too large",              \
+    "operation not done; try again"             \
 }
 
 #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */

+ 6 - 3
include/xen/interface/memory.h

@@ -31,7 +31,7 @@ struct xen_memory_reservation {
      *   OUT: GMFN bases of extents that were allocated
      *   (NB. This command also updates the mach_to_phys translation table)
      */
-    GUEST_HANDLE(ulong) extent_start;
+    GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /* Number of extents, and size/alignment of each (2^extent_order pages). */
     unsigned long  nr_extents;
@@ -130,7 +130,7 @@ struct xen_machphys_mfn_list {
      * any large discontiguities in the machine address space, 2MB gaps in
      * the machphys table will be represented by an MFN base of zero.
      */
-    GUEST_HANDLE(ulong) extent_start;
+    GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /*
      * Number of extents written to the above array. This will be smaller
@@ -163,6 +163,9 @@ struct xen_add_to_physmap {
     /* Which domain to change the mapping for. */
     domid_t domid;
 
+    /* Number of pages to go through for gmfn_range */
+    uint16_t    size;
+
     /* Source mapping space. */
 #define XENMAPSPACE_shared_info 0 /* shared info page */
 #define XENMAPSPACE_grant_table 1 /* grant table page */
@@ -172,7 +175,7 @@ struct xen_add_to_physmap {
     unsigned long idx;
 
     /* GPFN where the source mapping page should appear. */
-    unsigned long gpfn;
+    xen_pfn_t gpfn;
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
 

+ 5 - 2
include/xen/interface/platform.h

@@ -54,7 +54,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t);
 #define XENPF_add_memtype         31
 struct xenpf_add_memtype {
 	/* IN variables. */
-	unsigned long mfn;
+	xen_pfn_t mfn;
 	uint64_t nr_mfns;
 	uint32_t type;
 	/* OUT variables. */
@@ -84,7 +84,7 @@ struct xenpf_read_memtype {
 	/* IN variables. */
 	uint32_t reg;
 	/* OUT variables. */
-	unsigned long mfn;
+	xen_pfn_t mfn;
 	uint64_t nr_mfns;
 	uint32_t type;
 };
@@ -112,6 +112,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t);
 #define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
 #define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
 #define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
+#define XEN_FW_KBD_SHIFT_FLAGS    5 /* Int16, Fn02: Get keyboard shift flags. */
 struct xenpf_firmware_info {
 	/* IN variables. */
 	uint32_t type;
@@ -142,6 +143,8 @@ struct xenpf_firmware_info {
 			/* must refer to 128-byte buffer */
 			GUEST_HANDLE(uchar) edid;
 		} vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+
+		uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */
 	} u;
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t);
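
A dom0 kernel can fetch the new firmware-info type with the usual XENPF_firmware_info platform op. The sketch below follows that convention; struct xen_platform_op, XENPF_INTERFACE_VERSION and the HYPERVISOR_dom0_op() hypercall are the pre-existing Linux interfaces rather than something added by this hunk, and kbd_status is a placeholder destination:

	struct xen_platform_op op = {
		.cmd = XENPF_firmware_info,
		.interface_version = XENPF_INTERFACE_VERSION,
		.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
	};

	if (HYPERVISOR_dom0_op(&op) == 0)
		kbd_status = op.u.firmware_info.u.kbd_shift_flags;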

+ 3 - 0
include/xen/interface/version.h

@@ -60,4 +60,7 @@ struct xen_feature_info {
 /* arg == NULL; returns host memory page size. */
 #define XENVER_pagesize 7
 
+/* arg == xen_domain_handle_t. */
+#define XENVER_guest_handle 8
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */

+ 4 - 4
include/xen/interface/xen.h

@@ -10,7 +10,6 @@
 #define __XEN_PUBLIC_XEN_H__
 
 #include <asm/xen/interface.h>
-#include <asm/pvclock-abi.h>
 
 /*
  * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
@@ -190,7 +189,7 @@ struct mmuext_op {
 	unsigned int cmd;
 	union {
 		/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
-		unsigned long mfn;
+		xen_pfn_t mfn;
 		/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
 		unsigned long linear_addr;
 	} arg1;
@@ -430,11 +429,11 @@ struct start_info {
 	unsigned long nr_pages;     /* Total pages allocated to this domain.  */
 	unsigned long shared_info;  /* MACHINE address of shared info struct. */
 	uint32_t flags;             /* SIF_xxx flags.                         */
-	unsigned long store_mfn;    /* MACHINE page number of shared page.    */
+	xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
 	uint32_t store_evtchn;      /* Event channel for store communication. */
 	union {
 		struct {
-			unsigned long mfn;  /* MACHINE page number of console page.   */
+			xen_pfn_t mfn;      /* MACHINE page number of console page.   */
 			uint32_t  evtchn;   /* Event channel for console page.        */
 		} domU;
 		struct {
@@ -455,6 +454,7 @@ struct dom0_vga_console_info {
 	uint8_t video_type;
 #define XEN_VGATYPE_TEXT_MODE_3 0x03
 #define XEN_VGATYPE_VESA_LFB    0x23
+#define XEN_VGATYPE_EFI_LFB     0x70
 
 	union {
 		struct {

+ 24 - 3
include/xen/privcmd.h

@@ -35,8 +35,7 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
-
-typedef unsigned long xen_pfn_t;
+#include <xen/interface/xen.h>
 
 struct privcmd_hypercall {
 	__u64 op;
@@ -59,13 +58,33 @@ struct privcmd_mmapbatch {
 	int num;     /* number of pages to populate */
 	domid_t dom; /* target domain */
 	__u64 addr;  /* virtual address */
-	xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+	xen_pfn_t __user *arr; /* array of mfns - or'd with
+				  PRIVCMD_MMAPBATCH_*_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR     0xf0000000U
+#define PRIVCMD_MMAPBATCH_PAGED_ERROR   0x80000000U
+
+struct privcmd_mmapbatch_v2 {
+	unsigned int num; /* number of pages to populate */
+	domid_t dom;      /* target domain */
+	__u64 addr;       /* virtual address */
+	const xen_pfn_t __user *arr; /* array of mfns */
+	int __user *err;  /* array of error codes */
 };
 
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
  * Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 on success (i.e., arg->err contains valid error codes for
+ * each frame).  On an error other than a failed frame remap, -1 is
+ * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
+ * if the operation was otherwise successful but any frame failed with
+ * -ENOENT, then -1 is returned and errno is set to ENOENT.
  */
 #define IOCTL_PRIVCMD_HYPERCALL					\
 	_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
 	_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
 #define IOCTL_PRIVCMD_MMAPBATCH					\
 	_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2				\
+	_IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
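
From userspace the V2 ioctl is driven much like the original batch call, except that the mfn array is now read-only and per-frame results come back through the separate err array. A rough sketch, assuming the customary /dev/xen/privcmd node and a target area already mmap()ed against the same fd (the surrounding setup and variable names are assumptions, not part of this header):

	struct privcmd_mmapbatch_v2 batch = {
		.num  = nr_frames,
		.dom  = domid,
		.addr = (__u64)(uintptr_t)vaddr,	/* previously mapped region   */
		.arr  = mfns,				/* xen_pfn_t[nr_frames]       */
		.err  = errs,				/* int[nr_frames], filled out */
	};

	if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch) == 0) {
		/* all frames handled; errs[i] holds the per-frame result */
	} else if (errno == ENOENT) {
		/* batch otherwise fine, but some frames failed with -ENOENT;
		 * consult errs[] to see which ones */
	} else {
		/* the ioctl as a whole failed: EINVAL, EFAULT, ... */
	}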

+ 1 - 10
include/xen/swiotlb-xen.h

@@ -3,7 +3,7 @@
 
 #include <linux/swiotlb.h>
 
-extern void xen_swiotlb_init(int verbose);
+extern int xen_swiotlb_init(int verbose, bool early);
 
 extern void
 *xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -23,15 +23,6 @@ extern dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 extern void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 				   size_t size, enum dma_data_direction dir,
 				   struct dma_attrs *attrs);
-/*
-extern int
-xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-		   enum dma_data_direction dir);
-
-extern void
-xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-		     enum dma_data_direction dir);
-*/
 extern int
 xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 			 int nelems, enum dma_data_direction dir,

+ 24 - 9
lib/swiotlb.c

@@ -170,7 +170,7 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
  */
-void __init
+static void __init
 swiotlb_init_with_default_size(size_t default_size, int verbose)
 {
 	unsigned long bytes;
@@ -206,8 +206,9 @@ swiotlb_init(int verbose)
 int
 swiotlb_late_init_with_default_size(size_t default_size)
 {
-	unsigned long i, bytes, req_nslabs = io_tlb_nslabs;
+	unsigned long bytes, req_nslabs = io_tlb_nslabs;
 	unsigned int order;
+	int rc = 0;
 
 	if (!io_tlb_nslabs) {
 		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
@@ -229,16 +230,32 @@ swiotlb_late_init_with_default_size(size_t default_size)
 		order--;
 	}
 
-	if (!io_tlb_start)
-		goto cleanup1;
-
+	if (!io_tlb_start) {
+		io_tlb_nslabs = req_nslabs;
+		return -ENOMEM;
+	}
 	if (order != get_order(bytes)) {
 		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
 		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
 		io_tlb_nslabs = SLABS_PER_PAGE << order;
-		bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 	}
+	rc = swiotlb_late_init_with_tbl(io_tlb_start, io_tlb_nslabs);
+	if (rc)
+		free_pages((unsigned long)io_tlb_start, order);
+	return rc;
+}
+
+int
+swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+{
+	unsigned long i, bytes;
+
+	bytes = nslabs << IO_TLB_SHIFT;
+
+	io_tlb_nslabs = nslabs;
+	io_tlb_start = tlb;
 	io_tlb_end = io_tlb_start + bytes;
+
 	memset(io_tlb_start, 0, bytes);
 
 	/*
@@ -288,10 +305,8 @@ cleanup3:
 	io_tlb_list = NULL;
 cleanup2:
 	io_tlb_end = NULL;
-	free_pages((unsigned long)io_tlb_start, order);
 	io_tlb_start = NULL;
-cleanup1:
-	io_tlb_nslabs = req_nslabs;
+	io_tlb_nslabs = 0;
 	return -ENOMEM;
 }
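
The split lets a late initialiser hand in a bounce buffer it allocated itself, which is what the Xen-SWIOTLB late-init path in this series relies on. A minimal sketch of the new calling convention (the allocation flags and the fallback slab count are illustrative choices, not taken from the patch):

	unsigned long nslabs = swiotlb_nr_tbl();
	unsigned long bytes;
	char *tlb;
	int rc;

	if (!nslabs)
		nslabs = 32 * 1024;	/* ~64 MB of 2 KiB slabs, illustrative only */
	bytes = nslabs << IO_TLB_SHIFT;

	tlb = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN, get_order(bytes));
	if (!tlb)
		return -ENOMEM;

	rc = swiotlb_late_init_with_tbl(tlb, nslabs);	/* zero on success */
	if (rc)
		free_pages((unsigned long)tlb, get_order(bytes));
	return rc;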