
Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - a few misc things

 - kexec updates

 - DMA-mapping updates to better support networking DMA operations

 - IPC updates

 - various MM changes to improve DAX fault handling

 - lots of radix-tree changes, mainly to the test suite. All leading up
   to reimplementing the IDA/IDR code to be a wrapper layer over the
   radix-tree. However the final trigger-pulling patch is held off for
   4.11.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (114 commits)
  radix tree test suite: delete unused rcupdate.c
  radix tree test suite: add new tag check
  radix-tree: ensure counts are initialised
  radix tree test suite: cache recently freed objects
  radix tree test suite: add some more functionality
  idr: reduce the number of bits per level from 8 to 6
  rxrpc: abstract away knowledge of IDR internals
  tpm: use idr_find(), not idr_find_slowpath()
  idr: add ida_is_empty
  radix tree test suite: check multiorder iteration
  radix-tree: fix replacement for multiorder entries
  radix-tree: add radix_tree_split_preload()
  radix-tree: add radix_tree_split
  radix-tree: add radix_tree_join
  radix-tree: delete radix_tree_range_tag_if_tagged()
  radix-tree: delete radix_tree_locate_item()
  radix-tree: improve multiorder iterators
  btrfs: fix race in btrfs_free_dummy_fs_info()
  radix-tree: improve dump output
  radix-tree: make radix_tree_find_next_bit more useful
  ...
Linus Torvalds authored 8 years ago (commit a57cb1c1d7)
100 changed files with 1830 additions and 1332 deletions
  1. + 1 - 1  Documentation/filesystems/Locking
  2. + 4 - 1  arch/arc/mm/dma.c
  3. + 10 - 6  arch/arm/common/dmabounce.c
  4. + 6 - 1  arch/avr32/mm/dma-coherent.c
  5. + 7 - 1  arch/blackfin/kernel/dma-mapping.c
  6. + 10 - 4  arch/c6x/kernel/dma.c
  7. + 10 - 4  arch/frv/mb93090-mb00/pci-dma-nommu.c
  8. + 7 - 2  arch/frv/mb93090-mb00/pci-dma.c
  9. + 5 - 1  arch/hexagon/kernel/dma.c
  10. + 7 - 1  arch/m68k/kernel/dma.c
  11. + 13 - 3  arch/metag/kernel/dma.c
  12. + 8 - 2  arch/microblaze/kernel/dma.c
  13. + 1 - 1  arch/mips/loongson64/common/dma-swiotlb.c
  14. + 5 - 3  arch/mips/mm/dma-default.c
  15. + 18 - 8  arch/nios2/mm/dma-mapping.c
  16. + 3 - 0  arch/openrisc/kernel/dma.c
  17. + 15 - 5  arch/parisc/kernel/pci-dma.c
  18. + 8 - 1  arch/powerpc/kernel/dma.c
  19. + 3 - 5  arch/powerpc/platforms/cell/spufs/file.c
  20. + 5 - 2  arch/sh/kernel/dma-nommu.c
  21. + 2 - 2  arch/sparc/kernel/iommu.c
  22. + 2 - 2  arch/sparc/kernel/ioport.c
  23. + 43 - 1  arch/sparc/kernel/nmi.c
  24. + 10 - 2  arch/tile/kernel/pci-dma.c
  25. + 2 - 2  arch/x86/entry/vdso/vma.c
  26. + 2 - 4  arch/x86/kernel/machine_kexec_64.c
  27. + 5 - 2  arch/xtensa/kernel/pci-dma.c
  28. + 1 - 2  drivers/char/agp/alpha-agp.c
  29. + 1 - 1  drivers/char/mspec.c
  30. + 2 - 2  drivers/char/tpm/tpm-chip.c
  31. + 1 - 2  drivers/dax/dax.c
  32. + 2 - 3  drivers/gpu/drm/armada/armada_gem.c
  33. + 5 - 5  drivers/gpu/drm/drm_vm.c
  34. + 4 - 5  drivers/gpu/drm/etnaviv/etnaviv_gem.c
  35. + 2 - 4  drivers/gpu/drm/exynos/exynos_drm_gem.c
  36. + 1 - 1  drivers/gpu/drm/gma500/framebuffer.c
  37. + 2 - 3  drivers/gpu/drm/gma500/gem.c
  38. + 1 - 2  drivers/gpu/drm/i915/i915_gem.c
  39. + 1 - 1  drivers/gpu/drm/i915/i915_gem_userptr.c
  40. + 3 - 5  drivers/gpu/drm/msm/msm_gem.c
  41. + 8 - 12  drivers/gpu/drm/omapdrm/omap_gem.c
  42. + 2 - 2  drivers/gpu/drm/tegra/gem.c
  43. + 1 - 1  drivers/gpu/drm/ttm/ttm_bo_vm.c
  44. + 2 - 3  drivers/gpu/drm/udl/udl_gem.c
  45. + 1 - 1  drivers/gpu/drm/vgem/vgem_drv.c
  46. + 1 - 1  drivers/infiniband/core/umem_odp.c
  47. + 2 - 3  drivers/media/v4l2-core/videobuf-dma-sg.c
  48. + 2 - 3  drivers/misc/cxl/context.c
  49. + 1 - 1  drivers/misc/sgi-gru/grumain.c
  50. + 6 - 1  drivers/net/ethernet/intel/igb/igb.h
  51. + 50 - 27  drivers/net/ethernet/intel/igb/igb_main.c
  52. + 1 - 2  drivers/net/wireless/intel/iwlwifi/dvm/calib.c
  53. + 1 - 1  drivers/staging/android/ion/ion.c
  54. + 3 - 3  drivers/staging/lustre/lustre/llite/vvp_io.c
  55. + 3 - 3  drivers/usb/gadget/function/f_hid.c
  56. + 3 - 3  drivers/usb/gadget/function/f_printer.c
  57. + 1 - 1  drivers/vfio/vfio_iommu_type1.c
  58. + 1 - 1  drivers/xen/privcmd.c
  59. + 3 - 9  fs/btrfs/super.c
  60. + 1 - 0  fs/btrfs/tests/btrfs-tests.c
  61. + 148 - 60  fs/dax.c
  62. + 1 - 1  fs/exec.c
  63. + 12 - 10  fs/userfaultfd.c
  64. + 0 - 7  include/linux/dax.h
  65. + 13 - 7  include/linux/dma-mapping.h
  66. + 2 - 0  include/linux/gfp.h
  67. + 5 - 5  include/linux/huge_mm.h
  68. + 35 - 5  include/linux/idr.h
  69. + 1 - 1  include/linux/kdb.h
  70. + 0 - 6  include/linux/kexec.h
  71. + 17 - 29  include/linux/mm.h
  72. + 24 - 0  include/linux/nmi.h
  73. + 86 - 88  include/linux/radix-tree.h
  74. + 17 - 0  include/linux/signal.h
  75. + 2 - 2  include/linux/userfaultfd_k.h
  76. + 4 - 1  ipc/msg.c
  77. + 236 - 276  ipc/sem.c
  78. + 10 - 3  ipc/shm.c
  79. + 1 - 0  kernel/Makefile
  80. + 2 - 2  kernel/debug/debug_core.c
  81. + 13 - 24  kernel/debug/kdb/kdb_io.c
  82. + 0 - 1  kernel/debug/kdb/kdb_main.c
  83. + 0 - 1  kernel/debug/kdb/kdb_private.h
  84. + 2 - 2  kernel/events/uprobes.c
  85. + 5 - 0  kernel/kcov.c
  86. + 2 - 3  kernel/kexec_core.c
  87. + 2 - 1  kernel/printk/printk.c
  88. + 2 - 2  kernel/relay.c
  89. + 7 - 0  kernel/signal.c
  90. + 5 - 3  kernel/sysctl.c
  91. + 2 - 2  kernel/sysctl_binary.c
  92. + 2 - 1  kernel/time/alarmtimer.c
  93. + 15 - 255  kernel/watchdog.c
  94. + 227 - 0  kernel/watchdog_hld.c
  95. + 4 - 4  lib/Kconfig.debug
  96. + 2 - 1  lib/Kconfig.ubsan
  97. + 560 - 330  lib/radix-tree.c
  98. + 14 - 3  mm/compaction.c
  99. + 7 - 7  mm/filemap.c
  100. + 12 - 8  mm/gup.c

+ 1 - 1
Documentation/filesystems/Locking

@@ -556,7 +556,7 @@ till "end_pgoff". ->map_pages() is called with page table locked and must
 not block.  If it's not possible to reach a page without blocking,
 filesystem should skip it. Filesystem should use do_set_pte() to setup
 page table entry. Pointer to entry associated with the page is passed in
-"pte" field in fault_env structure. Pointers to entries for other offsets
+"pte" field in vm_fault structure. Pointers to entries for other offsets
 should be calculated relative to "pte".
 
 	->page_mkwrite() is called when a previously read-only pte is

+ 4 - 1
arch/arc/mm/dma.c

@@ -158,7 +158,10 @@ static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
 		unsigned long attrs)
 {
 	phys_addr_t paddr = page_to_phys(page) + offset;
-	_dma_cache_sync(paddr, size, dir);
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		_dma_cache_sync(paddr, size, dir);
+
 	return plat_phys_to_dma(dev, paddr);
 }
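
Note on the hunk above and on the arch/ hunks that follow: every architecture's map/unmap hook grows the same guard, so that a caller passing DMA_ATTR_SKIP_CPU_SYNC gets a mapping without the implicit whole-buffer cache maintenance and takes responsibility for syncing itself. A minimal driver-side sketch of the intended usage, hedged: EXAMPLE_RX_BUFSZ is a made-up region size, while the attrs-based helpers are the ones this series itself uses in the igb conversion further down.

#include <linux/dma-mapping.h>

#define EXAMPLE_RX_BUFSZ 2048	/* hypothetical per-frame region size */

static dma_addr_t example_map_rx_page(struct device *dev, struct page *page)
{
	dma_addr_t dma;

	/* Map the whole page once, skipping the implicit full-page sync. */
	dma = dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
				 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, dma))
		return dma;

	/* Hand only the region the device will actually write to it. */
	dma_sync_single_range_for_device(dev, dma, 0, EXAMPLE_RX_BUFSZ,
					 DMA_FROM_DEVICE);
	return dma;
}

The point of the attribute is that a page can stay mapped across many received frames while the driver syncs just the small region it touches each time, which is what makes recycling receive pages cheap.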
 

+ 10 - 6
arch/arm/common/dmabounce.c

@@ -243,7 +243,8 @@ static int needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size)
 }
 
 static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size,
-		enum dma_data_direction dir)
+				    enum dma_data_direction dir,
+				    unsigned long attrs)
 {
 	struct dmabounce_device_info *device_info = dev->archdata.dmabounce;
 	struct safe_buffer *buf;
@@ -262,7 +263,8 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size,
 		__func__, buf->ptr, virt_to_dma(dev, buf->ptr),
 		buf->safe, buf->safe_dma_addr);
 
-	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) {
+	if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
 		dev_dbg(dev, "%s: copy unsafe %p to safe %p, size %d\n",
 			__func__, ptr, buf->safe, size);
 		memcpy(buf->safe, ptr, size);
@@ -272,7 +274,8 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size,
 }
 
 static inline void unmap_single(struct device *dev, struct safe_buffer *buf,
-		size_t size, enum dma_data_direction dir)
+				size_t size, enum dma_data_direction dir,
+				unsigned long attrs)
 {
 	BUG_ON(buf->size != size);
 	BUG_ON(buf->direction != dir);
@@ -283,7 +286,8 @@ static inline void unmap_single(struct device *dev, struct safe_buffer *buf,
 
 	DO_STATS(dev->archdata.dmabounce->bounce_count++);
 
-	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
+	if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
 		void *ptr = buf->ptr;
 
 		dev_dbg(dev, "%s: copy back safe %p to unsafe %p size %d\n",
@@ -334,7 +338,7 @@ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page,
 		return DMA_ERROR_CODE;
 	}
 
-	return map_single(dev, page_address(page) + offset, size, dir);
+	return map_single(dev, page_address(page) + offset, size, dir, attrs);
 }
 
 /*
@@ -357,7 +361,7 @@ static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t
 		return;
 	}
 
-	unmap_single(dev, buf, size, dir);
+	unmap_single(dev, buf, size, dir, attrs);
 }
 
 static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr,

+ 6 - 1
arch/avr32/mm/dma-coherent.c

@@ -146,7 +146,8 @@ static dma_addr_t avr32_dma_map_page(struct device *dev, struct page *page,
 {
 	void *cpu_addr = page_address(page) + offset;
 
-	dma_cache_sync(dev, cpu_addr, size, direction);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_cache_sync(dev, cpu_addr, size, direction);
 	return virt_to_bus(cpu_addr);
 }
 
@@ -162,6 +163,10 @@ static int avr32_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 		sg->dma_address = page_to_bus(sg_page(sg)) + sg->offset;
 		virt = sg_virt(sg);
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		dma_cache_sync(dev, virt, sg->length, direction);
 	}
 

+ 7 - 1
arch/blackfin/kernel/dma-mapping.c

@@ -118,6 +118,10 @@ static int bfin_dma_map_sg(struct device *dev, struct scatterlist *sg_list,
 
 	for_each_sg(sg_list, sg, nents, i) {
 		sg->dma_address = (dma_addr_t) sg_virt(sg);
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		__dma_sync(sg_dma_address(sg), sg_dma_len(sg), direction);
 	}
 
@@ -143,7 +147,9 @@ static dma_addr_t bfin_dma_map_page(struct device *dev, struct page *page,
 {
 	dma_addr_t handle = (dma_addr_t)(page_address(page) + offset);
 
-	_dma_sync(handle, size, dir);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		_dma_sync(handle, size, dir);
+
 	return handle;
 }
 

+ 10 - 4
arch/c6x/kernel/dma.c

@@ -42,14 +42,17 @@ static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page,
 {
 	dma_addr_t handle = virt_to_phys(page_address(page) + offset);
 
-	c6x_dma_sync(handle, size, dir);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		c6x_dma_sync(handle, size, dir);
+
 	return handle;
 }
 
 static void c6x_dma_unmap_page(struct device *dev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
-	c6x_dma_sync(handle, size, dir);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		c6x_dma_sync(handle, size, dir);
 }
 
 static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -60,7 +63,8 @@ static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 	for_each_sg(sglist, sg, nents, i) {
 		sg->dma_address = sg_phys(sg);
-		c6x_dma_sync(sg->dma_address, sg->length, dir);
+		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+			c6x_dma_sync(sg->dma_address, sg->length, dir);
 	}
 
 	return nents;
@@ -72,9 +76,11 @@ static void c6x_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	struct scatterlist *sg;
 	int i;
 
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return;
+
 	for_each_sg(sglist, sg, nents, i)
 		c6x_dma_sync(sg_dma_address(sg), sg->length, dir);
-
 }
 
 static void c6x_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,

+ 10 - 4
arch/frv/mb93090-mb00/pci-dma-nommu.c

@@ -109,16 +109,19 @@ static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
 		unsigned long attrs)
 {
-	int i;
 	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return nents;
 
 	for_each_sg(sglist, sg, nents, i) {
 		frv_cache_wback_inv(sg_dma_address(sg),
 				    sg_dma_address(sg) + sg_dma_len(sg));
 	}
 
-	BUG_ON(direction == DMA_NONE);
-
 	return nents;
 }
 
@@ -127,7 +130,10 @@ static dma_addr_t frv_dma_map_page(struct device *dev, struct page *page,
 		enum dma_data_direction direction, unsigned long attrs)
 {
 	BUG_ON(direction == DMA_NONE);
-	flush_dcache_page(page);
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		flush_dcache_page(page);
+
 	return (dma_addr_t) page_to_phys(page) + offset;
 }
 

+ 7 - 2
arch/frv/mb93090-mb00/pci-dma.c

@@ -40,13 +40,16 @@ static int frv_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		int nents, enum dma_data_direction direction,
 		unsigned long attrs)
 {
+	struct scatterlist *sg;
 	unsigned long dampr2;
 	void *vaddr;
 	int i;
-	struct scatterlist *sg;
 
 	BUG_ON(direction == DMA_NONE);
 
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return nents;
+
 	dampr2 = __get_DAMPR(2);
 
 	for_each_sg(sglist, sg, nents, i) {
@@ -70,7 +73,9 @@ static dma_addr_t frv_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size,
 		enum dma_data_direction direction, unsigned long attrs)
 {
-	flush_dcache_page(page);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		flush_dcache_page(page);
+
 	return (dma_addr_t) page_to_phys(page) + offset;
 }
 

+ 5 - 1
arch/hexagon/kernel/dma.c

@@ -119,6 +119,9 @@ static int hexagon_map_sg(struct device *hwdev, struct scatterlist *sg,
 
 		s->dma_length = s->length;
 
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		flush_dcache_range(dma_addr_to_virt(s->dma_address),
 				   dma_addr_to_virt(s->dma_address + s->length));
 	}
@@ -180,7 +183,8 @@ static dma_addr_t hexagon_map_page(struct device *dev, struct page *page,
 	if (!check_addr("map_single", dev, bus, size))
 		return bad_dma_address;
 
-	dma_sync(dma_addr_to_virt(bus), size, dir);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_sync(dma_addr_to_virt(bus), size, dir);
 
 	return bus;
 }

+ 7 - 1
arch/m68k/kernel/dma.c

@@ -134,7 +134,9 @@ static dma_addr_t m68k_dma_map_page(struct device *dev, struct page *page,
 {
 	dma_addr_t handle = page_to_phys(page) + offset;
 
-	dma_sync_single_for_device(dev, handle, size, dir);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_sync_single_for_device(dev, handle, size, dir);
+
 	return handle;
 }
 
@@ -146,6 +148,10 @@ static int m68k_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 	for_each_sg(sglist, sg, nents, i) {
 		sg->dma_address = sg_phys(sg);
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		dma_sync_single_for_device(dev, sg->dma_address, sg->length,
 					   dir);
 	}

+ 13 - 3
arch/metag/kernel/dma.c

@@ -484,8 +484,9 @@ static dma_addr_t metag_dma_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size,
 		enum dma_data_direction direction, unsigned long attrs)
 {
-	dma_sync_for_device((void *)(page_to_phys(page) + offset), size,
-			    direction);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_sync_for_device((void *)(page_to_phys(page) + offset),
+				    size, direction);
 	return page_to_phys(page) + offset;
 }
 
@@ -493,7 +494,8 @@ static void metag_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 		size_t size, enum dma_data_direction direction,
 		unsigned long attrs)
 {
-	dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
 }
 
 static int metag_dma_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -507,6 +509,10 @@ static int metag_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 		BUG_ON(!sg_page(sg));
 
 		sg->dma_address = sg_phys(sg);
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		dma_sync_for_device(sg_virt(sg), sg->length, direction);
 	}
 
@@ -525,6 +531,10 @@ static void metag_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		BUG_ON(!sg_page(sg));
 
 		sg->dma_address = sg_phys(sg);
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
 	}
 }

+ 8 - 2
arch/microblaze/kernel/dma.c

@@ -61,6 +61,10 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 	/* FIXME this part of code is untested */
 	for_each_sg(sgl, sg, nents, i) {
 		sg->dma_address = sg_phys(sg);
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		__dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
 							sg->length, direction);
 	}
@@ -80,7 +84,8 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 					     enum dma_data_direction direction,
 					     unsigned long attrs)
 {
-	__dma_sync(page_to_phys(page) + offset, size, direction);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		__dma_sync(page_to_phys(page) + offset, size, direction);
 	return page_to_phys(page) + offset;
 }
 
@@ -95,7 +100,8 @@ static inline void dma_direct_unmap_page(struct device *dev,
  * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and
  * dma_address is physical address
  */
-	__dma_sync(dma_address, size, direction);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		__dma_sync(dma_address, size, direction);
 }
 
 static inline void

+ 1 - 1
arch/mips/loongson64/common/dma-swiotlb.c

@@ -61,7 +61,7 @@ static int loongson_dma_map_sg(struct device *dev, struct scatterlist *sg,
 				int nents, enum dma_data_direction dir,
 				unsigned long attrs)
 {
-	int r = swiotlb_map_sg_attrs(dev, sg, nents, dir, 0);
+	int r = swiotlb_map_sg_attrs(dev, sg, nents, dir, attrs);
 	mb();
 
 	return r;

+ 5 - 3
arch/mips/mm/dma-default.c

@@ -293,7 +293,7 @@ static inline void __dma_sync(struct page *page,
 static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
 	size_t size, enum dma_data_direction direction, unsigned long attrs)
 {
-	if (cpu_needs_post_dma_flush(dev))
+	if (cpu_needs_post_dma_flush(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		__dma_sync(dma_addr_to_page(dev, dma_addr),
 			   dma_addr & ~PAGE_MASK, size, direction);
 	plat_post_dma_flush(dev);
@@ -307,7 +307,8 @@ static int mips_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 	struct scatterlist *sg;
 
 	for_each_sg(sglist, sg, nents, i) {
-		if (!plat_device_is_coherent(dev))
+		if (!plat_device_is_coherent(dev) &&
+		    !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 			__dma_sync(sg_page(sg), sg->offset, sg->length,
 				   direction);
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
@@ -324,7 +325,7 @@ static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page,
 	unsigned long offset, size_t size, enum dma_data_direction direction,
 	unsigned long attrs)
 {
-	if (!plat_device_is_coherent(dev))
+	if (!plat_device_is_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		__dma_sync(page, offset, size, direction);
 
 	return plat_map_dma_mem_page(dev, page) + offset;
@@ -339,6 +340,7 @@ static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 
 	for_each_sg(sglist, sg, nhwentries, i) {
 		if (!plat_device_is_coherent(dev) &&
+		    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 		    direction != DMA_TO_DEVICE)
 			__dma_sync(sg_page(sg), sg->offset, sg->length,
 				   direction);

+ 18 - 8
arch/nios2/mm/dma-mapping.c

@@ -98,13 +98,17 @@ static int nios2_dma_map_sg(struct device *dev, struct scatterlist *sg,
 	int i;
 
 	for_each_sg(sg, sg, nents, i) {
-		void *addr;
+		void *addr = sg_virt(sg);
 
-		addr = sg_virt(sg);
-		if (addr) {
-			__dma_sync_for_device(addr, sg->length, direction);
-			sg->dma_address = sg_phys(sg);
-		}
+		if (!addr)
+			continue;
+
+		sg->dma_address = sg_phys(sg);
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
+		__dma_sync_for_device(addr, sg->length, direction);
 	}
 
 	return nents;
@@ -117,7 +121,9 @@ static dma_addr_t nios2_dma_map_page(struct device *dev, struct page *page,
 {
 	void *addr = page_address(page) + offset;
 
-	__dma_sync_for_device(addr, size, direction);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		__dma_sync_for_device(addr, size, direction);
+
 	return page_to_phys(page) + offset;
 }
 
@@ -125,7 +131,8 @@ static void nios2_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 		size_t size, enum dma_data_direction direction,
 		unsigned long attrs)
 {
-	__dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		__dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
 }
 
 static void nios2_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
@@ -138,6 +145,9 @@ static void nios2_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 	if (direction == DMA_TO_DEVICE)
 		return;
 
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return;
+
 	for_each_sg(sg, sg, nhwentries, i) {
 		addr = sg_virt(sg);
 		if (addr)

+ 3 - 0
arch/openrisc/kernel/dma.c

@@ -141,6 +141,9 @@ or1k_map_page(struct device *dev, struct page *page,
 	unsigned long cl;
 	dma_addr_t addr = page_to_phys(page) + offset;
 
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return addr;
+
 	switch (dir) {
 	case DMA_TO_DEVICE:
 		/* Flush the dcache for the requested range */

+ 15 - 5
arch/parisc/kernel/pci-dma.c

@@ -459,7 +459,9 @@ static dma_addr_t pa11_dma_map_page(struct device *dev, struct page *page,
 	void *addr = page_address(page) + offset;
 	BUG_ON(direction == DMA_NONE);
 
-	flush_kernel_dcache_range((unsigned long) addr, size);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		flush_kernel_dcache_range((unsigned long) addr, size);
+
 	return virt_to_phys(addr);
 }
 
@@ -469,8 +471,11 @@ static void pa11_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 {
 	BUG_ON(direction == DMA_NONE);
 
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return;
+
 	if (direction == DMA_TO_DEVICE)
-	    return;
+		return;
 
 	/*
 	 * For PCI_DMA_FROMDEVICE this flush is not necessary for the
@@ -479,7 +484,6 @@ static void pa11_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 	 */
 
 	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle), size);
-	return;
 }
 
 static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -496,6 +500,10 @@ static int pa11_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 		sg_dma_address(sg) = (dma_addr_t) virt_to_phys(vaddr);
 		sg_dma_len(sg) = sg->length;
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		flush_kernel_dcache_range(vaddr, sg->length);
 	}
 	return nents;
@@ -510,14 +518,16 @@ static void pa11_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 
 	BUG_ON(direction == DMA_NONE);
 
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return;
+
 	if (direction == DMA_TO_DEVICE)
-	    return;
+		return;
 
 	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
 
 	for_each_sg(sglist, sg, nents, i)
 		flush_kernel_vmap_range(sg_virt(sg), sg->length);
-	return;
 }
 
 static void pa11_dma_sync_single_for_cpu(struct device *dev,

+ 8 - 1
arch/powerpc/kernel/dma.c

@@ -203,6 +203,10 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 	for_each_sg(sgl, sg, nents, i) {
 		sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
 		sg->dma_length = sg->length;
+
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+
 		__dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
 	}
 
@@ -235,7 +239,10 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 					     unsigned long attrs)
 {
 	BUG_ON(dir == DMA_NONE);
-	__dma_sync_page(page, offset, size, dir);
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		__dma_sync_page(page, offset, size, dir);
+
 	return page_to_phys(page) + offset + get_dma_offset(dev);
 }
 

+ 3 - 5
arch/powerpc/platforms/cell/spufs/file.c

@@ -236,7 +236,6 @@ static int
 spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct spu_context *ctx	= vma->vm_file->private_data;
-	unsigned long address = (unsigned long)vmf->virtual_address;
 	unsigned long pfn, offset;
 
 	offset = vmf->pgoff << PAGE_SHIFT;
@@ -244,7 +243,7 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n",
-			address, offset);
+			vmf->address, offset);
 
 	if (spu_acquire(ctx))
 		return VM_FAULT_NOPAGE;
@@ -256,7 +255,7 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
 		pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
 	}
-	vm_insert_pfn(vma, address, pfn);
+	vm_insert_pfn(vma, vmf->address, pfn);
 
 	spu_release(ctx);
 
@@ -355,8 +354,7 @@ static int spufs_ps_fault(struct vm_area_struct *vma,
 		down_read(&current->mm->mmap_sem);
 	} else {
 		area = ctx->spu->problem_phys + ps_offs;
-		vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
-					(area + offset) >> PAGE_SHIFT);
+		vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
 		spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
 	}
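
The spufs hunk above is the first of many mechanical conversions in this series: struct vm_fault's "void __user *virtual_address" is replaced by an "unsigned long address", so the (unsigned long) casts at every fault handler disappear. A hedged sketch of a converted handler; example_phys_base is a placeholder for whatever physical region backs the mapping, not code from this series.

#include <linux/mm.h>

static phys_addr_t example_phys_base;	/* placeholder backing region */

static int example_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long pfn;

	/* vmf->address is already an unsigned long; no cast needed. */
	pfn = (example_phys_base >> PAGE_SHIFT) +
	      ((vmf->address - vma->vm_start) >> PAGE_SHIFT);

	if (vm_insert_pfn(vma, vmf->address, pfn))
		return VM_FAULT_SIGBUS;	/* simplified error handling */

	return VM_FAULT_NOPAGE;
}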
 

+ 5 - 2
arch/sh/kernel/dma-nommu.c

@@ -18,7 +18,9 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 	dma_addr_t addr = page_to_phys(page) + offset;
 
 	WARN_ON(size == 0);
-	dma_cache_sync(dev, page_address(page) + offset, size, dir);
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_cache_sync(dev, page_address(page) + offset, size, dir);
 
 	return addr;
 }
@@ -35,7 +37,8 @@ static int nommu_map_sg(struct device *dev, struct scatterlist *sg,
 	for_each_sg(sg, s, nents, i) {
 		BUG_ON(!sg_page(s));
 
-		dma_cache_sync(dev, sg_virt(s), s->length, dir);
+		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+			dma_cache_sync(dev, sg_virt(s), s->length, dir);
 
 		s->dma_address = sg_phys(s);
 		s->dma_length = s->length;

+ 2 - 2
arch/sparc/kernel/iommu.c

@@ -415,7 +415,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
 
 	/* Step 1: Kick data out of streaming buffers if necessary. */
-	if (strbuf->strbuf_enabled)
+	if (strbuf->strbuf_enabled && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		strbuf_flush(strbuf, iommu, bus_addr, ctx,
 			     npages, direction);
 
@@ -640,7 +640,7 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		base = iommu->page_table + entry;
 
 		dma_handle &= IO_PAGE_MASK;
-		if (strbuf->strbuf_enabled)
+		if (strbuf->strbuf_enabled && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 			strbuf_flush(strbuf, iommu, dma_handle, ctx,
 				     npages, direction);
 

+ 2 - 2
arch/sparc/kernel/ioport.c

@@ -527,7 +527,7 @@ static dma_addr_t pci32_map_page(struct device *dev, struct page *page,
 static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
 			     enum dma_data_direction dir, unsigned long attrs)
 {
-	if (dir != PCI_DMA_TODEVICE)
+	if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		dma_make_coherent(ba, PAGE_ALIGN(size));
 }
 
@@ -572,7 +572,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
 	struct scatterlist *sg;
 	int n;
 
-	if (dir != PCI_DMA_TODEVICE) {
+	if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
 		for_each_sg(sgl, sg, nents, n) {
 			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
 		}

+ 43 - 1
arch/sparc/kernel/nmi.c

@@ -42,7 +42,7 @@ static int panic_on_timeout;
  */
 atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
 EXPORT_SYMBOL(nmi_active);
-
+static int nmi_init_done;
 static unsigned int nmi_hz = HZ;
 static DEFINE_PER_CPU(short, wd_enabled);
 static int endflag __initdata;
@@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
 
 void stop_nmi_watchdog(void *unused)
 {
+	if (!__this_cpu_read(wd_enabled))
+		return;
 	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
 	__this_cpu_write(wd_enabled, 0);
 	atomic_dec(&nmi_active);
@@ -207,6 +209,9 @@ error:
 
 void start_nmi_watchdog(void *unused)
 {
+	if (__this_cpu_read(wd_enabled))
+		return;
+
 	__this_cpu_write(wd_enabled, 1);
 	atomic_inc(&nmi_active);
 
@@ -259,6 +264,8 @@ int __init nmi_init(void)
 		}
 	}
 
+	nmi_init_done = 1;
+
 	return err;
 }
 
@@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str)
 	return 0;
 }
 __setup("nmi_watchdog=", setup_nmi_watchdog);
+
+/*
+ * sparc specific NMI watchdog enable function.
+ * Enables watchdog if it is not enabled already.
+ */
+int watchdog_nmi_enable(unsigned int cpu)
+{
+	if (atomic_read(&nmi_active) == -1) {
+		pr_warn("NMI watchdog cannot be enabled or disabled\n");
+		return -1;
+	}
+
+	/*
+	 * watchdog thread could start even before nmi_init is called.
+	 * Just Return in that case. Let nmi_init finish the init
+	 * process first.
+	 */
+	if (!nmi_init_done)
+		return 0;
+
+	smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1);
+
+	return 0;
+}
+/*
+ * sparc specific NMI watchdog disable function.
+ * Disables watchdog if it is not disabled already.
+ */
+void watchdog_nmi_disable(unsigned int cpu)
+{
+	if (atomic_read(&nmi_active) == -1)
+		pr_warn_once("NMI watchdog cannot be enabled or disabled\n");
+	else
+		smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1);
+}

+ 10 - 2
arch/tile/kernel/pci-dma.c

@@ -213,10 +213,12 @@ static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
 
 	for_each_sg(sglist, sg, nents, i) {
 		sg->dma_address = sg_phys(sg);
-		__dma_prep_pa_range(sg->dma_address, sg->length, direction);
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
 		sg->dma_length = sg->length;
 #endif
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
+		__dma_prep_pa_range(sg->dma_address, sg->length, direction);
 	}
 
 	return nents;
@@ -232,6 +234,8 @@ static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	BUG_ON(!valid_dma_direction(direction));
 	for_each_sg(sglist, sg, nents, i) {
 		sg->dma_address = sg_phys(sg);
+		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+			continue;
 		__dma_complete_pa_range(sg->dma_address, sg->length,
 					direction);
 	}
@@ -245,7 +249,8 @@ static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
 	BUG_ON(!valid_dma_direction(direction));
 
 	BUG_ON(offset + size > PAGE_SIZE);
-	__dma_prep_page(page, offset, size, direction);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		__dma_prep_page(page, offset, size, direction);
 
 	return page_to_pa(page) + offset;
 }
@@ -256,6 +261,9 @@ static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 {
 	BUG_ON(!valid_dma_direction(direction));
 
+	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
+		return;
+
 	__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
 			    dma_address & (PAGE_SIZE - 1), size, direction);
 }

+ 2 - 2
arch/x86/entry/vdso/vma.c

@@ -109,7 +109,7 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 		return VM_FAULT_SIGBUS;
 
 	if (sym_offset == image->sym_vvar_page) {
-		ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+		ret = vm_insert_pfn(vma, vmf->address,
 				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
 	} else if (sym_offset == image->sym_pvclock_page) {
 		struct pvclock_vsyscall_time_info *pvti =
@@ -117,7 +117,7 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
 			ret = vm_insert_pfn(
 				vma,
-				(unsigned long)vmf->virtual_address,
+				vmf->address,
 				__pa(pvti) >> PAGE_SHIFT);
 		}
 	}

+ 2 - 4
arch/x86/kernel/machine_kexec_64.c

@@ -328,7 +328,7 @@ void machine_kexec(struct kimage *image)
 
 void arch_crash_save_vmcoreinfo(void)
 {
-	VMCOREINFO_SYMBOL(phys_base);
+	VMCOREINFO_NUMBER(phys_base);
 	VMCOREINFO_SYMBOL(init_level4_pgt);
 
 #ifdef CONFIG_NUMA
@@ -337,9 +337,7 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
 	vmcoreinfo_append_str("KERNELOFFSET=%lx\n",
 			      kaslr_offset());
-	VMCOREINFO_PAGE_OFFSET(PAGE_OFFSET);
-	VMCOREINFO_VMALLOC_START(VMALLOC_START);
-	VMCOREINFO_VMEMMAP_START(VMEMMAP_START);
+	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
 }
 
 /* arch-dependent functionality related to kexec file-based syscall */

+ 5 - 2
arch/xtensa/kernel/pci-dma.c

@@ -189,7 +189,9 @@ static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
 {
 	dma_addr_t dma_handle = page_to_phys(page) + offset;
 
-	xtensa_sync_single_for_device(dev, dma_handle, size, dir);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		xtensa_sync_single_for_device(dev, dma_handle, size, dir);
+
 	return dma_handle;
 }
 
@@ -197,7 +199,8 @@ static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle,
 			      size_t size, enum dma_data_direction dir,
 			      unsigned long attrs)
 {
-	xtensa_sync_single_for_cpu(dev, dma_handle, size, dir);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		xtensa_sync_single_for_cpu(dev, dma_handle, size, dir);
 }
 
 static int xtensa_map_sg(struct device *dev, struct scatterlist *sg,

+ 1 - 2
drivers/char/agp/alpha-agp.c

@@ -19,8 +19,7 @@ static int alpha_core_agp_vm_fault(struct vm_area_struct *vma,
 	unsigned long pa;
 	struct page *page;
 
-	dma_addr = (unsigned long)vmf->virtual_address - vma->vm_start
-						+ agp->aperture.bus_base;
+	dma_addr = vmf->address - vma->vm_start + agp->aperture.bus_base;
 	pa = agp->ops->translate(agp, dma_addr);
 
 	if (pa == (unsigned long)-EINVAL)

+ 1 - 1
drivers/char/mspec.c

@@ -227,7 +227,7 @@ mspec_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * be because another thread has installed the pte first, so it
 	 * is no problem.
 	 */
-	vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+	vm_insert_pfn(vma, vmf->address, pfn);
 
 	return VM_FAULT_NOPAGE;
 }

+ 2 - 2
drivers/char/tpm/tpm-chip.c

@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(tpm_put_ops);
  *
  * The return'd chip has been tpm_try_get_ops'd and must be released via
  * tpm_put_ops
-  */
+ */
 struct tpm_chip *tpm_chip_find_get(int chip_num)
 {
 	struct tpm_chip *chip, *res = NULL;
@@ -103,7 +103,7 @@ struct tpm_chip *tpm_chip_find_get(int chip_num)
 			}
 		} while (chip_prev != chip_num);
 	} else {
-		chip = idr_find_slowpath(&dev_nums_idr, chip_num);
+		chip = idr_find(&dev_nums_idr, chip_num);
 		if (chip && !tpm_try_get_ops(chip))
 			res = chip;
 	}
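
As the IDR moves onto the radix tree, idr_find_slowpath() is being retired and idr_find() becomes the one lookup primitive, which is all the tpm hunk above changes. A tiny hedged sketch of the plain lookup; example_idr and example_lock are placeholders, not anything from this series.

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);
static DEFINE_IDR(example_idr);

static void *example_lookup(int id)
{
	void *ptr;

	/* idr_find() returns the pointer stored at id, or NULL. */
	spin_lock(&example_lock);
	ptr = idr_find(&example_idr, id);
	spin_unlock(&example_lock);
	return ptr;
}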

+ 1 - 2
drivers/dax/dax.c

@@ -328,7 +328,6 @@ static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
 static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
 		struct vm_fault *vmf)
 {
-	unsigned long vaddr = (unsigned long) vmf->virtual_address;
 	struct device *dev = &dax_dev->dev;
 	struct dax_region *dax_region;
 	int rc = VM_FAULT_SIGBUS;
@@ -353,7 +352,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
 
 	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	rc = vm_insert_mixed(vma, vaddr, pfn);
+	rc = vm_insert_mixed(vma, vmf->address, pfn);
 
 	if (rc == -ENOMEM)
 		return VM_FAULT_OOM;

+ 2 - 3
drivers/gpu/drm/armada/armada_gem.c

@@ -17,12 +17,11 @@
 static int armada_gem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct armada_gem_object *obj = drm_to_armada_gem(vma->vm_private_data);
-	unsigned long addr = (unsigned long)vmf->virtual_address;
 	unsigned long pfn = obj->phys_addr >> PAGE_SHIFT;
 	int ret;
 
-	pfn += (addr - vma->vm_start) >> PAGE_SHIFT;
-	ret = vm_insert_pfn(vma, addr, pfn);
+	pfn += (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	ret = vm_insert_pfn(vma, vmf->address, pfn);
 
 	switch (ret) {
 	case 0:

+ 5 - 5
drivers/gpu/drm/drm_vm.c

@@ -124,8 +124,7 @@ static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * Using vm_pgoff as a selector forces us to use this unusual
 		 * addressing scheme.
 		 */
-		resource_size_t offset = (unsigned long)vmf->virtual_address -
-			vma->vm_start;
+		resource_size_t offset = vmf->address - vma->vm_start;
 		resource_size_t baddr = map->offset + offset;
 		struct drm_agp_mem *agpmem;
 		struct page *page;
@@ -195,7 +194,7 @@ static int drm_do_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (!map)
 		return VM_FAULT_SIGBUS;	/* Nothing allocated */
 
-	offset = (unsigned long)vmf->virtual_address - vma->vm_start;
+	offset = vmf->address - vma->vm_start;
 	i = (unsigned long)map->handle + offset;
 	page = vmalloc_to_page((void *)i);
 	if (!page)
@@ -301,7 +300,8 @@ static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (!dma->pagelist)
 		return VM_FAULT_SIGBUS;	/* Nothing allocated */
 
-	offset = (unsigned long)vmf->virtual_address - vma->vm_start;	/* vm_[pg]off[set] should be 0 */
+	offset = vmf->address - vma->vm_start;
+					/* vm_[pg]off[set] should be 0 */
 	page_nr = offset >> PAGE_SHIFT; /* page_nr could just be vmf->pgoff */
 	page = virt_to_page((void *)dma->pagelist[page_nr]);
 
@@ -337,7 +337,7 @@ static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (!entry->pagelist)
 		return VM_FAULT_SIGBUS;	/* Nothing allocated */
 
-	offset = (unsigned long)vmf->virtual_address - vma->vm_start;
+	offset = vmf->address - vma->vm_start;
 	map_offset = map->offset - (unsigned long)dev->sg->virtual;
 	page_offset = (offset >> PAGE_SHIFT) + (map_offset >> PAGE_SHIFT);
 	page = entry->pagelist[page_offset];

+ 4 - 5
drivers/gpu/drm/etnaviv/etnaviv_gem.c

@@ -202,15 +202,14 @@ int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
-	pgoff = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	page = pages[pgoff];
 
-	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 	     page_to_pfn(page), page_to_pfn(page) << PAGE_SHIFT);
 
-	ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page);
+	ret = vm_insert_page(vma, vmf->address, page);
 
 out:
 	switch (ret) {
@@ -759,7 +758,7 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
 	down_read(&mm->mmap_sem);
 	while (pinned < npages) {
 		ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
-					    flags, pvec + pinned, NULL);
+					    flags, pvec + pinned, NULL, NULL);
 		if (ret < 0)
 			break;
 

+ 2 - 4
drivers/gpu/drm/exynos/exynos_drm_gem.c

@@ -455,8 +455,7 @@ int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	pgoff_t page_offset;
 	int ret;
 
-	page_offset = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) {
 		DRM_ERROR("invalid page offset\n");
@@ -465,8 +464,7 @@ int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	pfn = page_to_pfn(exynos_gem->pages[page_offset]);
-	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
-			__pfn_to_pfn_t(pfn, PFN_DEV));
+	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
 
 out:
 	switch (ret) {

+ 1 - 1
drivers/gpu/drm/gma500/framebuffer.c

@@ -125,7 +125,7 @@ static int psbfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 				  psbfb->gtt->offset;
 
 	page_num = vma_pages(vma);
-	address = (unsigned long)vmf->virtual_address - (vmf->pgoff << PAGE_SHIFT);
+	address = vmf->address - (vmf->pgoff << PAGE_SHIFT);
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 

+ 2 - 3
drivers/gpu/drm/gma500/gem.c

@@ -197,15 +197,14 @@ int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	/* Page relative to the VMA start - we must calculate this ourselves
 	   because vmf->pgoff is the fake GEM offset */
-	page_offset = ((unsigned long) vmf->virtual_address - vma->vm_start)
-				>> PAGE_SHIFT;
+	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	/* CPU view of the page, don't go via the GART for CPU writes */
 	if (r->stolen)
 		pfn = (dev_priv->stolen_base + r->offset) >> PAGE_SHIFT;
 	else
 		pfn = page_to_pfn(r->pages[page_offset]);
-	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+	ret = vm_insert_pfn(vma, vmf->address, pfn);
 
 fail:
 	mutex_unlock(&dev_priv->mmap_mutex);

+ 1 - 2
drivers/gpu/drm/i915/i915_gem.c

@@ -1796,8 +1796,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	int ret;
 
 	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
-		PAGE_SHIFT;
+	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 

+ 1 - 1
drivers/gpu/drm/i915/i915_gem_userptr.c

@@ -515,7 +515,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 					 obj->userptr.ptr + pinned * PAGE_SIZE,
 					 npages - pinned,
 					 flags,
-					 pvec + pinned, NULL);
+					 pvec + pinned, NULL, NULL);
 				if (ret < 0)
 					break;
 

+ 3 - 5
drivers/gpu/drm/msm/msm_gem.c

@@ -225,16 +225,14 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
-	pgoff = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	pfn = page_to_pfn(pages[pgoff]);
 
-	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 			pfn, pfn << PAGE_SHIFT);
 
-	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
-			__pfn_to_pfn_t(pfn, PFN_DEV));
+	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
 
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);

+ 8 - 12
drivers/gpu/drm/omapdrm/omap_gem.c

@@ -398,8 +398,7 @@ static int fault_1d(struct drm_gem_object *obj,
 	pgoff_t pgoff;
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
-	pgoff = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	if (omap_obj->pages) {
 		omap_gem_cpu_sync(obj, pgoff);
@@ -409,11 +408,10 @@ static int fault_1d(struct drm_gem_object *obj,
 		pfn = (omap_obj->paddr >> PAGE_SHIFT) + pgoff;
 	}
 
-	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 			pfn, pfn << PAGE_SHIFT);
 
-	return vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
-			__pfn_to_pfn_t(pfn, PFN_DEV));
+	return vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
 }
 
 /* Special handling for the case of faulting in 2d tiled buffers */
@@ -427,7 +425,7 @@ static int fault_2d(struct drm_gem_object *obj,
 	struct page *pages[64];  /* XXX is this too much to have on stack? */
 	unsigned long pfn;
 	pgoff_t pgoff, base_pgoff;
-	void __user *vaddr;
+	unsigned long vaddr;
 	int i, ret, slots;
 
 	/*
@@ -447,8 +445,7 @@ static int fault_2d(struct drm_gem_object *obj,
 	const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE);
 
 	/* We don't use vmf->pgoff since that has the fake offset: */
-	pgoff = ((unsigned long)vmf->virtual_address -
-			vma->vm_start) >> PAGE_SHIFT;
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	/*
 	 * Actual address we start mapping at is rounded down to previous slot
@@ -459,7 +456,7 @@ static int fault_2d(struct drm_gem_object *obj,
 	/* figure out buffer width in slots */
 	slots = omap_obj->width >> priv->usergart[fmt].slot_shift;
 
-	vaddr = vmf->virtual_address - ((pgoff - base_pgoff) << PAGE_SHIFT);
+	vaddr = vmf->address - ((pgoff - base_pgoff) << PAGE_SHIFT);
 
 	entry = &priv->usergart[fmt].entry[priv->usergart[fmt].last];
 
@@ -503,12 +500,11 @@ static int fault_2d(struct drm_gem_object *obj,
 
 	pfn = entry->paddr >> PAGE_SHIFT;
 
-	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
 			pfn, pfn << PAGE_SHIFT);
 
 	for (i = n; i > 0; i--) {
-		vm_insert_mixed(vma, (unsigned long)vaddr,
-				__pfn_to_pfn_t(pfn, PFN_DEV));
+		vm_insert_mixed(vma, vaddr, __pfn_to_pfn_t(pfn, PFN_DEV));
 		pfn += priv->usergart[fmt].stride_pfn;
 		vaddr += PAGE_SIZE * m;
 	}

+ 2 - 2
drivers/gpu/drm/tegra/gem.c

@@ -452,10 +452,10 @@ static int tegra_bo_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (!bo->pages)
 		return VM_FAULT_SIGBUS;
 
-	offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
+	offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 	page = bo->pages[offset];
 
-	err = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page);
+	err = vm_insert_page(vma, vmf->address, page);
 	switch (err) {
 	case -EAGAIN:
 	case 0:

+ 1 - 1
drivers/gpu/drm/ttm/ttm_bo_vm.c

@@ -101,7 +101,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct page *page;
 	int ret;
 	int i;
-	unsigned long address = (unsigned long)vmf->virtual_address;
+	unsigned long address = vmf->address;
 	int retval = VM_FAULT_NOPAGE;
 	struct ttm_mem_type_manager *man =
 		&bdev->man[bo->mem.mem_type];

+ 2 - 3
drivers/gpu/drm/udl/udl_gem.c

@@ -107,14 +107,13 @@ int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unsigned int page_offset;
 	int ret = 0;
 
-	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
-		PAGE_SHIFT;
+	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	if (!obj->pages)
 		return VM_FAULT_SIGBUS;
 
 	page = obj->pages[page_offset];
-	ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page);
+	ret = vm_insert_page(vma, vmf->address, page);
 	switch (ret) {
 	case -EAGAIN:
 	case 0:

+ 1 - 1
drivers/gpu/drm/vgem/vgem_drv.c

@@ -54,7 +54,7 @@ static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct drm_vgem_gem_object *obj = vma->vm_private_data;
 	/* We don't use vmf->pgoff since that has the fake offset */
-	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	unsigned long vaddr = vmf->address;
 	struct page *page;
 
 	page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,

+ 1 - 1
drivers/infiniband/core/umem_odp.c

@@ -578,7 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 		 */
 		npages = get_user_pages_remote(owning_process, owning_mm,
 				user_virt, gup_num_pages,
-				flags, local_page_list, NULL);
+				flags, local_page_list, NULL, NULL);
 		up_read(&owning_mm->mmap_sem);
 
 		if (npages < 0)

+ 2 - 3
drivers/media/v4l2-core/videobuf-dma-sg.c

@@ -439,13 +439,12 @@ static int videobuf_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct page *page;
 
 	dprintk(3, "fault: fault @ %08lx [vma %08lx-%08lx]\n",
-		(unsigned long)vmf->virtual_address,
-		vma->vm_start, vma->vm_end);
+		vmf->address, vma->vm_start, vma->vm_end);
 
 	page = alloc_page(GFP_USER | __GFP_DMA32);
 	if (!page)
 		return VM_FAULT_OOM;
-	clear_user_highpage(page, (unsigned long)vmf->virtual_address);
+	clear_user_highpage(page, vmf->address);
 	vmf->page = page;
 
 	return 0;

+ 2 - 3
drivers/misc/cxl/context.c

@@ -117,13 +117,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
 static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct cxl_context *ctx = vma->vm_file->private_data;
-	unsigned long address = (unsigned long)vmf->virtual_address;
 	u64 area, offset;
 
 	offset = vmf->pgoff << PAGE_SHIFT;
 
 	pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n",
-			__func__, ctx->pe, address, offset);
+			__func__, ctx->pe, vmf->address, offset);
 
 	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
 		area = ctx->afu->psn_phys;
@@ -155,7 +154,7 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 	}
 
-	vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT);
+	vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
 
 	mutex_unlock(&ctx->status_mutex);
 

+ 1 - 1
drivers/misc/sgi-gru/grumain.c

@@ -932,7 +932,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unsigned long paddr, vaddr;
 	unsigned long expires;
 
-	vaddr = (unsigned long)vmf->virtual_address;
+	vaddr = vmf->address;
 	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
 		vma, vaddr, GSEG_BASE(vaddr));
 	STAT(nopfn);

+ 6 - 1
drivers/net/ethernet/intel/igb/igb.h

@@ -210,7 +210,12 @@ struct igb_tx_buffer {
 struct igb_rx_buffer {
 	dma_addr_t dma;
 	struct page *page;
-	unsigned int page_offset;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+	__u32 page_offset;
+#else
+	__u16 page_offset;
+#endif
+	__u16 pagecnt_bias;
 };
 
 struct igb_tx_queue_stats {

+ 50 - 27
drivers/net/ethernet/intel/igb/igb_main.c

@@ -3947,11 +3947,23 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
 		if (!buffer_info->page)
 			continue;
 
-		dma_unmap_page(rx_ring->dev,
-			       buffer_info->dma,
-			       PAGE_SIZE,
-			       DMA_FROM_DEVICE);
-		__free_page(buffer_info->page);
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      buffer_info->dma,
+					      buffer_info->page_offset,
+					      IGB_RX_BUFSZ,
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(rx_ring->dev,
+				     buffer_info->dma,
+				     PAGE_SIZE,
+				     DMA_FROM_DEVICE,
+				     DMA_ATTR_SKIP_CPU_SYNC);
+		__page_frag_drain(buffer_info->page, 0,
+				  buffer_info->pagecnt_bias);
 
 		buffer_info->page = NULL;
 	}
@@ -6812,12 +6824,6 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring,
 
 	/* transfer page from old buffer to new buffer */
 	*new_buff = *old_buff;
-
-	/* sync the buffer for use by the device */
-	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
-					 old_buff->page_offset,
-					 IGB_RX_BUFSZ,
-					 DMA_FROM_DEVICE);
 }
 
 static inline bool igb_page_is_reserved(struct page *page)
@@ -6829,13 +6835,15 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
 				  struct page *page,
 				  unsigned int truesize)
 {
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+
 	/* avoid re-using remote pages */
 	if (unlikely(igb_page_is_reserved(page)))
 		return false;
 
 #if (PAGE_SIZE < 8192)
 	/* if we are only owner of page we can reuse it */
-	if (unlikely(page_count(page) != 1))
+	if (unlikely(page_ref_count(page) != pagecnt_bias))
 		return false;
 
 	/* flip page offset to other buffer */
@@ -6848,10 +6856,14 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
 		return false;
 #endif
 
-	/* Even if we own the page, we are not allowed to use atomic_set()
-	 * This would break get_page_unless_zero() users.
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
 	 */
-	page_ref_inc(page);
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
 
 	return true;
 }
@@ -6903,7 +6915,6 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring,
 			return true;
 
 		/* this page cannot be reused so discard it */
-		__free_page(page);
 		return false;
 	}
 
@@ -6938,6 +6949,13 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
 	page = rx_buffer->page;
 	prefetchw(page);
 
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
 	if (likely(!skb)) {
 		void *page_addr = page_address(page) +
 				  rx_buffer->page_offset;
@@ -6962,21 +6980,18 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
 		prefetchw(skb->data);
 	}
 
-	/* we are reusing so sync this buffer for CPU use */
-	dma_sync_single_range_for_cpu(rx_ring->dev,
-				      rx_buffer->dma,
-				      rx_buffer->page_offset,
-				      size,
-				      DMA_FROM_DEVICE);
-
 	/* pull page into skb */
 	if (igb_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
 		/* hand second half of page back to the ring */
 		igb_reuse_rx_page(rx_ring, rx_buffer);
 	} else {
-		/* we are not reusing the buffer so unmap it */
-		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+		/* We are not reusing the buffer so unmap it and free
+		 * any references we are holding to it
+		 */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+				     PAGE_SIZE, DMA_FROM_DEVICE,
+				     DMA_ATTR_SKIP_CPU_SYNC);
+		__page_frag_drain(page, 0, rx_buffer->pagecnt_bias);
 	}
 
 	/* clear contents of rx_buffer */
@@ -7234,7 +7249,8 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
 	}
 
 	/* map page for use */
-	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+				 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
 
 	/* if mapping failed free memory back to system since
 	 * there isn't much point in holding memory we can't use
@@ -7249,6 +7265,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
 	bi->dma = dma;
 	bi->page = page;
 	bi->page_offset = 0;
+	bi->pagecnt_bias = 1;
 
 	return true;
 }
@@ -7275,6 +7292,12 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
 		if (!igb_alloc_mapped_page(rx_ring, bi))
 			break;
 
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 IGB_RX_BUFSZ,
+						 DMA_FROM_DEVICE);
+
 		/* Refresh the desc even if buffer_addrs didn't change
 		 * because each write-back erases this info.
 		 */
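
Two ideas in the igb hunks above are worth restating in plain C. First, pagecnt_bias: instead of bumping the atomic page refcount on every reuse, the driver pre-charges a large count and pays for each reuse out of a local bias, re-charging only when the pool is nearly drained. A hedged sketch of that check, not the driver's exact code:

#include <linux/mm.h>
#include <linux/kernel.h>

static bool example_can_reuse_page(struct page *page, u16 *pagecnt_bias)
{
	unsigned int bias = (*pagecnt_bias)--;

	/* Reusable only if we hold every remaining reference ourselves. */
	if (page_ref_count(page) != bias)
		return false;

	/* Top the refcount back up once the local pool is nearly drained. */
	if (unlikely(bias == 1)) {
		page_ref_add(page, USHRT_MAX);
		*pagecnt_bias = USHRT_MAX;
	}
	return true;
}

Second, the receive page is now mapped with DMA_ATTR_SKIP_CPU_SYNC and the driver calls dma_sync_single_range_for_cpu()/_for_device() on just the frame-sized region it recycles, which is exactly the capability the arch-level DMA changes earlier in this commit provide.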

+ 1 - 2
drivers/net/wireless/intel/iwlwifi/dvm/calib.c

@@ -900,8 +900,7 @@ static void iwlagn_gain_computation(struct iwl_priv *priv,
 
 		/* bound gain by 2 bits value max, 3rd bit is sign */
 		data->delta_gain_code[i] =
-			min(abs(delta_g),
-			(s32) CHAIN_NOISE_MAX_DELTA_GAIN_CODE);
+			min(abs(delta_g), CHAIN_NOISE_MAX_DELTA_GAIN_CODE);
 
 		if (delta_g < 0)
 			/*

+ 1 - 1
drivers/staging/android/ion/ion.c

@@ -882,7 +882,7 @@ static int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	BUG_ON(!buffer->pages || !buffer->pages[vmf->pgoff]);
 
 	pfn = page_to_pfn(ion_buffer_page(buffer->pages[vmf->pgoff]));
-	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+	ret = vm_insert_pfn(vma, vmf->address, pfn);
 	mutex_unlock(&buffer->lock);
 	if (ret)
 		return VM_FAULT_ERROR;

+ 3 - 3
drivers/staging/lustre/lustre/llite/vvp_io.c

@@ -1014,7 +1014,7 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 		       "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n",
 		       vmf->page, vmf->page->mapping, vmf->page->index,
 		       (long)vmf->page->flags, page_count(vmf->page),
-		       page_private(vmf->page), vmf->virtual_address);
+		       page_private(vmf->page), (void *)vmf->address);
 		if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
 			lock_page(vmf->page);
 			cfio->ft_flags |= VM_FAULT_LOCKED;
@@ -1025,12 +1025,12 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 	}
 
 	if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
-		CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
+		CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", (void *)vmf->address);
 		return -EFAULT;
 	}
 
 	if (cfio->ft_flags & VM_FAULT_OOM) {
-		CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
+		CDEBUG(D_PAGE, "got addr %p - OOM\n", (void *)vmf->address);
 		return -ENOMEM;
 	}
 

+ 3 - 3
drivers/usb/gadget/function/f_hid.c

@@ -905,7 +905,7 @@ static void hidg_free_inst(struct usb_function_instance *f)
 	mutex_lock(&hidg_ida_lock);
 
 	hidg_put_minor(opts->minor);
-	if (idr_is_empty(&hidg_ida.idr))
+	if (ida_is_empty(&hidg_ida))
 		ghid_cleanup();
 
 	mutex_unlock(&hidg_ida_lock);
@@ -931,7 +931,7 @@ static struct usb_function_instance *hidg_alloc_inst(void)
 
 	mutex_lock(&hidg_ida_lock);
 
-	if (idr_is_empty(&hidg_ida.idr)) {
+	if (ida_is_empty(&hidg_ida)) {
 		status = ghid_setup(NULL, HIDG_MINORS);
 		if (status)  {
 			ret = ERR_PTR(status);
@@ -944,7 +944,7 @@ static struct usb_function_instance *hidg_alloc_inst(void)
 	if (opts->minor < 0) {
 		ret = ERR_PTR(opts->minor);
 		kfree(opts);
-		if (idr_is_empty(&hidg_ida.idr))
+		if (ida_is_empty(&hidg_ida))
 			ghid_cleanup();
 		goto unlock;
 	}

+ 3 - 3
drivers/usb/gadget/function/f_printer.c

@@ -1265,7 +1265,7 @@ static void gprinter_free_inst(struct usb_function_instance *f)
 	mutex_lock(&printer_ida_lock);
 
 	gprinter_put_minor(opts->minor);
-	if (idr_is_empty(&printer_ida.idr))
+	if (ida_is_empty(&printer_ida))
 		gprinter_cleanup();
 
 	mutex_unlock(&printer_ida_lock);
@@ -1289,7 +1289,7 @@ static struct usb_function_instance *gprinter_alloc_inst(void)
 
 	mutex_lock(&printer_ida_lock);
 
-	if (idr_is_empty(&printer_ida.idr)) {
+	if (ida_is_empty(&printer_ida)) {
 		status = gprinter_setup(PRINTER_MINORS);
 		if (status) {
 			ret = ERR_PTR(status);
@@ -1302,7 +1302,7 @@ static struct usb_function_instance *gprinter_alloc_inst(void)
 	if (opts->minor < 0) {
 		ret = ERR_PTR(opts->minor);
 		kfree(opts);
-		if (idr_is_empty(&printer_ida.idr))
+		if (ida_is_empty(&printer_ida))
 			gprinter_cleanup();
 		goto unlock;
 	}

+ 1 - 1
drivers/vfio/vfio_iommu_type1.c

@@ -362,7 +362,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 
 		down_read(&mm->mmap_sem);
 		ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
-					    NULL);
+					    NULL, NULL);
 		up_read(&mm->mmap_sem);
 	}
 

+ 1 - 1
drivers/xen/privcmd.c

@@ -602,7 +602,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
 	       vma, vma->vm_start, vma->vm_end,
-	       vmf->pgoff, vmf->virtual_address);
+	       vmf->pgoff, (void *)vmf->address);
 
 	return VM_FAULT_SIGBUS;
 }

+ 3 - 9
fs/btrfs/super.c

@@ -202,12 +202,12 @@ static struct ratelimit_state printk_limits[] = {
 void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 {
 	struct super_block *sb = fs_info->sb;
-	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1];
+	char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
 	struct va_format vaf;
 	va_list args;
-	const char *type = NULL;
 	int kern_level;
-	struct ratelimit_state *ratelimit;
+	const char *type = logtypes[4];
+	struct ratelimit_state *ratelimit = &printk_limits[4];
 
 	va_start(args, fmt);
 
@@ -223,12 +223,6 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 		fmt += size;
 	}
 
-	if (!type) {
-		*lvl = '\0';
-		type = logtypes[4];
-		ratelimit = &printk_limits[4];
-	}
-
 	vaf.fmt = fmt;
 	vaf.va = &args;
 

+ 1 - 0
fs/btrfs/tests/btrfs-tests.c

@@ -162,6 +162,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 				slot = radix_tree_iter_retry(&iter);
 			continue;
 		}
+		slot = radix_tree_iter_resume(slot, &iter);
 		spin_unlock(&fs_info->buffer_lock);
 		free_extent_buffer_stale(eb);
 		spin_lock(&fs_info->buffer_lock);

+ 148 - 60
fs/dax.c

@@ -31,6 +31,7 @@
 #include <linux/vmstat.h>
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
+#include <linux/mmu_notifier.h>
 #include <linux/iomap.h>
 #include "internal.h"
 
@@ -240,6 +241,23 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	}
 }
 
+static void dax_unlock_mapping_entry(struct address_space *mapping,
+				     pgoff_t index)
+{
+	void *entry, **slot;
+
+	spin_lock_irq(&mapping->tree_lock);
+	entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
+	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
+			 !slot_locked(mapping, slot))) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return;
+	}
+	unlock_slot(mapping, slot);
+	spin_unlock_irq(&mapping->tree_lock);
+	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
+}
+
 static void put_locked_mapping_entry(struct address_space *mapping,
 				     pgoff_t index, void *entry)
 {
@@ -433,22 +451,6 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
-void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
-	void *entry, **slot;
-
-	spin_lock_irq(&mapping->tree_lock);
-	entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
-	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
-			 !slot_locked(mapping, slot))) {
-		spin_unlock_irq(&mapping->tree_lock);
-		return;
-	}
-	unlock_slot(mapping, slot);
-	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
-}
-
 /*
  * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
  * entry to get unlocked before deleting it.
@@ -500,10 +502,8 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	/* This will replace locked radix tree entry with a hole page */
 	page = find_or_create_page(mapping, vmf->pgoff,
 				   vmf->gfp_mask | __GFP_ZERO);
-	if (!page) {
-		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+	if (!page)
 		return VM_FAULT_OOM;
-	}
 	vmf->page = page;
 	return VM_FAULT_LOCKED;
 }
@@ -615,36 +615,107 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 	return new_entry;
 }
 
+static inline unsigned long
+pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
+{
+	unsigned long address;
+
+	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
+	return address;
+}
+
+/* Walk all mappings of a given index of a file and writeprotect them */
+static void dax_mapping_entry_mkclean(struct address_space *mapping,
+				      pgoff_t index, unsigned long pfn)
+{
+	struct vm_area_struct *vma;
+	pte_t *ptep;
+	pte_t pte;
+	spinlock_t *ptl;
+	bool changed;
+
+	i_mmap_lock_read(mapping);
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
+		unsigned long address;
+
+		cond_resched();
+
+		if (!(vma->vm_flags & VM_SHARED))
+			continue;
+
+		address = pgoff_address(index, vma);
+		changed = false;
+		if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+			continue;
+		if (pfn != pte_pfn(*ptep))
+			goto unlock;
+		if (!pte_dirty(*ptep) && !pte_write(*ptep))
+			goto unlock;
+
+		flush_cache_page(vma, address, pfn);
+		pte = ptep_clear_flush(vma, address, ptep);
+		pte = pte_wrprotect(pte);
+		pte = pte_mkclean(pte);
+		set_pte_at(vma->vm_mm, address, ptep, pte);
+		changed = true;
+unlock:
+		pte_unmap_unlock(ptep, ptl);
+
+		if (changed)
+			mmu_notifier_invalidate_page(vma->vm_mm, address);
+	}
+	i_mmap_unlock_read(mapping);
+}
+
 static int dax_writeback_one(struct block_device *bdev,
 		struct address_space *mapping, pgoff_t index, void *entry)
 {
 	struct radix_tree_root *page_tree = &mapping->page_tree;
-	struct radix_tree_node *node;
 	struct blk_dax_ctl dax;
-	void **slot;
+	void *entry2, **slot;
 	int ret = 0;
 
-	spin_lock_irq(&mapping->tree_lock);
 	/*
-	 * Regular page slots are stabilized by the page lock even
-	 * without the tree itself locked.  These unlocked entries
-	 * need verification under the tree lock.
+	 * A page got tagged dirty in DAX mapping? Something is seriously
+	 * wrong.
 	 */
-	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
-		goto unlock;
-	if (*slot != entry)
-		goto unlock;
-
-	/* another fsync thread may have already written back this entry */
-	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-		goto unlock;
+	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+		return -EIO;
 
+	spin_lock_irq(&mapping->tree_lock);
+	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
+	/* Entry got punched out / reallocated? */
+	if (!entry2 || !radix_tree_exceptional_entry(entry2))
+		goto put_unlocked;
+	/*
+	 * Entry got reallocated elsewhere? No need to writeback. We have to
+	 * compare sectors as we must not bail out due to difference in lockbit
+	 * or entry type.
+	 */
+	if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+		goto put_unlocked;
 	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
 				dax_is_zero_entry(entry))) {
 		ret = -EIO;
-		goto unlock;
+		goto put_unlocked;
 	}
 
+	/* Another fsync thread may have already written back this entry */
+	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+		goto put_unlocked;
+	/* Lock the entry to serialize with page faults */
+	entry = lock_slot(mapping, slot);
+	/*
+	 * We can clear the tag now but we have to be careful so that concurrent
+	 * dax_writeback_one() calls for the same index cannot finish before we
+	 * actually flush the caches. This is achieved as the calls will look
+	 * at the entry only under tree_lock and once they do that they will
+	 * see the entry locked and wait for it to unlock.
+	 */
+	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+	spin_unlock_irq(&mapping->tree_lock);
+
 	/*
 	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
 	 * in the middle of a PMD, the 'index' we are given will be aligned to
@@ -654,31 +725,40 @@ static int dax_writeback_one(struct block_device *bdev,
 	 */
 	dax.sector = dax_radix_sector(entry);
 	dax.size = PAGE_SIZE << dax_radix_order(entry);
-	spin_unlock_irq(&mapping->tree_lock);
 
 	/*
 	 * We cannot hold tree_lock while calling dax_map_atomic() because it
 	 * eventually calls cond_resched().
 	 */
 	ret = dax_map_atomic(bdev, &dax);
-	if (ret < 0)
+	if (ret < 0) {
+		put_locked_mapping_entry(mapping, index, entry);
 		return ret;
+	}
 
 	if (WARN_ON_ONCE(ret < dax.size)) {
 		ret = -EIO;
 		goto unmap;
 	}
 
+	dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(dax.pfn));
 	wb_cache_pmem(dax.addr, dax.size);
-
+	/*
+	 * After we have flushed the cache, we can clear the dirty tag. There
+	 * cannot be new dirty data in the pfn after the flush has completed as
+	 * the pfn mappings are writeprotected and fault waits for mapping
+	 * entry lock.
+	 */
 	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
 	spin_unlock_irq(&mapping->tree_lock);
  unmap:
 	dax_unmap_atomic(bdev, &dax);
+	put_locked_mapping_entry(mapping, index, entry);
 	return ret;
 
- unlock:
+ put_unlocked:
+	put_unlocked_mapping_entry(mapping, index, entry2);
 	spin_unlock_irq(&mapping->tree_lock);
 	return ret;
 }
@@ -738,7 +818,7 @@ static int dax_insert_mapping(struct address_space *mapping,
 		struct block_device *bdev, sector_t sector, size_t size,
 		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	unsigned long vaddr = vmf->address;
 	struct blk_dax_ctl dax = {
 		.sector = sector,
 		.size = size,
@@ -767,17 +847,27 @@ int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct file *file = vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
-	void *entry;
+	void *entry, **slot;
 	pgoff_t index = vmf->pgoff;
 
 	spin_lock_irq(&mapping->tree_lock);
-	entry = get_unlocked_mapping_entry(mapping, index, NULL);
-	if (!entry || !radix_tree_exceptional_entry(entry))
-		goto out;
+	entry = get_unlocked_mapping_entry(mapping, index, &slot);
+	if (!entry || !radix_tree_exceptional_entry(entry)) {
+		if (entry)
+			put_unlocked_mapping_entry(mapping, index, entry);
+		spin_unlock_irq(&mapping->tree_lock);
+		return VM_FAULT_NOPAGE;
+	}
 	radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
-	put_unlocked_mapping_entry(mapping, index, entry);
-out:
+	entry = lock_slot(mapping, slot);
 	spin_unlock_irq(&mapping->tree_lock);
+	/*
+	 * If we race with somebody updating the PTE and finish_mkwrite_fault()
+	 * fails, we don't care. We need to return VM_FAULT_NOPAGE and retry
+	 * the fault in either case.
+	 */
+	finish_mkwrite_fault(vmf);
+	put_locked_mapping_entry(mapping, index, entry);
 	return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -948,13 +1038,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct inode *inode = mapping->host;
-	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	unsigned long vaddr = vmf->address;
 	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
 	sector_t sector;
 	struct iomap iomap = { 0 };
 	unsigned flags = IOMAP_FAULT;
 	int error, major = 0;
-	int locked_status = 0;
+	int vmf_ret = 0;
 	void *entry;
 
 	/*
@@ -1007,13 +1097,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 		if (error)
 			goto finish_iomap;
-		if (!radix_tree_exceptional_entry(entry)) {
-			vmf->page = entry;
-			locked_status = VM_FAULT_LOCKED;
-		} else {
-			vmf->entry = entry;
-			locked_status = VM_FAULT_DAX_LOCKED;
-		}
+
+		__SetPageUptodate(vmf->cow_page);
+		vmf_ret = finish_fault(vmf);
+		if (!vmf_ret)
+			vmf_ret = VM_FAULT_DONE_COW;
 		goto finish_iomap;
 	}
 
@@ -1030,7 +1118,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
-			locked_status = dax_load_hole(mapping, entry, vmf);
+			vmf_ret = dax_load_hole(mapping, entry, vmf);
 			break;
 		}
 		/*FALLTHRU*/
@@ -1042,7 +1130,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
  finish_iomap:
 	if (ops->iomap_end) {
-		if (error) {
+		if (error || (vmf_ret & VM_FAULT_ERROR)) {
 			/* keep previous error */
 			ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
 					&iomap);
@@ -1052,7 +1140,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 	}
  unlock_entry:
-	if (!locked_status || error)
+	if (vmf_ret != VM_FAULT_LOCKED || error)
 		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  out:
 	if (error == -ENOMEM)
@@ -1060,9 +1148,9 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	/* -EBUSY is fine, somebody else faulted on the same PTE */
 	if (error < 0 && error != -EBUSY)
 		return VM_FAULT_SIGBUS | major;
-	if (locked_status) {
+	if (vmf_ret) {
 		WARN_ON_ONCE(error); /* -EBUSY from ops->iomap_end? */
-		return locked_status;
+		return vmf_ret;
 	}
 	return VM_FAULT_NOPAGE | major;
 }

+ 1 - 1
fs/exec.c

@@ -209,7 +209,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 	 * doing the exec and bprm->mm is the new process's mm.
 	 */
 	ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
-			&page, NULL);
+			&page, NULL, NULL);
 	if (ret <= 0)
 		return NULL;
 

+ 12 - 10
fs/userfaultfd.c

@@ -257,9 +257,9 @@ out:
  * fatal_signal_pending()s, and the mmap_sem must be released before
  * returning it.
  */
-int handle_userfault(struct fault_env *fe, unsigned long reason)
+int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 {
-	struct mm_struct *mm = fe->vma->vm_mm;
+	struct mm_struct *mm = vmf->vma->vm_mm;
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
 	int ret;
@@ -268,7 +268,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 	BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
 	ret = VM_FAULT_SIGBUS;
-	ctx = fe->vma->vm_userfaultfd_ctx.ctx;
+	ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx)
 		goto out;
 
@@ -301,17 +301,18 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 	 * without first stopping userland access to the memory. For
 	 * VM_UFFD_MISSING userfaults this is enough for now.
 	 */
-	if (unlikely(!(fe->flags & FAULT_FLAG_ALLOW_RETRY))) {
+	if (unlikely(!(vmf->flags & FAULT_FLAG_ALLOW_RETRY))) {
 		/*
 		 * Validate the invariant that nowait must allow retry
 		 * to be sure not to return SIGBUS erroneously on
 		 * nowait invocations.
 		 */
-		BUG_ON(fe->flags & FAULT_FLAG_RETRY_NOWAIT);
+		BUG_ON(vmf->flags & FAULT_FLAG_RETRY_NOWAIT);
 #ifdef CONFIG_DEBUG_VM
 		if (printk_ratelimit()) {
 			printk(KERN_WARNING
-			       "FAULT_FLAG_ALLOW_RETRY missing %x\n", fe->flags);
+			       "FAULT_FLAG_ALLOW_RETRY missing %x\n",
+			       vmf->flags);
 			dump_stack();
 		}
 #endif
@@ -323,7 +324,7 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 	 * and wait.
 	 */
 	ret = VM_FAULT_RETRY;
-	if (fe->flags & FAULT_FLAG_RETRY_NOWAIT)
+	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
 		goto out;
 
 	/* take the reference before dropping the mmap_sem */
@@ -331,11 +332,11 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 
 	init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
 	uwq.wq.private = current;
-	uwq.msg = userfault_msg(fe->address, fe->flags, reason);
+	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
 	uwq.ctx = ctx;
 
 	return_to_userland =
-		(fe->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
+		(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
 		(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
 
 	spin_lock(&ctx->fault_pending_wqh.lock);
@@ -353,7 +354,8 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 			  TASK_KILLABLE);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
-	must_wait = userfaultfd_must_wait(ctx, fe->address, fe->flags, reason);
+	must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
+					  reason);
 	up_read(&mm->mmap_sem);
 
 	if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&

+ 0 - 7
include/linux/dax.h

@@ -46,7 +46,6 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 
 #ifdef CONFIG_FS_DAX
 struct page *read_dax_sector(struct block_device *bdev, sector_t n);
-void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index);
 int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 		unsigned int offset, unsigned int length);
 #else
@@ -55,12 +54,6 @@ static inline struct page *read_dax_sector(struct block_device *bdev,
 {
 	return ERR_PTR(-ENXIO);
 }
-/* Shouldn't ever be called when dax is disabled. */
-static inline void dax_unlock_mapping_entry(struct address_space *mapping,
-					    pgoff_t index)
-{
-	BUG();
-}
 static inline int __dax_zero_page_range(struct block_device *bdev,
 		sector_t sector, unsigned int offset, unsigned int length)
 {

+ 13 - 7
include/linux/dma-mapping.h

@@ -243,29 +243,33 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
 		ops->unmap_sg(dev, sg, nents, dir, attrs);
 }
 
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
-				      size_t offset, size_t size,
-				      enum dma_data_direction dir)
+static inline dma_addr_t dma_map_page_attrs(struct device *dev,
+					    struct page *page,
+					    size_t offset, size_t size,
+					    enum dma_data_direction dir,
+					    unsigned long attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 	dma_addr_t addr;
 
 	kmemcheck_mark_initialized(page_address(page) + offset, size);
 	BUG_ON(!valid_dma_direction(dir));
-	addr = ops->map_page(dev, page, offset, size, dir, 0);
+	addr = ops->map_page(dev, page, offset, size, dir, attrs);
 	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
 
 	return addr;
 }
 
-static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
-				  size_t size, enum dma_data_direction dir)
+static inline void dma_unmap_page_attrs(struct device *dev,
+					dma_addr_t addr, size_t size,
+					enum dma_data_direction dir,
+					unsigned long attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->unmap_page)
-		ops->unmap_page(dev, addr, size, dir, 0);
+		ops->unmap_page(dev, addr, size, dir, attrs);
 	debug_dma_unmap_page(dev, addr, size, dir, false);
 }
 
@@ -385,6 +389,8 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
 #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)
 #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0)
 #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0)
+#define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0)
+#define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0)
 
 extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 			   void *cpu_addr, dma_addr_t dma_addr, size_t size);
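
The *_attrs variants exist so a driver can pass something like DMA_ATTR_SKIP_CPU_SYNC at map/unmap time and perform the cheaper, partial syncs itself; a hedged sketch of that pattern, with 'dev', 'page', 'offset' and 'size' assumed to come from the caller:

/* Sketch, not a definitive implementation: map without the implicit CPU
 * sync, sync only the region actually handed to the device, and pass the
 * same attrs back at unmap time.
 */
static int example_map_for_device(struct device *dev, struct page *page,
				  unsigned int offset, unsigned int size,
				  dma_addr_t *dma)
{
	*dma = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE,
				  DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, *dma))
		return -ENOMEM;

	dma_sync_single_range_for_device(dev, *dma, offset, size,
					 DMA_FROM_DEVICE);
	return 0;
}

static void example_unmap(struct device *dev, dma_addr_t dma)
{
	dma_unmap_page_attrs(dev, dma, PAGE_SIZE, DMA_FROM_DEVICE,
			     DMA_ATTR_SKIP_CPU_SYNC);
}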

+ 2 - 0
include/linux/gfp.h

@@ -506,6 +506,8 @@ extern void free_hot_cold_page(struct page *page, bool cold);
 extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 
 struct page_frag_cache;
+extern void __page_frag_drain(struct page *page, unsigned int order,
+			      unsigned int count);
 extern void *__alloc_page_frag(struct page_frag_cache *nc,
 			       unsigned int fragsz, gfp_t gfp_mask);
 extern void __free_page_frag(void *addr);

+ 5 - 5
include/linux/huge_mm.h

@@ -1,12 +1,12 @@
 #ifndef _LINUX_HUGE_MM_H
 #define _LINUX_HUGE_MM_H
 
-extern int do_huge_pmd_anonymous_page(struct fault_env *fe);
+extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
-extern void huge_pmd_set_accessed(struct fault_env *fe, pmd_t orig_pmd);
-extern int do_huge_pmd_wp_page(struct fault_env *fe, pmd_t orig_pmd);
+extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
 					  pmd_t *pmd,
@@ -142,7 +142,7 @@ static inline int hpage_nr_pages(struct page *page)
 	return 1;
 }
 
-extern int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd);
+extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
 
@@ -212,7 +212,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 	return NULL;
 }
 
-static inline int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t orig_pmd)
+static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	return 0;
 }

+ 35 - 5
include/linux/idr.h

@@ -18,12 +18,11 @@
 #include <linux/rcupdate.h>
 
 /*
- * We want shallower trees and thus more bits covered at each layer.  8
- * bits gives us large enough first layer for most use cases and maximum
- * tree depth of 4.  Each idr_layer is slightly larger than 2k on 64bit and
- * 1k on 32bit.
+ * Using 6 bits at each layer allows us to allocate 7 layers out of each page.
+ * 8 bits only gave us 3 layers out of every pair of pages, which is less
+ * efficient except for trees with a largest element between 192-255 inclusive.
  */
-#define IDR_BITS 8
+#define IDR_BITS 6
 #define IDR_SIZE (1 << IDR_BITS)
 #define IDR_MASK ((1 << IDR_BITS)-1)
 
@@ -55,6 +54,32 @@ struct idr {
 }
 #define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
+/**
+ * idr_get_cursor - Return the current position of the cyclic allocator
+ * @idr: idr handle
+ *
+ * The value returned is the value that will be next returned from
+ * idr_alloc_cyclic() if it is free (otherwise the search will start from
+ * this position).
+ */
+static inline unsigned int idr_get_cursor(struct idr *idr)
+{
+	return READ_ONCE(idr->cur);
+}
+
+/**
+ * idr_set_cursor - Set the current position of the cyclic allocator
+ * @idr: idr handle
+ * @val: new position
+ *
+ * The next call to idr_alloc_cyclic() will return @val if it is free
+ * (otherwise the search will start from this position).
+ */
+static inline void idr_set_cursor(struct idr *idr, unsigned int val)
+{
+	WRITE_ONCE(idr->cur, val);
+}
+
 /**
  * DOC: idr sync
  * idr synchronization (stolen from radix-tree.h)
@@ -195,6 +220,11 @@ static inline int ida_get_new(struct ida *ida, int *p_id)
 	return ida_get_new_above(ida, 0, p_id);
 }
 
+static inline bool ida_is_empty(struct ida *ida)
+{
+	return idr_is_empty(&ida->idr);
+}
+
 void __init idr_init_cache(void);
 
 #endif /* __IDR_H__ */
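
Two hedged usage sketches for the additions above: ida_is_empty() gating lazy teardown (the same pattern the f_hid/f_printer hunks above switch to), and the cursor helpers round-tripping the cyclic allocation position. example_cleanup() is a hypothetical hook.

static DEFINE_IDA(example_ida);

/* Sketch: tear down shared state once the last ID is released, as the
 * gadget hunks above now do with ida_is_empty().
 */
static void example_put_id(int id)
{
	ida_remove(&example_ida, id);
	if (ida_is_empty(&example_ida))
		example_cleanup();		/* hypothetical teardown */
}

/* Sketch: save and later restore the cyclic allocator position. */
static unsigned int example_checkpoint(struct idr *idr)
{
	return idr_get_cursor(idr);
}

static void example_restore(struct idr *idr, unsigned int cursor)
{
	idr_set_cursor(idr, cursor);
}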

+ 1 - 1
include/linux/kdb.h

@@ -77,7 +77,6 @@ extern int kdb_poll_idx;
  * number whenever the kernel debugger is entered.
  */
 extern int kdb_initial_cpu;
-extern atomic_t kdb_event;
 
 /* Types and messages used for dynamically added kdb shell commands */
 
@@ -162,6 +161,7 @@ enum kdb_msgsrc {
 };
 
 extern int kdb_trap_printk;
+extern int kdb_printf_cpu;
 extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
 				      va_list args);
 extern __printf(1, 2) int kdb_printf(const char *, ...);

+ 0 - 6
include/linux/kexec.h

@@ -259,12 +259,6 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 	vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
-#define VMCOREINFO_PAGE_OFFSET(value) \
-	vmcoreinfo_append_str("PAGE_OFFSET=%lx\n", (unsigned long)value)
-#define VMCOREINFO_VMALLOC_START(value) \
-	vmcoreinfo_append_str("VMALLOC_START=%lx\n", (unsigned long)value)
-#define VMCOREINFO_VMEMMAP_START(value) \
-	vmcoreinfo_append_str("VMEMMAP_START=%lx\n", (unsigned long)value)
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;

+ 17 - 29
include/linux/mm.h

@@ -292,36 +292,23 @@ extern pgprot_t protection_map[16];
  * pgoff should be used in favour of virtual_address, if possible.
  */
 struct vm_fault {
+	struct vm_area_struct *vma;	/* Target VMA */
 	unsigned int flags;		/* FAULT_FLAG_xxx flags */
 	gfp_t gfp_mask;			/* gfp mask to be used for allocations */
 	pgoff_t pgoff;			/* Logical page offset based on vma */
-	void __user *virtual_address;	/* Faulting virtual address */
+	unsigned long address;		/* Faulting virtual address */
+	pmd_t *pmd;			/* Pointer to pmd entry matching
+					 * the 'address' */
+	pte_t orig_pte;			/* Value of PTE at the time of fault */
 
-	struct page *cow_page;		/* Handler may choose to COW */
+	struct page *cow_page;		/* Page handler may use for COW fault */
+	struct mem_cgroup *memcg;	/* Cgroup cow_page belongs to */
 	struct page *page;		/* ->fault handlers should return a
 					 * page here, unless VM_FAULT_NOPAGE
 					 * is set (which is also implied by
 					 * VM_FAULT_ERROR).
 					 */
-	void *entry;			/* ->fault handler can alternatively
-					 * return locked DAX entry. In that
-					 * case handler should return
-					 * VM_FAULT_DAX_LOCKED and fill in
-					 * entry here.
-					 */
-};
-
-/*
- * Page fault context: passes though page fault handler instead of endless list
- * of function arguments.
- */
-struct fault_env {
-	struct vm_area_struct *vma;	/* Target VMA */
-	unsigned long address;		/* Faulting virtual address */
-	unsigned int flags;		/* FAULT_FLAG_xxx flags */
-	pmd_t *pmd;			/* Pointer to pmd entry matching
-					 * the 'address'
-					 */
+	/* These three entries are valid only while holding ptl lock */
 	pte_t *pte;			/* Pointer to pte entry matching
 					 * the 'address'. NULL if the page
 					 * table hasn't been allocated.
@@ -351,7 +338,7 @@ struct vm_operations_struct {
 	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
 						pmd_t *, unsigned int flags);
-	void (*map_pages)(struct fault_env *fe,
+	void (*map_pages)(struct vm_fault *vmf,
 			pgoff_t start_pgoff, pgoff_t end_pgoff);
 
 	/* notification that a previously read-only page is about to become
@@ -625,8 +612,10 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
+int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
+int finish_fault(struct vm_fault *vmf);
+int finish_mkwrite_fault(struct vm_fault *vmf);
 #endif
 
 /*
@@ -1110,7 +1099,7 @@ static inline void clear_page_pfmemalloc(struct page *page)
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
-#define VM_FAULT_DAX_LOCKED 0x1000	/* ->fault has locked DAX entry */
+#define VM_FAULT_DONE_COW   0x1000	/* ->fault has fully handled COW */
 
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
@@ -1221,6 +1210,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
+	       spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn);
 int follow_phys(struct vm_area_struct *vma, unsigned long address,
@@ -1276,15 +1267,12 @@ extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
-			    struct vm_area_struct **vmas);
+			    struct vm_area_struct **vmas, int *locked);
 long get_user_pages(unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
 			    struct vm_area_struct **vmas);
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages, int *locked);
-long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
-			       unsigned long start, unsigned long nr_pages,
-			       struct page **pages, unsigned int gup_flags);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
@@ -2099,7 +2087,7 @@ extern void truncate_inode_pages_final(struct address_space *);
 
 /* generic vm_area_ops exported for stackable file systems */
 extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
-extern void filemap_map_pages(struct fault_env *fe,
+extern void filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 

+ 24 - 0
include/linux/nmi.h

@@ -7,6 +7,23 @@
 #include <linux/sched.h>
 #include <asm/irq.h>
 
+/*
+ * The run state of the lockup detectors is controlled by the content of the
+ * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
+ * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
+ *
+ * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
+ * are variables that are only used as an 'interface' between the parameters
+ * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
+ * 'watchdog_thresh' variable is handled differently because its value is not
+ * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
+ * is equal to zero.

+ */
+#define NMI_WATCHDOG_ENABLED_BIT   0
+#define SOFT_WATCHDOG_ENABLED_BIT  1
+#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
+#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
+
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
  * 
@@ -91,9 +108,16 @@ extern int nmi_watchdog_enabled;
 extern int soft_watchdog_enabled;
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
+extern unsigned long watchdog_enabled;
 extern unsigned long *watchdog_cpumask_bits;
+#ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
+#endif
+extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
 			 void __user *, size_t *, loff_t *);
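
The bit layout documented above composes straightforwardly; a trivial hedged illustration:

/* Both detectors on: bit 0 (hard) and bit 1 (soft) set in watchdog_enabled. */
unsigned long wd = NMI_WATCHDOG_ENABLED | SOFT_WATCHDOG_ENABLED;	/* 0x3 */
bool hard_on = wd & NMI_WATCHDOG_ENABLED;				/* bit 0 */
bool soft_on = wd & SOFT_WATCHDOG_ENABLED;				/* bit 1 */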

+ 86 - 88
include/linux/radix-tree.h

@@ -80,23 +80,25 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
 					  RADIX_TREE_MAP_SHIFT))
 
+/*
+ * @count is the count of every non-NULL element in the ->slots array
+ * whether that is an exceptional entry, a retry entry, a user pointer,
+ * a sibling entry or a pointer to the next level of the tree.
+ * @exceptional is the count of every element in ->slots which is
+ * either radix_tree_exceptional_entry() or is a sibling entry for an
+ * exceptional entry.
+ */
 struct radix_tree_node {
 	unsigned char	shift;		/* Bits remaining in each slot */
 	unsigned char	offset;		/* Slot offset in parent */
 	unsigned char	count;		/* Total entry count */
 	unsigned char	exceptional;	/* Exceptional entry count */
+	struct radix_tree_node *parent;		/* Used when ascending tree */
+	void *private_data;			/* For tree user */
 	union {
-		struct {
-			/* Used when ascending tree */
-			struct radix_tree_node *parent;
-			/* For tree user */
-			void *private_data;
-		};
-		/* Used when freeing node */
-		struct rcu_head	rcu_head;
+		struct list_head private_list;	/* For tree user */
+		struct rcu_head	rcu_head;	/* Used when freeing node */
 	};
-	/* For tree user */
-	struct list_head private_list;
 	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
@@ -126,6 +128,41 @@ static inline bool radix_tree_empty(struct radix_tree_root *root)
 	return root->rnode == NULL;
 }
 
+/**
+ * struct radix_tree_iter - radix tree iterator state
+ *
+ * @index:	index of current slot
+ * @next_index:	one beyond the last index for this chunk
+ * @tags:	bit-mask for tag-iterating
+ * @node:	node that contains current slot
+ * @shift:	shift for the node that holds our slots
+ *
+ * This radix tree iterator works in terms of "chunks" of slots.  A chunk is a
+ * subinterval of slots contained within one radix tree leaf node.  It is
+ * described by a pointer to its first slot and a struct radix_tree_iter
+ * which holds the chunk's position in the tree and its size.  For tagged
+ * iteration radix_tree_iter also holds the slots' bit-mask for one chosen
+ * radix tree tag.
+ */
+struct radix_tree_iter {
+	unsigned long	index;
+	unsigned long	next_index;
+	unsigned long	tags;
+	struct radix_tree_node *node;
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+	unsigned int	shift;
+#endif
+};
+
+static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
+{
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+	return iter->shift;
+#else
+	return 0;
+#endif
+}
+
 /**
  * Radix-tree synchronization
  *
@@ -264,6 +301,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
 			  void **slot, void *item,
 			  radix_tree_update_node_t update_node, void *private);
+void radix_tree_iter_replace(struct radix_tree_root *,
+		const struct radix_tree_iter *, void **slot, void *item);
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item);
 void __radix_tree_delete_node(struct radix_tree_root *root,
@@ -289,6 +328,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
 int radix_tree_tag_get(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag);
+void radix_tree_iter_tag_set(struct radix_tree_root *root,
+		const struct radix_tree_iter *iter, unsigned int tag);
 unsigned int
 radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 		unsigned long first_index, unsigned int max_items,
@@ -297,50 +338,18 @@ unsigned int
 radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 		unsigned long first_index, unsigned int max_items,
 		unsigned int tag);
-unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
-		unsigned long *first_indexp, unsigned long last_index,
-		unsigned long nr_to_tag,
-		unsigned int fromtag, unsigned int totag);
 int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
 
 static inline void radix_tree_preload_end(void)
 {
 	preempt_enable();
 }
 
-/**
- * struct radix_tree_iter - radix tree iterator state
- *
- * @index:	index of current slot
- * @next_index:	one beyond the last index for this chunk
- * @tags:	bit-mask for tag-iterating
- * @shift:	shift for the node that holds our slots
- *
- * This radix tree iterator works in terms of "chunks" of slots.  A chunk is a
- * subinterval of slots contained within one radix tree leaf node.  It is
- * described by a pointer to its first slot and a struct radix_tree_iter
- * which holds the chunk's position in the tree and its size.  For tagged
- * iteration radix_tree_iter also holds the slots' bit-mask for one chosen
- * radix tree tag.
- */
-struct radix_tree_iter {
-	unsigned long	index;
-	unsigned long	next_index;
-	unsigned long	tags;
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	unsigned int	shift;
-#endif
-};
-
-static inline unsigned int iter_shift(struct radix_tree_iter *iter)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	return iter->shift;
-#else
-	return 0;
-#endif
-}
+int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
+int radix_tree_split(struct radix_tree_root *, unsigned long index,
+			unsigned new_order);
+int radix_tree_join(struct radix_tree_root *, unsigned long index,
+			unsigned new_order, void *);
 
 #define RADIX_TREE_ITER_TAG_MASK	0x00FF	/* tag index in lower byte */
 #define RADIX_TREE_ITER_TAGGED		0x0100	/* lookup tagged slots */
@@ -409,20 +418,17 @@ __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots)
 }
 
 /**
- * radix_tree_iter_next - resume iterating when the chunk may be invalid
- * @iter:	iterator state
+ * radix_tree_iter_resume - resume iterating when the chunk may be invalid
+ * @slot: pointer to current slot
+ * @iter: iterator state
+ * Returns: New slot pointer
  *
  * If the iterator needs to release then reacquire a lock, the chunk may
  * have been invalidated by an insertion or deletion.  Call this function
- * to continue the iteration from the next index.
+ * before releasing the lock to continue the iteration from the next index.
  */
-static inline __must_check
-void **radix_tree_iter_next(struct radix_tree_iter *iter)
-{
-	iter->next_index = __radix_tree_iter_add(iter, 1);
-	iter->tags = 0;
-	return NULL;
-}
+void **__must_check radix_tree_iter_resume(void **slot,
+					struct radix_tree_iter *iter);
 
 /**
  * radix_tree_chunk_size - get current chunk size
@@ -436,10 +442,17 @@ radix_tree_chunk_size(struct radix_tree_iter *iter)
 	return (iter->next_index - iter->index) >> iter_shift(iter);
 }
 
-static inline struct radix_tree_node *entry_to_node(void *ptr)
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
+				unsigned flags);
+#else
+/* Can't happen without sibling entries, but the compiler can't tell that */
+static inline void ** __radix_tree_next_slot(void **slot,
+				struct radix_tree_iter *iter, unsigned flags)
 {
-	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
+	return slot;
 }
+#endif
 
 /**
  * radix_tree_next_slot - find next slot in chunk
@@ -453,7 +466,7 @@ static inline struct radix_tree_node *entry_to_node(void *ptr)
  * For tagged lookup it also eats @iter->tags.
  *
  * There are several cases where 'slot' can be passed in as NULL to this
- * function.  These cases result from the use of radix_tree_iter_next() or
+ * function.  These cases result from the use of radix_tree_iter_resume() or
  * radix_tree_iter_retry().  In these cases we don't end up dereferencing
  * 'slot' because either:
  * a) we are doing tagged iteration and iter->tags has been set to 0, or
@@ -464,51 +477,31 @@ static __always_inline void **
 radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 {
 	if (flags & RADIX_TREE_ITER_TAGGED) {
-		void *canon = slot;
-
 		iter->tags >>= 1;
 		if (unlikely(!iter->tags))
 			return NULL;
-		while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) &&
-					radix_tree_is_internal_node(slot[1])) {
-			if (entry_to_node(slot[1]) == canon) {
-				iter->tags >>= 1;
-				iter->index = __radix_tree_iter_add(iter, 1);
-				slot++;
-				continue;
-			}
-			iter->next_index = __radix_tree_iter_add(iter, 1);
-			return NULL;
-		}
 		if (likely(iter->tags & 1ul)) {
 			iter->index = __radix_tree_iter_add(iter, 1);
-			return slot + 1;
+			slot++;
+			goto found;
 		}
 		if (!(flags & RADIX_TREE_ITER_CONTIG)) {
 			unsigned offset = __ffs(iter->tags);
 
-			iter->tags >>= offset;
-			iter->index = __radix_tree_iter_add(iter, offset + 1);
-			return slot + offset + 1;
+			iter->tags >>= offset++;
+			iter->index = __radix_tree_iter_add(iter, offset);
+			slot += offset;
+			goto found;
 		}
 	} else {
 		long count = radix_tree_chunk_size(iter);
-		void *canon = slot;
 
 		while (--count > 0) {
 			slot++;
 			iter->index = __radix_tree_iter_add(iter, 1);
 
-			if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) &&
-			    radix_tree_is_internal_node(*slot)) {
-				if (entry_to_node(*slot) == canon)
-					continue;
-				iter->next_index = iter->index;
-				break;
-			}
-
 			if (likely(*slot))
-				return slot;
+				goto found;
 			if (flags & RADIX_TREE_ITER_CONTIG) {
 				/* forbid switching to the next chunk */
 				iter->next_index = 0;
@@ -517,6 +510,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 		}
 	}
 	return NULL;
+
+ found:
+	if (unlikely(radix_tree_is_internal_node(*slot)))
+		return __radix_tree_next_slot(slot, iter, flags);
+	return slot;
 }
 
 /**
@@ -567,6 +565,6 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 	     slot || (slot = radix_tree_next_chunk(root, iter,		\
 			      RADIX_TREE_ITER_TAGGED | tag)) ;		\
 	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_TAGGED))
+				RADIX_TREE_ITER_TAGGED | tag))
 
 #endif /* _LINUX_RADIX_TREE_H */
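
The renamed helper is used exactly as in the btrfs test hunk above: capture its return value before dropping the lock so iteration resumes from a fresh chunk. A hedged sketch, assuming a 'root' tree and a 'lock' spinlock owned by the caller:

void **slot;
struct radix_tree_iter iter;

radix_tree_for_each_slot(slot, &root, &iter, 0) {
	void *item = radix_tree_deref_slot(slot);

	if (radix_tree_deref_retry(item)) {
		slot = radix_tree_iter_retry(&iter);
		continue;
	}
	/* Invalidate the cached chunk *before* releasing the lock. */
	slot = radix_tree_iter_resume(slot, &iter);
	spin_unlock(&lock);
	/* ... work that may sleep or modify the tree ... */
	spin_lock(&lock);
}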

+ 17 - 0
include/linux/signal.h

@@ -97,6 +97,23 @@ static inline int sigisemptyset(sigset_t *set)
 	}
 }
 
+static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2)
+{
+	switch (_NSIG_WORDS) {
+	case 4:
+		return	(set1->sig[3] == set2->sig[3]) &&
+			(set1->sig[2] == set2->sig[2]) &&
+			(set1->sig[1] == set2->sig[1]) &&
+			(set1->sig[0] == set2->sig[0]);
+	case 2:
+		return	(set1->sig[1] == set2->sig[1]) &&
+			(set1->sig[0] == set2->sig[0]);
+	case 1:
+		return	set1->sig[0] == set2->sig[0];
+	}
+	return 0;
+}
+
 #define sigmask(sig)	(1UL << ((sig) - 1))
 
 #ifndef __HAVE_ARCH_SIG_SETOPS
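
A small hedged example of the new helper, which replaces open-coded word-by-word comparisons of two signal sets:

/* Sketch: skip the (relatively expensive) update when nothing changed.
 * 'newset' is assumed to have been filled in by the caller.
 */
if (!sigequalsets(&newset, &current->blocked))
	set_current_blocked(&newset);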

+ 2 - 2
include/linux/userfaultfd_k.h

@@ -27,7 +27,7 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
-extern int handle_userfault(struct fault_env *fe, unsigned long reason);
+extern int handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len);
@@ -55,7 +55,7 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
-static inline int handle_userfault(struct fault_env *fe, unsigned long reason)
+static inline int handle_userfault(struct vm_fault *vmf, unsigned long reason)
 {
 	return VM_FAULT_SIGBUS;
 }

+ 4 - 1
ipc/msg.c

@@ -763,7 +763,10 @@ static inline int convert_mode(long *msgtyp, int msgflg)
 	if (*msgtyp == 0)
 		return SEARCH_ANY;
 	if (*msgtyp < 0) {
-		*msgtyp = -*msgtyp;
+		if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
+			*msgtyp = LONG_MAX;
+		else
+			*msgtyp = -*msgtyp;
 		return SEARCH_LESSEQUAL;
 	}
 	if (msgflg & MSG_EXCEPT)
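
The clamp above sidesteps undefined behaviour: LONG_MAX is -(LONG_MIN + 1), so -LONG_MIN overflows a signed long. A self-contained userspace illustration of the same clamp (not kernel code):

#include <limits.h>
#include <stdio.h>

/* Same idea as convert_mode(): -LONG_MIN does not fit in a long, so
 * treat it as LONG_MAX instead of negating it.
 */
static long clamp_negate(long msgtyp)
{
	return msgtyp == LONG_MIN ? LONG_MAX : -msgtyp;
}

int main(void)
{
	printf("%ld -> %ld\n", LONG_MIN, clamp_negate(LONG_MIN));
	printf("%ld -> %ld\n", -5L, clamp_negate(-5L));
	return 0;
}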

+ 236 - 276
ipc/sem.c

@@ -11,6 +11,7 @@
  * (c) 2001 Red Hat Inc
  * Lockless wakeup
  * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
+ * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
  * Further wakeup optimizations, documentation
  * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
  *
@@ -53,15 +54,11 @@
  *   Semaphores are actively given to waiting tasks (necessary for FIFO).
  *   (see update_queue())
  * - To improve the scalability, the actual wake-up calls are performed after
- *   dropping all locks. (see wake_up_sem_queue_prepare(),
- *   wake_up_sem_queue_do())
+ *   dropping all locks. (see wake_up_sem_queue_prepare())
  * - All work is done by the waker, the woken up task does not have to do
  *   anything - not even acquiring a lock or dropping a refcount.
  * - A woken up task may not even touch the semaphore array anymore, it may
  *   have been destroyed already by a semctl(RMID).
- * - The synchronizations between wake-ups due to a timeout/signal and a
- *   wake-up due to a completed semaphore operation is achieved by using an
- *   intermediate state (IN_WAKEUP).
  * - UNDO values are stored in an array (one per process and per
  *   semaphore array, lazily allocated). For backwards compatibility, multiple
  *   modes for the UNDO variables are supported (per process, per thread)
@@ -118,7 +115,8 @@ struct sem_queue {
 	struct sembuf		*sops;	 /* array of pending operations */
 	struct sembuf		*blocking; /* the operation that blocked */
 	int			nsops;	 /* number of operations */
-	int			alter;	 /* does *sops alter the array? */
+	bool			alter;	 /* does *sops alter the array? */
+	bool                    dupsop;	 /* sops on more than one sem_num */
 };
 
 /* Each task has a list of undo requests. They are executed automatically
@@ -416,29 +414,6 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
  *
  * The caller holds the RCU read lock.
  */
-static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
-			int id, struct sembuf *sops, int nsops, int *locknum)
-{
-	struct kern_ipc_perm *ipcp;
-	struct sem_array *sma;
-
-	ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
-	if (IS_ERR(ipcp))
-		return ERR_CAST(ipcp);
-
-	sma = container_of(ipcp, struct sem_array, sem_perm);
-	*locknum = sem_lock(sma, sops, nsops);
-
-	/* ipc_rmid() may have already freed the ID while sem_lock
-	 * was spinning: verify that the structure is still valid
-	 */
-	if (ipc_valid_object(ipcp))
-		return container_of(ipcp, struct sem_array, sem_perm);
-
-	sem_unlock(sma, *locknum);
-	return ERR_PTR(-EINVAL);
-}
-
 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
 {
 	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
@@ -471,40 +446,6 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
 	ipc_rmid(&sem_ids(ns), &s->sem_perm);
 }
 
-/*
- * Lockless wakeup algorithm:
- * Without the check/retry algorithm a lockless wakeup is possible:
- * - queue.status is initialized to -EINTR before blocking.
- * - wakeup is performed by
- *	* unlinking the queue entry from the pending list
- *	* setting queue.status to IN_WAKEUP
- *	  This is the notification for the blocked thread that a
- *	  result value is imminent.
- *	* call wake_up_process
- *	* set queue.status to the final value.
- * - the previously blocked thread checks queue.status:
- *	* if it's IN_WAKEUP, then it must wait until the value changes
- *	* if it's not -EINTR, then the operation was completed by
- *	  update_queue. semtimedop can return queue.status without
- *	  performing any operation on the sem array.
- *	* otherwise it must acquire the spinlock and check what's up.
- *
- * The two-stage algorithm is necessary to protect against the following
- * races:
- * - if queue.status is set after wake_up_process, then the woken up idle
- *   thread could race forward and try (and fail) to acquire sma->lock
- *   before update_queue had a chance to set queue.status
- * - if queue.status is written before wake_up_process and if the
- *   blocked process is woken up by a signal between writing
- *   queue.status and the wake_up_process, then the woken up
- *   process could return from semtimedop and die by calling
- *   sys_exit before wake_up_process is called. Then wake_up_process
- *   will oops, because the task structure is already invalid.
- *   (yes, this happened on s390 with sysv msg).
- *
- */
-#define IN_WAKEUP	1
-
 /**
  * newary - Create a new semaphore set
  * @ns: namespace
@@ -624,15 +565,23 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
 }
 
 /**
- * perform_atomic_semop - Perform (if possible) a semaphore operation
+ * perform_atomic_semop[_slow] - Attempt to perform semaphore
+ *                               operations on a given array.
  * @sma: semaphore array
  * @q: struct sem_queue that describes the operation
  *
+ * Caller blocking is as follows, based on the value
+ * indicated by the semaphore operation (sem_op):
+ *
+ *  (1) >0 never blocks.
+ *  (2)  0 (wait-for-zero operation): semval is non-zero.
+ *  (3) <0 attempting to decrement semval to a value smaller than zero.
+ *
  * Returns 0 if the operation was possible.
  * Returns 1 if the operation is impossible, the caller must sleep.
- * Negative values are error codes.
+ * Returns <0 for error codes.
  */
-static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
+static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 {
 	int result, sem_op, nsops, pid;
 	struct sembuf *sop;
@@ -703,51 +652,84 @@ undo:
 	return result;
 }
 
-/** wake_up_sem_queue_prepare(q, error): Prepare wake-up
- * @q: queue entry that must be signaled
- * @error: Error value for the signal
- *
- * Prepare the wake-up of the queue entry q.
- */
-static void wake_up_sem_queue_prepare(struct list_head *pt,
-				struct sem_queue *q, int error)
+static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 {
-	if (list_empty(pt)) {
-		/*
-		 * Hold preempt off so that we don't get preempted and have the
-		 * wakee busy-wait until we're scheduled back on.
-		 */
-		preempt_disable();
+	int result, sem_op, nsops;
+	struct sembuf *sop;
+	struct sem *curr;
+	struct sembuf *sops;
+	struct sem_undo *un;
+
+	sops = q->sops;
+	nsops = q->nsops;
+	un = q->undo;
+
+	if (unlikely(q->dupsop))
+		return perform_atomic_semop_slow(sma, q);
+
+	/*
+	 * We scan the semaphore set twice, first to ensure that the entire
+	 * operation can succeed, therefore avoiding any pointless writes
+	 * to shared memory and having to undo such changes in order to block
+	 * until the operations can go through.
+	 */
+	for (sop = sops; sop < sops + nsops; sop++) {
+		curr = sma->sem_base + sop->sem_num;
+		sem_op = sop->sem_op;
+		result = curr->semval;
+
+		if (!sem_op && result)
+			goto would_block; /* wait-for-zero */
+
+		result += sem_op;
+		if (result < 0)
+			goto would_block;
+
+		if (result > SEMVMX)
+			return -ERANGE;
+
+		if (sop->sem_flg & SEM_UNDO) {
+			int undo = un->semadj[sop->sem_num] - sem_op;
+
+			/* Exceeding the undo range is an error. */
+			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
+				return -ERANGE;
+		}
+	}
+
+	for (sop = sops; sop < sops + nsops; sop++) {
+		curr = sma->sem_base + sop->sem_num;
+		sem_op = sop->sem_op;
+		result = curr->semval;
+
+		if (sop->sem_flg & SEM_UNDO) {
+			int undo = un->semadj[sop->sem_num] - sem_op;
+
+			un->semadj[sop->sem_num] = undo;
+		}
+		curr->semval += sem_op;
+		curr->sempid = q->pid;
 	}
-	q->status = IN_WAKEUP;
-	q->pid = error;
 
-	list_add_tail(&q->list, pt);
+	return 0;
+
+would_block:
+	q->blocking = sop;
+	return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
 }
 
-/**
- * wake_up_sem_queue_do - do the actual wake-up
- * @pt: list of tasks to be woken up
- *
- * Do the actual wake-up.
- * The function is called without any locks held, thus the semaphore array
- * could be destroyed already and the tasks can disappear as soon as the
- * status is set to the actual return code.
- */
-static void wake_up_sem_queue_do(struct list_head *pt)
+static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
+					     struct wake_q_head *wake_q)
 {
-	struct sem_queue *q, *t;
-	int did_something;
-
-	did_something = !list_empty(pt);
-	list_for_each_entry_safe(q, t, pt, list) {
-		wake_up_process(q->sleeper);
-		/* q can disappear immediately after writing q->status. */
-		smp_wmb();
-		q->status = q->pid;
-	}
-	if (did_something)
-		preempt_enable();
+	wake_q_add(wake_q, q->sleeper);
+	/*
+	 * Rely on the above implicit barrier, such that we can
+	 * ensure that we hold reference to the task before setting
+	 * q->status. Otherwise we could race with do_exit if the
+	 * task is awoken by an external event before calling
+	 * wake_up_process().
+	 */
+	WRITE_ONCE(q->status, error);
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
@@ -767,7 +749,7 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  * modified the array.
  * Note that wait-for-zero operations are handled without restart.
  */
-static int check_restart(struct sem_array *sma, struct sem_queue *q)
+static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
 {
 	/* pending complex alter operations are too difficult to analyse */
 	if (!list_empty(&sma->pending_alter))
@@ -795,21 +777,20 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
  * wake_const_ops - wake up non-alter tasks
  * @sma: semaphore array.
  * @semnum: semaphore that was modified.
- * @pt: list head for the tasks that must be woken up.
+ * @wake_q: lockless wake-queue head.
  *
  * wake_const_ops must be called after a semaphore in a semaphore array
  * was set to 0. If complex const operations are pending, wake_const_ops must
  * be called with semnum = -1, as well as with the number of each modified
  * semaphore.
- * The tasks that must be woken up are added to @pt. The return code
+ * The tasks that must be woken up are added to @wake_q. The return code
  * is stored in q->pid.
  * The function returns 1 if at least one operation was completed successfully.
  */
 static int wake_const_ops(struct sem_array *sma, int semnum,
-				struct list_head *pt)
+			  struct wake_q_head *wake_q)
 {
-	struct sem_queue *q;
-	struct list_head *walk;
+	struct sem_queue *q, *tmp;
 	struct list_head *pending_list;
 	int semop_completed = 0;
 
@@ -818,25 +799,19 @@ static int wake_const_ops(struct sem_array *sma, int semnum,
 	else
 		pending_list = &sma->sem_base[semnum].pending_const;
 
-	walk = pending_list->next;
-	while (walk != pending_list) {
-		int error;
-
-		q = container_of(walk, struct sem_queue, list);
-		walk = walk->next;
-
-		error = perform_atomic_semop(sma, q);
-
-		if (error <= 0) {
-			/* operation completed, remove from queue & wakeup */
+	list_for_each_entry_safe(q, tmp, pending_list, list) {
+		int error = perform_atomic_semop(sma, q);
 
-			unlink_queue(sma, q);
+		if (error > 0)
+			continue;
+		/* operation completed, remove from queue & wakeup */
+		unlink_queue(sma, q);
 
-			wake_up_sem_queue_prepare(pt, q, error);
-			if (error == 0)
-				semop_completed = 1;
-		}
+		wake_up_sem_queue_prepare(q, error, wake_q);
+		if (error == 0)
+			semop_completed = 1;
 	}
+
 	return semop_completed;
 }
 
@@ -845,14 +820,14 @@ static int wake_const_ops(struct sem_array *sma, int semnum,
  * @sma: semaphore array
  * @sops: operations that were performed
  * @nsops: number of operations
- * @pt: list head of the tasks that must be woken up.
+ * @wake_q: lockless wake-queue head
  *
  * Checks all required queue for wait-for-zero operations, based
  * on the actual changes that were performed on the semaphore array.
  * The function returns 1 if at least one operation was completed successfully.
  */
 static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
-					int nsops, struct list_head *pt)
+				int nsops, struct wake_q_head *wake_q)
 {
 	int i;
 	int semop_completed = 0;
@@ -865,7 +840,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 
 			if (sma->sem_base[num].semval == 0) {
 				got_zero = 1;
-				semop_completed |= wake_const_ops(sma, num, pt);
+				semop_completed |= wake_const_ops(sma, num, wake_q);
 			}
 		}
 	} else {
@@ -876,7 +851,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 		for (i = 0; i < sma->sem_nsems; i++) {
 			if (sma->sem_base[i].semval == 0) {
 				got_zero = 1;
-				semop_completed |= wake_const_ops(sma, i, pt);
+				semop_completed |= wake_const_ops(sma, i, wake_q);
 			}
 		}
 	}
@@ -885,7 +860,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 	 * then check the global queue, too.
 	 */
 	if (got_zero)
-		semop_completed |= wake_const_ops(sma, -1, pt);
+		semop_completed |= wake_const_ops(sma, -1, wake_q);
 
 	return semop_completed;
 }
@@ -895,22 +870,21 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
  * update_queue - look for tasks that can be completed.
  * @sma: semaphore array.
  * @semnum: semaphore that was modified.
- * @pt: list head for the tasks that must be woken up.
+ * @wake_q: lockless wake-queue head.
  *
  * update_queue must be called after a semaphore in a semaphore array
  * was modified. If multiple semaphores were modified, update_queue must
  * be called with semnum = -1, as well as with the number of each modified
  * semaphore.
- * The tasks that must be woken up are added to @pt. The return code
+ * The tasks that must be woken up are added to @wake_q. The return code
  * is stored in q->pid.
  * The function internally checks if const operations can now succeed.
  *
  * The function return 1 if at least one semop was completed successfully.
  */
-static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
+static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
 {
-	struct sem_queue *q;
-	struct list_head *walk;
+	struct sem_queue *q, *tmp;
 	struct list_head *pending_list;
 	int semop_completed = 0;
 
@@ -920,13 +894,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
 		pending_list = &sma->sem_base[semnum].pending_alter;
 
 again:
-	walk = pending_list->next;
-	while (walk != pending_list) {
+	list_for_each_entry_safe(q, tmp, pending_list, list) {
 		int error, restart;
 
-		q = container_of(walk, struct sem_queue, list);
-		walk = walk->next;
-
 		/* If we are scanning the single sop, per-semaphore list of
 		 * one semaphore and that semaphore is 0, then it is not
 		 * necessary to scan further: simple increments
@@ -949,11 +919,11 @@ again:
 			restart = 0;
 		} else {
 			semop_completed = 1;
-			do_smart_wakeup_zero(sma, q->sops, q->nsops, pt);
+			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
 			restart = check_restart(sma, q);
 		}
 
-		wake_up_sem_queue_prepare(pt, q, error);
+		wake_up_sem_queue_prepare(q, error, wake_q);
 		if (restart)
 			goto again;
 	}
@@ -984,24 +954,24 @@ static void set_semotime(struct sem_array *sma, struct sembuf *sops)
  * @sops: operations that were performed
  * @nsops: number of operations
  * @otime: force setting otime
- * @pt: list head of the tasks that must be woken up.
+ * @wake_q: lockless wake-queue head
  *
  * do_smart_update() does the required calls to update_queue and wakeup_zero,
  * based on the actual changes that were performed on the semaphore array.
  * Note that the function does not do the actual wake-up: the caller is
- * responsible for calling wake_up_sem_queue_do(@pt).
+ * responsible for calling wake_up_q().
  * It is safe to perform this call after dropping all locks.
  */
 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
-			int otime, struct list_head *pt)
+			    int otime, struct wake_q_head *wake_q)
 {
 	int i;
 
-	otime |= do_smart_wakeup_zero(sma, sops, nsops, pt);
+	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);
 
 	if (!list_empty(&sma->pending_alter)) {
 		/* semaphore array uses the global queue - just process it. */
-		otime |= update_queue(sma, -1, pt);
+		otime |= update_queue(sma, -1, wake_q);
 	} else {
 		if (!sops) {
 			/*
@@ -1009,7 +979,7 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			 * known. Check all.
 			 */
 			for (i = 0; i < sma->sem_nsems; i++)
-				otime |= update_queue(sma, i, pt);
+				otime |= update_queue(sma, i, wake_q);
 		} else {
 			/*
 			 * Check the semaphores that were increased:
@@ -1023,7 +993,7 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			for (i = 0; i < nsops; i++) {
 				if (sops[i].sem_op > 0) {
 					otime |= update_queue(sma,
-							sops[i].sem_num, pt);
+							      sops[i].sem_num, wake_q);
 				}
 			}
 		}
@@ -1111,8 +1081,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	struct sem_undo *un, *tu;
 	struct sem_queue *q, *tq;
 	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
-	struct list_head tasks;
 	int i;
+	DEFINE_WAKE_Q(wake_q);
 
 	/* Free the existing undo structures for this semaphore set.  */
 	ipc_assert_locked_object(&sma->sem_perm);
@@ -1126,25 +1096,24 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	}
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
-	INIT_LIST_HEAD(&tasks);
 	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
 		unlink_queue(sma, q);
-		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
 	}
 
 	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
 		unlink_queue(sma, q);
-		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
 	}
 	for (i = 0; i < sma->sem_nsems; i++) {
 		struct sem *sem = sma->sem_base + i;
 		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
 			unlink_queue(sma, q);
-			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
 		}
 		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
 			unlink_queue(sma, q);
-			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
 		}
 	}
 
@@ -1153,7 +1122,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	sem_unlock(sma, -1);
 	rcu_read_unlock();
 
-	wake_up_sem_queue_do(&tasks);
+	wake_up_q(&wake_q);
 	ns->used_sems -= sma->sem_nsems;
 	ipc_rcu_putref(sma, sem_rcu_free);
 }
@@ -1292,9 +1261,9 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 	struct sem_undo *un;
 	struct sem_array *sma;
 	struct sem *curr;
-	int err;
-	struct list_head tasks;
-	int val;
+	int err, val;
+	DEFINE_WAKE_Q(wake_q);
+
 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
 	/* big-endian 64bit */
 	val = arg >> 32;
@@ -1306,8 +1275,6 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 	if (val > SEMVMX || val < 0)
 		return -ERANGE;
 
-	INIT_LIST_HEAD(&tasks);
-
 	rcu_read_lock();
 	sma = sem_obtain_object_check(ns, semid);
 	if (IS_ERR(sma)) {
@@ -1350,10 +1317,10 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 	curr->sempid = task_tgid_vnr(current);
 	sma->sem_ctime = get_seconds();
 	/* maybe some queued-up processes were waiting for this */
-	do_smart_update(sma, NULL, 0, 0, &tasks);
+	do_smart_update(sma, NULL, 0, 0, &wake_q);
 	sem_unlock(sma, -1);
 	rcu_read_unlock();
-	wake_up_sem_queue_do(&tasks);
+	wake_up_q(&wake_q);
 	return 0;
 }
 
@@ -1365,9 +1332,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	int err, nsems;
 	ushort fast_sem_io[SEMMSL_FAST];
 	ushort *sem_io = fast_sem_io;
-	struct list_head tasks;
-
-	INIT_LIST_HEAD(&tasks);
+	DEFINE_WAKE_Q(wake_q);
 
 	rcu_read_lock();
 	sma = sem_obtain_object_check(ns, semid);
@@ -1478,7 +1443,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		}
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
-		do_smart_update(sma, NULL, 0, 0, &tasks);
+		do_smart_update(sma, NULL, 0, 0, &wake_q);
 		err = 0;
 		goto out_unlock;
 	}
@@ -1514,7 +1479,7 @@ out_unlock:
 	sem_unlock(sma, -1);
 out_rcu_wakeup:
 	rcu_read_unlock();
-	wake_up_sem_queue_do(&tasks);
+	wake_up_q(&wake_q);
 out_free:
 	if (sem_io != fast_sem_io)
 		ipc_free(sem_io);
@@ -1787,32 +1752,6 @@ out:
 	return un;
 }
 
-
-/**
- * get_queue_result - retrieve the result code from sem_queue
- * @q: Pointer to queue structure
- *
- * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
- * q->status, then we must loop until the value is replaced with the final
- * value: This may happen if a task is woken up by an unrelated event (e.g.
- * signal) and in parallel the task is woken up by another task because it got
- * the requested semaphores.
- *
- * The function can be called with or without holding the semaphore spinlock.
- */
-static int get_queue_result(struct sem_queue *q)
-{
-	int error;
-
-	error = q->status;
-	while (unlikely(error == IN_WAKEUP)) {
-		cpu_relax();
-		error = q->status;
-	}
-
-	return error;
-}
-
 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		unsigned, nsops, const struct timespec __user *, timeout)
 {
@@ -1821,11 +1760,11 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	struct sembuf fast_sops[SEMOPM_FAST];
 	struct sembuf *sops = fast_sops, *sop;
 	struct sem_undo *un;
-	int undos = 0, alter = 0, max, locknum;
+	int max, locknum;
+	bool undos = false, alter = false, dupsop = false;
 	struct sem_queue queue;
-	unsigned long jiffies_left = 0;
+	unsigned long dup = 0, jiffies_left = 0;
 	struct ipc_namespace *ns;
-	struct list_head tasks;
 
 	ns = current->nsproxy->ipc_ns;
 
@@ -1838,10 +1777,12 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		if (sops == NULL)
 			return -ENOMEM;
 	}
+
 	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
 		error =  -EFAULT;
 		goto out_free;
 	}
+
 	if (timeout) {
 		struct timespec _timeout;
 		if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
@@ -1855,18 +1796,30 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		}
 		jiffies_left = timespec_to_jiffies(&_timeout);
 	}
+
 	max = 0;
 	for (sop = sops; sop < sops + nsops; sop++) {
+		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);
+
 		if (sop->sem_num >= max)
 			max = sop->sem_num;
 		if (sop->sem_flg & SEM_UNDO)
-			undos = 1;
-		if (sop->sem_op != 0)
-			alter = 1;
+			undos = true;
+		if (dup & mask) {
+			/*
+			 * There was a previous alter access that appears
+			 * to have accessed the same semaphore, thus use
+			 * the dupsop logic. "appears", because the detection
+			 * can only check % BITS_PER_LONG.
+			 */
+			dupsop = true;
+		}
+		if (sop->sem_op != 0) {
+			alter = true;
+			dup |= mask;
+		}
 	}
 
-	INIT_LIST_HEAD(&tasks);
-
 	if (undos) {
 		/* On success, find_alloc_undo takes the rcu_read_lock */
 		un = find_alloc_undo(ns, semid);
@@ -1887,16 +1840,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	}
 
 	error = -EFBIG;
-	if (max >= sma->sem_nsems)
-		goto out_rcu_wakeup;
+	if (max >= sma->sem_nsems) {
+		rcu_read_unlock();
+		goto out_free;
+	}
 
 	error = -EACCES;
-	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
-		goto out_rcu_wakeup;
+	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
+		rcu_read_unlock();
+		goto out_free;
+	}
 
 	error = security_sem_semop(sma, sops, nsops, alter);
-	if (error)
-		goto out_rcu_wakeup;
+	if (error) {
+		rcu_read_unlock();
+		goto out_free;
+	}
 
 	error = -EIDRM;
 	locknum = sem_lock(sma, sops, nsops);
@@ -1925,24 +1884,34 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	queue.undo = un;
 	queue.pid = task_tgid_vnr(current);
 	queue.alter = alter;
+	queue.dupsop = dupsop;
 
 	error = perform_atomic_semop(sma, &queue);
-	if (error == 0) {
-		/* If the operation was successful, then do
+	if (error == 0) { /* non-blocking successful path */
+		DEFINE_WAKE_Q(wake_q);
+
+		/*
+		 * If the operation was successful, then do
 		 * the required updates.
 		 */
 		if (alter)
-			do_smart_update(sma, sops, nsops, 1, &tasks);
+			do_smart_update(sma, sops, nsops, 1, &wake_q);
 		else
 			set_semotime(sma, sops);
+
+		sem_unlock(sma, locknum);
+		rcu_read_unlock();
+		wake_up_q(&wake_q);
+
+		goto out_free;
 	}
-	if (error <= 0)
+	if (error < 0) /* non-blocking error path */
 		goto out_unlock_free;
 
-	/* We need to sleep on this operation, so we put the current
+	/*
+	 * We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
 	 */
-
 	if (nsops == 1) {
 		struct sem *curr;
 		curr = &sma->sem_base[sops->sem_num];
@@ -1971,77 +1940,69 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		sma->complex_count++;
 	}
 
-	queue.status = -EINTR;
-	queue.sleeper = current;
+	do {
+		queue.status = -EINTR;
+		queue.sleeper = current;
 
-sleep_again:
-	__set_current_state(TASK_INTERRUPTIBLE);
-	sem_unlock(sma, locknum);
-	rcu_read_unlock();
-
-	if (timeout)
-		jiffies_left = schedule_timeout(jiffies_left);
-	else
-		schedule();
+		__set_current_state(TASK_INTERRUPTIBLE);
+		sem_unlock(sma, locknum);
+		rcu_read_unlock();
 
-	error = get_queue_result(&queue);
+		if (timeout)
+			jiffies_left = schedule_timeout(jiffies_left);
+		else
+			schedule();
 
-	if (error != -EINTR) {
-		/* fast path: update_queue already obtained all requested
-		 * resources.
-		 * Perform a smp_mb(): User space could assume that semop()
-		 * is a memory barrier: Without the mb(), the cpu could
-		 * speculatively read in user space stale data that was
-		 * overwritten by the previous owner of the semaphore.
+		/*
+		 * fastpath: the semop has completed, either successfully or
+		 * not; from the syscall's point of view the distinction is
+		 * irrelevant to us at this point, we're done.
+		 *
+		 * We _do_ care, nonetheless, about being awoken by a signal or
+		 * spuriously.  The queue.status is checked again in the
+		 * slowpath (aka after taking sem_lock), such that we can detect
+		 * scenarios where we were awakened externally, during the
+		 * window between wake_q_add() and wake_up_q().
 		 */
-		smp_mb();
-
-		goto out_free;
-	}
-
-	rcu_read_lock();
-	sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);
-
-	/*
-	 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
-	 */
-	error = get_queue_result(&queue);
+		error = READ_ONCE(queue.status);
+		if (error != -EINTR) {
+			/*
+			 * User space could assume that semop() is a memory
+			 * barrier: Without the mb(), the cpu could
+			 * speculatively read in userspace stale data that was
+			 * overwritten by the previous owner of the semaphore.
+			 */
+			smp_mb();
+			goto out_free;
+		}
 
-	/*
-	 * Array removed? If yes, leave without sem_unlock().
-	 */
-	if (IS_ERR(sma)) {
-		rcu_read_unlock();
-		goto out_free;
-	}
+		rcu_read_lock();
+		sem_lock(sma, sops, nsops);
 
+		if (!ipc_valid_object(&sma->sem_perm))
+			goto out_unlock_free;
 
-	/*
-	 * If queue.status != -EINTR we are woken up by another process.
-	 * Leave without unlink_queue(), but with sem_unlock().
-	 */
-	if (error != -EINTR)
-		goto out_unlock_free;
+		error = READ_ONCE(queue.status);
 
-	/*
-	 * If an interrupt occurred we have to clean up the queue
-	 */
-	if (timeout && jiffies_left == 0)
-		error = -EAGAIN;
+		/*
+		 * If queue.status != -EINTR we are woken up by another process.
+		 * Leave without unlink_queue(), but with sem_unlock().
+		 */
+		if (error != -EINTR)
+			goto out_unlock_free;
 
-	/*
-	 * If the wakeup was spurious, just retry
-	 */
-	if (error == -EINTR && !signal_pending(current))
-		goto sleep_again;
+		/*
+		 * If an interrupt occurred we have to clean up the queue.
+		 */
+		if (timeout && jiffies_left == 0)
+			error = -EAGAIN;
+	} while (error == -EINTR && !signal_pending(current)); /* spurious */
 
 	unlink_queue(sma, &queue);
 
 out_unlock_free:
 	sem_unlock(sma, locknum);
-out_rcu_wakeup:
 	rcu_read_unlock();
-	wake_up_sem_queue_do(&tasks);
 out_free:
 	if (sops != fast_sops)
 		kfree(sops);
@@ -2102,8 +2063,8 @@ void exit_sem(struct task_struct *tsk)
 	for (;;) {
 		struct sem_array *sma;
 		struct sem_undo *un;
-		struct list_head tasks;
 		int semid, i;
+		DEFINE_WAKE_Q(wake_q);
 
 		cond_resched();
 
@@ -2191,11 +2152,10 @@ void exit_sem(struct task_struct *tsk)
 			}
 		}
 		/* maybe some queued-up processes were waiting for this */
-		INIT_LIST_HEAD(&tasks);
-		do_smart_update(sma, NULL, 0, 1, &tasks);
+		do_smart_update(sma, NULL, 0, 1, &wake_q);
 		sem_unlock(sma, -1);
 		rcu_read_unlock();
-		wake_up_sem_queue_do(&tasks);
+		wake_up_q(&wake_q);
 
 		kfree_rcu(un, rcu);
 	}

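The ipc/sem.c changes above replace the open-coded pending-task list with the kernel's lockless wake-queue: tasks are only recorded with wake_q_add() while the semaphore lock is held, and the actual wake-ups happen via wake_up_q() after every lock has been dropped. A minimal sketch of that pattern follows; foo_lock, foo_waiters and struct foo_waiter are hypothetical names used purely for illustration, and the wake-queue API header location may differ between kernel versions.

#include <linux/list.h>
#include <linux/sched.h>	/* DEFINE_WAKE_Q, wake_q_add(), wake_up_q() */
#include <linux/spinlock.h>

struct foo_waiter {			/* hypothetical waiter record */
	struct list_head list;
	struct task_struct *task;
};

static DEFINE_SPINLOCK(foo_lock);
static LIST_HEAD(foo_waiters);

static void foo_wake_all(void)
{
	DEFINE_WAKE_Q(wake_q);		/* on-stack, needs no locking of its own */
	struct foo_waiter *w, *tmp;

	spin_lock(&foo_lock);
	list_for_each_entry_safe(w, tmp, &foo_waiters, list) {
		list_del(&w->list);
		wake_q_add(&wake_q, w->task);	/* only records the task */
	}
	spin_unlock(&foo_lock);

	wake_up_q(&wake_q);		/* real wake-ups, after the lock is gone */
}

Compared with the removed wake_up_sem_queue_do()/IN_WAKEUP scheme, the woken task can read its final queue.status directly, which is why the sleeping side in the semtimedop() hunk rechecks queue.status with READ_ONCE().
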
+ 10 - 3
ipc/shm.c

@@ -89,6 +89,7 @@ void shm_init_ns(struct ipc_namespace *ns)
 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
 	struct shmid_kernel *shp;
+
 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 
 	if (shp->shm_nattch) {
@@ -387,6 +388,7 @@ static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
 	struct file *file = vma->vm_file;
 	struct shm_file_data *sfd = shm_file_data(file);
 	int err = 0;
+
 	if (sfd->vm_ops->set_policy)
 		err = sfd->vm_ops->set_policy(vma, new);
 	return err;
@@ -417,7 +419,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 	 * In case of remap_file_pages() emulation, the file can represent
 	 * removed IPC ID: propogate shm_lock() error to caller.
 	 */
-	ret =__shm_open(vma);
+	ret = __shm_open(vma);
 	if (ret)
 		return ret;
 
@@ -468,6 +470,7 @@ static unsigned long shm_get_unmapped_area(struct file *file,
 	unsigned long flags)
 {
 	struct shm_file_data *sfd = shm_file_data(file);
+
 	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
 						pgoff, flags);
 }
@@ -766,6 +769,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
 	} else {
 #ifdef CONFIG_SHMEM
 		struct shmem_inode_info *info = SHMEM_I(inode);
+
 		spin_lock_irq(&info->lock);
 		*rss_add += inode->i_mapping->nrpages;
 		*swp_add += info->swapped;
@@ -1028,6 +1032,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 
 		if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
 			kuid_t euid = current_euid();
+
 			if (!uid_eq(euid, shp->shm_perm.uid) &&
 			    !uid_eq(euid, shp->shm_perm.cuid)) {
 				err = -EPERM;
@@ -1045,6 +1050,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 
 		if (cmd == SHM_LOCK) {
 			struct user_struct *user = current_user();
+
 			err = shmem_lock(shm_file, 1, user);
 			if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
 				shp->shm_perm.mode |= SHM_LOCKED;
@@ -1354,9 +1360,10 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 		vma = next;
 	}
 
-#else /* CONFIG_MMU */
+#else	/* CONFIG_MMU */
 	/* under NOMMU conditions, the exact address to be destroyed must be
-	 * given */
+	 * given
+	 */
 	if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
 		do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
 		retval = 0;

+ 1 - 0
kernel/Makefile

@@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o

+ 2 - 2
kernel/debug/debug_core.c

@@ -598,11 +598,11 @@ return_normal:
 	/*
 	 * Wait for the other CPUs to be notified and be waiting for us:
 	 */
-	time_left = loops_per_jiffy * HZ;
+	time_left = MSEC_PER_SEC;
 	while (kgdb_do_roundup && --time_left &&
 	       (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) !=
 		   online_cpus)
-		cpu_relax();
+		udelay(1000);
 	if (!time_left)
 		pr_crit("Timed out waiting for secondary CPUs.\n");
 

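The debug_core.c hunk above changes the secondary-CPU round-up wait from a CPU-frequency-dependent number of cpu_relax() spins to roughly one second of wall-clock time (MSEC_PER_SEC iterations of udelay(1000)). A minimal sketch of that kind of bounded busy-wait, with a hypothetical cond_is_met() predicate:

#include <linux/delay.h>	/* udelay() */
#include <linux/time64.h>	/* MSEC_PER_SEC */
#include <linux/types.h>

static bool wait_up_to_a_second(bool (*cond_is_met)(void))
{
	unsigned long time_left = MSEC_PER_SEC;	/* ~1000 * 1ms = ~1s */

	/* Busy-wait: the debugger runs with interrupts off, so sleeping is not an option. */
	while (--time_left && !cond_is_met())
		udelay(1000);

	return time_left != 0;	/* false: timed out */
}
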
+ 13 - 24
kernel/debug/kdb/kdb_io.c

@@ -30,6 +30,7 @@
 char kdb_prompt_str[CMD_BUFLEN];
 
 int kdb_trap_printk;
+int kdb_printf_cpu = -1;
 
 static int kgdb_transition_check(char *buffer)
 {
@@ -554,31 +555,26 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 	int linecount;
 	int colcount;
 	int logging, saved_loglevel = 0;
-	int saved_trap_printk;
-	int got_printf_lock = 0;
 	int retlen = 0;
 	int fnd, len;
+	int this_cpu, old_cpu;
 	char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
 	char *moreprompt = "more> ";
 	struct console *c = console_drivers;
-	static DEFINE_SPINLOCK(kdb_printf_lock);
 	unsigned long uninitialized_var(flags);
 
-	preempt_disable();
-	saved_trap_printk = kdb_trap_printk;
-	kdb_trap_printk = 0;
-
 	/* Serialize kdb_printf if multiple cpus try to write at once.
 	 * But if any cpu goes recursive in kdb, just print the output,
 	 * even if it is interleaved with any other text.
 	 */
-	if (!KDB_STATE(PRINTF_LOCK)) {
-		KDB_STATE_SET(PRINTF_LOCK);
-		spin_lock_irqsave(&kdb_printf_lock, flags);
-		got_printf_lock = 1;
-		atomic_inc(&kdb_event);
-	} else {
-		__acquire(kdb_printf_lock);
+	local_irq_save(flags);
+	this_cpu = smp_processor_id();
+	for (;;) {
+		old_cpu = cmpxchg(&kdb_printf_cpu, -1, this_cpu);
+		if (old_cpu == -1 || old_cpu == this_cpu)
+			break;
+
+		cpu_relax();
 	}
 
 	diag = kdbgetintenv("LINES", &linecount);
@@ -847,16 +843,9 @@ kdb_print_out:
 	suspend_grep = 0; /* end of what may have been a recursive call */
 	if (logging)
 		console_loglevel = saved_loglevel;
-	if (KDB_STATE(PRINTF_LOCK) && got_printf_lock) {
-		got_printf_lock = 0;
-		spin_unlock_irqrestore(&kdb_printf_lock, flags);
-		KDB_STATE_CLEAR(PRINTF_LOCK);
-		atomic_dec(&kdb_event);
-	} else {
-		__release(kdb_printf_lock);
-	}
-	kdb_trap_printk = saved_trap_printk;
-	preempt_enable();
+	/* The code above was serialized via kdb_printf_cpu; release it now. */
+	smp_store_release(&kdb_printf_cpu, old_cpu);
+	local_irq_restore(flags);
 	return retlen;
 }
 

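The kdb_io.c rewrite above replaces a spinlock plus KDB_STATE(PRINTF_LOCK) bookkeeping with a single owner-CPU word claimed by cmpxchg(): other CPUs spin until the word returns to -1, while the owning CPU falls straight through, so recursive kdb_printf() calls cannot deadlock. The printk.c hunk further down checks the same variable (kdb_printf_cpu < 0) to avoid recursing from printk() back into kdb. A rough sketch of the idiom, with hypothetical my_printf_cpu and emit() names:

#include <linux/atomic.h>	/* cmpxchg(), smp_store_release() */
#include <linux/irqflags.h>	/* local_irq_save()/local_irq_restore() */
#include <linux/smp.h>		/* smp_processor_id() */
#include <asm/processor.h>	/* cpu_relax() */

static int my_printf_cpu = -1;	/* -1: unowned */

static void emit(const char *s)
{
	/* placeholder for the actual output path */
}

static void serialized_print(const char *s)
{
	unsigned long flags;
	int this_cpu, old_cpu;

	local_irq_save(flags);
	this_cpu = smp_processor_id();

	for (;;) {
		old_cpu = cmpxchg(&my_printf_cpu, -1, this_cpu);
		if (old_cpu == -1 || old_cpu == this_cpu)
			break;		/* free, or recursing on the owning CPU */
		cpu_relax();
	}

	emit(s);

	/*
	 * Restore the previous owner: -1 if we took the slot here, our own
	 * CPU id if this was a recursive call, so the outer level keeps it.
	 */
	smp_store_release(&my_printf_cpu, old_cpu);
	local_irq_restore(flags);
}
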
+ 0 - 1
kernel/debug/kdb/kdb_main.c

@@ -60,7 +60,6 @@ int kdb_grep_trailing;
  * Kernel debugger state flags
  */
 int kdb_flags;
-atomic_t kdb_event;
 
 /*
  * kdb_lock protects updates to kdb_initial_cpu.  Used to

+ 0 - 1
kernel/debug/kdb/kdb_private.h

@@ -132,7 +132,6 @@ extern int kdb_state;
 #define KDB_STATE_PAGER		0x00000400	/* pager is available */
 #define KDB_STATE_GO_SWITCH	0x00000800	/* go is switching
 						 * back to initial cpu */
-#define KDB_STATE_PRINTF_LOCK	0x00001000	/* Holds kdb_printf lock */
 #define KDB_STATE_WAIT_IPI	0x00002000	/* Waiting for kdb_ipi() NMI */
 #define KDB_STATE_RECURSE	0x00004000	/* Recursive entry to kdb */
 #define KDB_STATE_IP_ADJUSTED	0x00008000	/* Restart IP has been

+ 2 - 2
kernel/events/uprobes.c

@@ -301,7 +301,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
-			&vma);
+			&vma, NULL);
 	if (ret <= 0)
 		return ret;
 
@@ -1712,7 +1712,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	 * essentially a kernel access to the memory.
 	 */
 	result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
-			NULL);
+			NULL, NULL);
 	if (result < 0)
 		return result;
 

+ 5 - 0
kernel/kcov.c

@@ -1,11 +1,16 @@
 #define pr_fmt(fmt) "kcov: " fmt
 
 #define DISABLE_BRANCH_PROFILING
+#include <linux/atomic.h>
 #include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/export.h>
 #include <linux/types.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/preempt.h>
 #include <linux/printk.h>
 #include <linux/sched.h>
 #include <linux/slab.h>

+ 2 - 3
kernel/kexec_core.c

@@ -441,6 +441,8 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
 	while (hole_end <= crashk_res.end) {
 		unsigned long i;
 
+		cond_resched();
+
 		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
 			break;
 		/* See if I overlap any of the segments */
@@ -1467,9 +1469,6 @@ static int __init crash_save_vmcoreinfo_init(void)
 #endif
 	VMCOREINFO_NUMBER(PG_head_mask);
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_X86
-	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
-#endif
 #ifdef CONFIG_HUGETLB_PAGE
 	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
 #endif

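The cond_resched() added to kimage_alloc_crash_control_pages() is the standard cure for soft lockups in long kernel-space loops on non-preemptible kernels. A tiny, hypothetical sketch of the pattern:

#include <linux/sched.h>	/* cond_resched() */
#include <linux/types.h>

static u64 sum_table(const u64 *table, unsigned long nr_entries)
{
	u64 sum = 0;
	unsigned long i;

	for (i = 0; i < nr_entries; i++) {
		/* Yield periodically; a real loop might only do this every N items. */
		cond_resched();
		sum += table[i];
	}
	return sum;
}
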
+ 2 - 1
kernel/printk/printk.c

@@ -1926,7 +1926,8 @@ int vprintk_default(const char *fmt, va_list args)
 	int r;
 
 #ifdef CONFIG_KGDB_KDB
-	if (unlikely(kdb_trap_printk)) {
+	/* Allow printk() to be passed to kdb, but avoid recursion. */
+	if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) {
 		r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
 		return r;
 	}

+ 2 - 2
kernel/relay.c

@@ -809,11 +809,11 @@ void relay_subbufs_consumed(struct rchan *chan,
 {
 	struct rchan_buf *buf;
 
-	if (!chan)
+	if (!chan || cpu >= NR_CPUS)
 		return;
 
 	buf = *per_cpu_ptr(chan->buf, cpu);
-	if (cpu >= NR_CPUS || !buf || subbufs_consumed > chan->n_subbufs)
+	if (!buf || subbufs_consumed > chan->n_subbufs)
 		return;
 
 	if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed)

+ 7 - 0
kernel/signal.c

@@ -2491,6 +2491,13 @@ void __set_current_blocked(const sigset_t *newset)
 {
 	struct task_struct *tsk = current;
 
+	/*
+	 * In case the signal mask hasn't changed, there is nothing we need
+	 * to do. The current->blocked shouldn't be modified by other task.
+	 */
+	if (sigequalsets(&tsk->blocked, newset))
+		return;
+
 	spin_lock_irq(&tsk->sighand->siglock);
 	__set_task_blocked(tsk, newset);
 	spin_unlock_irq(&tsk->sighand->siglock);

+ 5 - 3
kernel/sysctl.c

@@ -2389,9 +2389,11 @@ static void validate_coredump_safety(void)
 #ifdef CONFIG_COREDUMP
 	if (suid_dumpable == SUID_DUMP_ROOT &&
 	    core_pattern[0] != '/' && core_pattern[0] != '|') {
-		printk(KERN_WARNING "Unsafe core_pattern used with "\
-			"suid_dumpable=2. Pipe handler or fully qualified "\
-			"core dump path required.\n");
+		printk(KERN_WARNING
+"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
+"Pipe handler or fully qualified core dump path required.\n"
+"Set kernel.core_pattern before fs.suid_dumpable.\n"
+		);
 	}
 #endif
 }

+ 2 - 2
kernel/sysctl_binary.c

@@ -1354,8 +1354,8 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
 			"warning: process `%s' used the deprecated sysctl "
 			"system call with ", current->comm);
 		for (i = 0; i < nlen; i++)
-			printk("%d.", name[i]);
-		printk("\n");
+			printk(KERN_CONT "%d.", name[i]);
+		printk(KERN_CONT "\n");
 	}
 	return;
 }

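The sysctl_binary.c fix above tags the continuation printk() calls with KERN_CONT; without it, each fragment may be flushed as a separate log line since the printk continuation-handling rework. A short illustrative example, with a hypothetical print_ids() helper:

#include <linux/kernel.h>
#include <linux/printk.h>

static void print_ids(const int *ids, int n)
{
	int i;

	printk(KERN_INFO "ids:");			/* starts a new message */
	for (i = 0; i < n; i++)
		printk(KERN_CONT " %d", ids[i]);	/* appends to the same line */
	printk(KERN_CONT "\n");				/* terminates it */
}
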
+ 2 - 1
kernel/time/alarmtimer.c

@@ -516,7 +516,8 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
 
 	spin_lock_irqsave(&ptr->it_lock, flags);
 	if ((ptr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) {
-		if (posix_timer_event(ptr, 0) != 0)
+		if (IS_ENABLED(CONFIG_POSIX_TIMERS) &&
+		    posix_timer_event(ptr, 0) != 0)
 			ptr->it_overrun++;
 	}
 

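The alarmtimer.c change relies on IS_ENABLED(): the macro folds to a compile-time constant, so the call is discarded by the optimizer when the option is off, yet the code is still parsed and type-checked, unlike an #ifdef block. A hedged sketch with a hypothetical CONFIG_FOO and do_foo_work():

#include <linux/kconfig.h>	/* IS_ENABLED() */
#include <linux/printk.h>

int do_foo_work(void);	/* a declaration suffices; the call is dropped when CONFIG_FOO is off */

static void maybe_do_foo(void)
{
	if (IS_ENABLED(CONFIG_FOO) && do_foo_work() != 0)
		pr_warn("foo work failed\n");
}
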
+ 15 - 255
kernel/watchdog.c

@@ -24,32 +24,14 @@
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
-#include <linux/perf_event.h>
 #include <linux/kthread.h>
 
-/*
- * The run state of the lockup detectors is controlled by the content of the
- * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
- * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
- *
- * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
- * are variables that are only used as an 'interface' between the parameters
- * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
- * 'watchdog_thresh' variable is handled differently because its value is not
- * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
- * is equal zero.
- */
-#define NMI_WATCHDOG_ENABLED_BIT   0
-#define SOFT_WATCHDOG_ENABLED_BIT  1
-#define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
-#define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
-
 static DEFINE_MUTEX(watchdog_proc_mutex);
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
 #else
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
 int __read_mostly nmi_watchdog_enabled;
 int __read_mostly soft_watchdog_enabled;
@@ -59,9 +41,6 @@ int __read_mostly watchdog_thresh = 10;
 #ifdef CONFIG_SMP
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-#else
-#define sysctl_softlockup_all_cpu_backtrace 0
-#define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
 static struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
@@ -100,50 +79,9 @@ static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static DEFINE_PER_CPU(bool, hard_watchdog_warn);
-static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
-static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-#endif
 static unsigned long soft_lockup_nmi_warn;
 
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-unsigned int __read_mostly hardlockup_panic =
-			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
-static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
-	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
-	if (!strncmp(str, "panic", 5))
-		hardlockup_panic = 1;
-	else if (!strncmp(str, "nopanic", 7))
-		hardlockup_panic = 0;
-	else if (!strncmp(str, "0", 1))
-		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-	else if (!strncmp(str, "1", 1))
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-	return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-#endif
-
 unsigned int __read_mostly softlockup_panic =
 			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
 
@@ -264,32 +202,14 @@ void touch_all_softlockup_watchdogs(void)
 	wq_watchdog_touch(-1);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-void touch_nmi_watchdog(void)
-{
-	/*
-	 * Using __raw here because some code paths have
-	 * preemption enabled.  If preemption is enabled
-	 * then interrupts should be enabled too, in which
-	 * case we shouldn't have to worry about the watchdog
-	 * going off.
-	 */
-	raw_cpu_write(watchdog_nmi_touch, true);
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-#endif
-
 void touch_softlockup_watchdog_sync(void)
 {
 	__this_cpu_write(softlockup_touch_sync, true);
 	__this_cpu_write(watchdog_touch_ts, 0);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static bool is_hardlockup(void)
+bool is_hardlockup(void)
 {
 	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
@@ -299,7 +219,6 @@ static bool is_hardlockup(void)
 	__this_cpu_write(hrtimer_interrupts_saved, hrint);
 	return false;
 }
-#endif
 
 static int is_softlockup(unsigned long touch_ts)
 {
@@ -313,78 +232,22 @@ static int is_softlockup(unsigned long touch_ts)
 	return 0;
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-
-static struct perf_event_attr wd_hw_attr = {
-	.type		= PERF_TYPE_HARDWARE,
-	.config		= PERF_COUNT_HW_CPU_CYCLES,
-	.size		= sizeof(struct perf_event_attr),
-	.pinned		= 1,
-	.disabled	= 1,
-};
-
-/* Callback function for perf event subsystem */
-static void watchdog_overflow_callback(struct perf_event *event,
-		 struct perf_sample_data *data,
-		 struct pt_regs *regs)
-{
-	/* Ensure the watchdog never gets throttled */
-	event->hw.interrupts = 0;
-
-	if (__this_cpu_read(watchdog_nmi_touch) == true) {
-		__this_cpu_write(watchdog_nmi_touch, false);
-		return;
-	}
-
-	/* check for a hardlockup
-	 * This is done by making sure our timer interrupt
-	 * is incrementing.  The timer interrupt should have
-	 * fired multiple times before we overflow'd.  If it hasn't
-	 * then this is a good indication the cpu is stuck
-	 */
-	if (is_hardlockup()) {
-		int this_cpu = smp_processor_id();
-		struct pt_regs *regs = get_irq_regs();
-
-		/* only print hardlockups once */
-		if (__this_cpu_read(hard_watchdog_warn) == true)
-			return;
-
-		pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
-		print_modules();
-		print_irqtrace_events(current);
-		if (regs)
-			show_regs(regs);
-		else
-			dump_stack();
-
-		/*
-		 * Perform all-CPU dump only once to avoid multiple hardlockups
-		 * generating interleaving traces
-		 */
-		if (sysctl_hardlockup_all_cpu_backtrace &&
-				!test_and_set_bit(0, &hardlockup_allcpu_dumped))
-			trigger_allbutself_cpu_backtrace();
-
-		if (hardlockup_panic)
-			nmi_panic(regs, "Hard LOCKUP");
-
-		__this_cpu_write(hard_watchdog_warn, true);
-		return;
-	}
-
-	__this_cpu_write(hard_watchdog_warn, false);
-	return;
-}
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
 static void watchdog_interrupt_count(void)
 {
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
-static int watchdog_nmi_enable(unsigned int cpu);
-static void watchdog_nmi_disable(unsigned int cpu);
+/*
+ * These two functions are mostly architecture-specific;
+ * define them as weak here so that architectures can override them.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+	return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
 
 static int watchdog_enable_all_cpus(void);
 static void watchdog_disable_all_cpus(void);
@@ -577,109 +440,6 @@ static void watchdog(unsigned int cpu)
 		watchdog_nmi_disable(cpu);
 }
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-/*
- * People like the simple clean cpu node info on boot.
- * Reduce the watchdog noise by only printing messages
- * that are different from what cpu0 displayed.
- */
-static unsigned long cpu0_err;
-
-static int watchdog_nmi_enable(unsigned int cpu)
-{
-	struct perf_event_attr *wd_attr;
-	struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
-	/* nothing to do if the hard lockup detector is disabled */
-	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-		goto out;
-
-	/* is it already setup and enabled? */
-	if (event && event->state > PERF_EVENT_STATE_OFF)
-		goto out;
-
-	/* it is setup but not enabled */
-	if (event != NULL)
-		goto out_enable;
-
-	wd_attr = &wd_hw_attr;
-	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
-
-	/* Try to register using hardware perf events */
-	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
-
-	/* save cpu0 error for future comparision */
-	if (cpu == 0 && IS_ERR(event))
-		cpu0_err = PTR_ERR(event);
-
-	if (!IS_ERR(event)) {
-		/* only print for cpu0 or different than cpu0 */
-		if (cpu == 0 || cpu0_err)
-			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
-		goto out_save;
-	}
-
-	/*
-	 * Disable the hard lockup detector if _any_ CPU fails to set up
-	 * set up the hardware perf event. The watchdog() function checks
-	 * the NMI_WATCHDOG_ENABLED bit periodically.
-	 *
-	 * The barriers are for syncing up watchdog_enabled across all the
-	 * cpus, as clear_bit() does not use barriers.
-	 */
-	smp_mb__before_atomic();
-	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
-	smp_mb__after_atomic();
-
-	/* skip displaying the same error again */
-	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
-		return PTR_ERR(event);
-
-	/* vary the KERN level based on the returned errno */
-	if (PTR_ERR(event) == -EOPNOTSUPP)
-		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
-	else if (PTR_ERR(event) == -ENOENT)
-		pr_warn("disabled (cpu%i): hardware events not enabled\n",
-			 cpu);
-	else
-		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
-			cpu, PTR_ERR(event));
-
-	pr_info("Shutting down hard lockup detector on all cpus\n");
-
-	return PTR_ERR(event);
-
-	/* success path */
-out_save:
-	per_cpu(watchdog_ev, cpu) = event;
-out_enable:
-	perf_event_enable(per_cpu(watchdog_ev, cpu));
-out:
-	return 0;
-}
-
-static void watchdog_nmi_disable(unsigned int cpu)
-{
-	struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
-	if (event) {
-		perf_event_disable(event);
-		per_cpu(watchdog_ev, cpu) = NULL;
-
-		/* should be in cleanup, but blocks oprofile */
-		perf_event_release_kernel(event);
-	}
-	if (cpu == 0) {
-		/* watchdog_nmi_enable() expects this to be zero initially. */
-		cpu0_err = 0;
-	}
-}
-
-#else
-static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
-static void watchdog_nmi_disable(unsigned int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
 static struct smp_hotplug_thread watchdog_threads = {
 	.store			= &softlockup_watchdog,
 	.thread_should_run	= watchdog_should_run,

+ 227 - 0
kernel/watchdog_hld.c

@@ -0,0 +1,227 @@
+/*
+ * Detect hard lockups on a system
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Note: Most of this code is borrowed heavily from the original softlockup
+ * detector, so thanks to Ingo for the initial implementation.
+ * Some chunks were also taken from the old x86-specific nmi watchdog code;
+ * thanks to those contributors as well.
+ */
+
+#define pr_fmt(fmt) "NMI watchdog: " fmt
+
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <asm/irq_regs.h>
+#include <linux/perf_event.h>
+
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
+static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+static unsigned long hardlockup_allcpu_dumped;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+	if (!strncmp(str, "panic", 5))
+		hardlockup_panic = 1;
+	else if (!strncmp(str, "nopanic", 7))
+		hardlockup_panic = 0;
+	else if (!strncmp(str, "0", 1))
+		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+	else if (!strncmp(str, "1", 1))
+		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+	return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+void touch_nmi_watchdog(void)
+{
+	/*
+	 * Using __raw here because some code paths have
+	 * preemption enabled.  If preemption is enabled
+	 * then interrupts should be enabled too, in which
+	 * case we shouldn't have to worry about the watchdog
+	 * going off.
+	 */
+	raw_cpu_write(watchdog_nmi_touch, true);
+	touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+static struct perf_event_attr wd_hw_attr = {
+	.type		= PERF_TYPE_HARDWARE,
+	.config		= PERF_COUNT_HW_CPU_CYCLES,
+	.size		= sizeof(struct perf_event_attr),
+	.pinned		= 1,
+	.disabled	= 1,
+};
+
+/* Callback function for perf event subsystem */
+static void watchdog_overflow_callback(struct perf_event *event,
+		 struct perf_sample_data *data,
+		 struct pt_regs *regs)
+{
+	/* Ensure the watchdog never gets throttled */
+	event->hw.interrupts = 0;
+
+	if (__this_cpu_read(watchdog_nmi_touch) == true) {
+		__this_cpu_write(watchdog_nmi_touch, false);
+		return;
+	}
+
+	/* check for a hardlockup
+	 * This is done by making sure our timer interrupt
+	 * is incrementing.  The timer interrupt should have
+	 * fired multiple times before we overflow'd.  If it hasn't
+	 * then this is a good indication the cpu is stuck
+	 */
+	if (is_hardlockup()) {
+		int this_cpu = smp_processor_id();
+
+		/* only print hardlockups once */
+		if (__this_cpu_read(hard_watchdog_warn) == true)
+			return;
+
+		pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+		print_modules();
+		print_irqtrace_events(current);
+		if (regs)
+			show_regs(regs);
+		else
+			dump_stack();
+
+		/*
+		 * Perform all-CPU dump only once to avoid multiple hardlockups
+		 * generating interleaving traces
+		 */
+		if (sysctl_hardlockup_all_cpu_backtrace &&
+				!test_and_set_bit(0, &hardlockup_allcpu_dumped))
+			trigger_allbutself_cpu_backtrace();
+
+		if (hardlockup_panic)
+			nmi_panic(regs, "Hard LOCKUP");
+
+		__this_cpu_write(hard_watchdog_warn, true);
+		return;
+	}
+
+	__this_cpu_write(hard_watchdog_warn, false);
+	return;
+}
+
+/*
+ * People like the simple clean cpu node info on boot.
+ * Reduce the watchdog noise by only printing messages
+ * that are different from what cpu0 displayed.
+ */
+static unsigned long cpu0_err;
+
+int watchdog_nmi_enable(unsigned int cpu)
+{
+	struct perf_event_attr *wd_attr;
+	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+	/* nothing to do if the hard lockup detector is disabled */
+	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+		goto out;
+
+	/* is it already setup and enabled? */
+	if (event && event->state > PERF_EVENT_STATE_OFF)
+		goto out;
+
+	/* it is setup but not enabled */
+	if (event != NULL)
+		goto out_enable;
+
+	wd_attr = &wd_hw_attr;
+	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+
+	/* Try to register using hardware perf events */
+	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+
+	/* save cpu0 error for future comparison */
+	if (cpu == 0 && IS_ERR(event))
+		cpu0_err = PTR_ERR(event);
+
+	if (!IS_ERR(event)) {
+		/* only print for cpu0 or different than cpu0 */
+		if (cpu == 0 || cpu0_err)
+			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
+		goto out_save;
+	}
+
+	/*
+	 * Disable the hard lockup detector if _any_ CPU fails to set up
+	 * the hardware perf event. The watchdog() function checks
+	 * the NMI_WATCHDOG_ENABLED bit periodically.
+	 *
+	 * The barriers are for syncing up watchdog_enabled across all the
+	 * cpus, as clear_bit() does not use barriers.
+	 */
+	smp_mb__before_atomic();
+	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
+	smp_mb__after_atomic();
+
+	/* skip displaying the same error again */
+	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
+		return PTR_ERR(event);
+
+	/* vary the KERN level based on the returned errno */
+	if (PTR_ERR(event) == -EOPNOTSUPP)
+		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
+	else if (PTR_ERR(event) == -ENOENT)
+		pr_warn("disabled (cpu%i): hardware events not enabled\n",
+			 cpu);
+	else
+		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
+			cpu, PTR_ERR(event));
+
+	pr_info("Shutting down hard lockup detector on all cpus\n");
+
+	return PTR_ERR(event);
+
+	/* success path */
+out_save:
+	per_cpu(watchdog_ev, cpu) = event;
+out_enable:
+	perf_event_enable(per_cpu(watchdog_ev, cpu));
+out:
+	return 0;
+}
+
+void watchdog_nmi_disable(unsigned int cpu)
+{
+	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+	if (event) {
+		perf_event_disable(event);
+		per_cpu(watchdog_ev, cpu) = NULL;
+
+		/* should be in cleanup, but blocks oprofile */
+		perf_event_release_kernel(event);
+	}
+	if (cpu == 0) {
+		/* watchdog_nmi_enable() expects this to be zero initially. */
+		cpu0_err = 0;
+	}
+}

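Splitting kernel/watchdog.c and the new kernel/watchdog_hld.c hinges on weak symbols: watchdog.c provides no-op __weak defaults for watchdog_nmi_enable()/watchdog_nmi_disable(), and the perf-based file, built only when CONFIG_HARDLOCKUP_DETECTOR is set (see the kernel/Makefile hunk), supplies strong definitions that win at link time. A minimal sketch of the idiom with a hypothetical foo_hw_init():

/* core.c — always built; the __weak stub is used unless overridden. */
#include <linux/kernel.h>

int __weak foo_hw_init(unsigned int cpu)
{
	return 0;	/* nothing to do without hardware support */
}

/* foo_hld.c — only built when the hardware feature is configured in;
 * this non-weak definition replaces the stub at link time.
 */
#include <linux/kernel.h>

int foo_hw_init(unsigned int cpu)
{
	pr_info("hardware watchdog enabled on CPU %u\n", cpu);
	return 0;
}
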
+ 4 - 4
lib/Kconfig.debug

@@ -194,8 +194,8 @@ config GDB_SCRIPTS
 	  build directory. If you load vmlinux into gdb, the helper
 	  scripts will be automatically imported by gdb as well, and
 	  additional functions are available to analyze a Linux kernel
-	  instance. See Documentation/gdb-kernel-debugging.txt for further
-	  details.
+	  instance. See Documentation/dev-tools/gdb-kernel-debugging.rst
+	  for further details.
 
 config ENABLE_WARN_DEPRECATED
 	bool "Enable __deprecated logic"
@@ -542,7 +542,7 @@ config DEBUG_KMEMLEAK
 	  difference being that the orphan objects are not freed but
 	  only shown in /sys/kernel/debug/kmemleak. Enabling this
 	  feature will introduce an overhead to memory
-	  allocations. See Documentation/kmemleak.txt for more
+	  allocations. See Documentation/dev-tools/kmemleak.rst for more
 	  details.
 
 	  Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
@@ -739,7 +739,7 @@ config KCOV
 	  different machines and across reboots. If you need stable PC values,
 	  disable RANDOMIZE_BASE.
 
-	  For more details, see Documentation/kcov.txt.
+	  For more details, see Documentation/dev-tools/kcov.rst.
 
 config KCOV_INSTRUMENT_ALL
 	bool "Instrument all code by default"

+ 2 - 1
lib/Kconfig.ubsan

@@ -10,7 +10,8 @@ config UBSAN
 	  This option enables undefined behaviour sanity checker
 	  Compile-time instrumentation is used to detect various undefined
 	  behaviours in runtime. Various types of checks may be enabled
-	  via boot parameter ubsan_handle (see: Documentation/ubsan.txt).
+	  via boot parameter ubsan_handle
+	  (see: Documentation/dev-tools/ubsan.rst).
 
 config UBSAN_SANITIZE_ALL
 	bool "Enable instrumentation for the entire kernel"

+ 560 - 330
lib/radix-tree.c

@@ -22,6 +22,7 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -69,6 +70,11 @@ struct radix_tree_preload {
 };
 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
 
+static inline struct radix_tree_node *entry_to_node(void *ptr)
+{
+	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
+}
+
 static inline void *node_to_entry(void *ptr)
 {
 	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
@@ -191,13 +197,12 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
  * Returns next bit offset, or size if nothing found.
  */
 static __always_inline unsigned long
-radix_tree_find_next_bit(const unsigned long *addr,
-			 unsigned long size, unsigned long offset)
+radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
+			 unsigned long offset)
 {
-	if (!__builtin_constant_p(size))
-		return find_next_bit(addr, size, offset);
+	const unsigned long *addr = node->tags[tag];
 
-	if (offset < size) {
+	if (offset < RADIX_TREE_MAP_SIZE) {
 		unsigned long tmp;
 
 		addr += offset / BITS_PER_LONG;
@@ -205,14 +210,32 @@ radix_tree_find_next_bit(const unsigned long *addr,
 		if (tmp)
 			return __ffs(tmp) + offset;
 		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
-		while (offset < size) {
+		while (offset < RADIX_TREE_MAP_SIZE) {
 			tmp = *++addr;
 			if (tmp)
 				return __ffs(tmp) + offset;
 			offset += BITS_PER_LONG;
 		}
 	}
-	return size;
+	return RADIX_TREE_MAP_SIZE;
+}
+
+static unsigned int iter_offset(const struct radix_tree_iter *iter)
+{
+	return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
+}
+
+/*
+ * The maximum index which can be stored in a radix tree
+ */
+static inline unsigned long shift_maxindex(unsigned int shift)
+{
+	return (RADIX_TREE_MAP_SIZE << shift) - 1;
+}
+
+static inline unsigned long node_maxindex(struct radix_tree_node *node)
+{
+	return shift_maxindex(node->shift);
 }
 
 #ifndef __KERNEL__
@@ -220,10 +243,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
 {
 	unsigned long i;
 
-	pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d exceptional %d parent %p\n",
-		node, node->offset,
+	pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
+		node, node->offset, index, index | node_maxindex(node),
+		node->parent,
 		node->tags[0][0], node->tags[1][0], node->tags[2][0],
-		node->shift, node->count, node->exceptional, node->parent);
+		node->shift, node->count, node->exceptional);
 
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		unsigned long first = index | (i << node->shift);
@@ -231,14 +255,16 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
 		void *entry = node->slots[i];
 		if (!entry)
 			continue;
-		if (is_sibling_entry(node, entry)) {
-			pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n",
-					entry, i,
-					*(void **)entry_to_node(entry),
-					first, last);
+		if (entry == RADIX_TREE_RETRY) {
+			pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
+					i, first, last, node);
 		} else if (!radix_tree_is_internal_node(entry)) {
-			pr_debug("radix entry %p offset %ld indices %ld-%ld\n",
-					entry, i, first, last);
+			pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
+					entry, i, first, last, node);
+		} else if (is_sibling_entry(node, entry)) {
+			pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
+					entry, i, first, last, node,
+					*(void **)entry_to_node(entry));
 		} else {
 			dump_node(entry_to_node(entry), first);
 		}
@@ -262,7 +288,10 @@ static void radix_tree_dump(struct radix_tree_root *root)
  * that the caller has pinned this thread of control to the current CPU.
  */
 static struct radix_tree_node *
-radix_tree_node_alloc(struct radix_tree_root *root)
+radix_tree_node_alloc(struct radix_tree_root *root,
+			struct radix_tree_node *parent,
+			unsigned int shift, unsigned int offset,
+			unsigned int count, unsigned int exceptional)
 {
 	struct radix_tree_node *ret = NULL;
 	gfp_t gfp_mask = root_gfp_mask(root);
@@ -307,6 +336,13 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
 out:
 	BUG_ON(radix_tree_is_internal_node(ret));
+	if (ret) {
+		ret->parent = parent;
+		ret->shift = shift;
+		ret->offset = offset;
+		ret->count = count;
+		ret->exceptional = exceptional;
+	}
 	return ret;
 }
 
@@ -314,17 +350,15 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
 {
 	struct radix_tree_node *node =
 			container_of(head, struct radix_tree_node, rcu_head);
-	int i;
 
 	/*
-	 * must only free zeroed nodes into the slab. radix_tree_shrink
-	 * can leave us with a non-NULL entry in the first slot, so clear
-	 * that here to make sure.
+	 * Must only free zeroed nodes into the slab.  We can be left with
+	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
+	 * and tags here.
 	 */
-	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
-		tag_clear(node, i, 0);
-
-	node->slots[0] = NULL;
+	memset(node->slots, 0, sizeof(node->slots));
+	memset(node->tags, 0, sizeof(node->tags));
+	INIT_LIST_HEAD(&node->private_list);
 
 	kmem_cache_free(radix_tree_node_cachep, node);
 }
@@ -344,7 +378,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  * To make use of this facility, the radix tree must be initialised without
  * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  */
-static int __radix_tree_preload(gfp_t gfp_mask, int nr)
+static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
 {
 	struct radix_tree_preload *rtp;
 	struct radix_tree_node *node;
@@ -410,6 +444,28 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(radix_tree_maybe_preload);
 
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+/*
+ * Preload with enough objects to ensure that we can split a single entry
+ * of order @old_order into many entries of size @new_order
+ */
+int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
+							gfp_t gfp_mask)
+{
+	unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
+	unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
+				(new_order / RADIX_TREE_MAP_SHIFT);
+	unsigned nr = 0;
+
+	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
+	BUG_ON(new_order >= old_order);
+
+	while (layers--)
+		nr = nr * RADIX_TREE_MAP_SIZE + 1;
+	return __radix_tree_preload(gfp_mask, top * nr);
+}
+#endif
+
 /*
  * The same as function above, but preload number of nodes required to insert
  * (1 << order) continuous naturally-aligned elements.
@@ -455,19 +511,6 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
 	return __radix_tree_preload(gfp_mask, nr_nodes);
 }
 
-/*
- * The maximum index which can be stored in a radix tree
- */
-static inline unsigned long shift_maxindex(unsigned int shift)
-{
-	return (RADIX_TREE_MAP_SIZE << shift) - 1;
-}
-
-static inline unsigned long node_maxindex(struct radix_tree_node *node)
-{
-	return shift_maxindex(node->shift);
-}
-
 static unsigned radix_tree_load_root(struct radix_tree_root *root,
 		struct radix_tree_node **nodep, unsigned long *maxindex)
 {
@@ -505,8 +548,8 @@ static int radix_tree_extend(struct radix_tree_root *root,
 		goto out;
 
 	do {
-		struct radix_tree_node *node = radix_tree_node_alloc(root);
-
+		struct radix_tree_node *node = radix_tree_node_alloc(root,
+							NULL, shift, 0, 1, 0);
 		if (!node)
 			return -ENOMEM;
 
@@ -517,16 +560,11 @@ static int radix_tree_extend(struct radix_tree_root *root,
 		}
 
 		BUG_ON(shift > BITS_PER_LONG);
-		node->shift = shift;
-		node->offset = 0;
-		node->count = 1;
-		node->parent = NULL;
 		if (radix_tree_is_internal_node(slot)) {
 			entry_to_node(slot)->parent = node;
-		} else {
+		} else if (radix_tree_exceptional_entry(slot)) {
 			/* Moving an exceptional root->rnode to a node */
-			if (radix_tree_exceptional_entry(slot))
-				node->exceptional = 1;
+			node->exceptional = 1;
 		}
 		node->slots[0] = slot;
 		slot = node_to_entry(node);
@@ -665,26 +703,24 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 	shift = radix_tree_load_root(root, &child, &maxindex);
 
 	/* Make sure the tree is high enough.  */
+	if (order > 0 && max == ((1UL << order) - 1))
+		max++;
 	if (max > maxindex) {
 		int error = radix_tree_extend(root, max, shift);
 		if (error < 0)
 			return error;
 		shift = error;
 		child = root->rnode;
-		if (order == shift)
-			shift += RADIX_TREE_MAP_SHIFT;
 	}
 
 	while (shift > order) {
 		shift -= RADIX_TREE_MAP_SHIFT;
 		if (child == NULL) {
 			/* Have to add a child node.  */
-			child = radix_tree_node_alloc(root);
+			child = radix_tree_node_alloc(root, node, shift,
+							offset, 0, 0);
 			if (!child)
 				return -ENOMEM;
-			child->shift = shift;
-			child->offset = offset;
-			child->parent = node;
 			rcu_assign_pointer(*slot, node_to_entry(child));
 			if (node)
 				node->count++;
@@ -697,31 +733,125 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 		slot = &node->slots[offset];
 	}
 
+	if (nodep)
+		*nodep = node;
+	if (slotp)
+		*slotp = slot;
+	return 0;
+}
+
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-	/* Insert pointers to the canonical entry */
-	if (order > shift) {
-		unsigned i, n = 1 << (order - shift);
+/*
+ * Free any nodes below this node.  The tree is presumed to not need
+ * shrinking, and any user data in the tree is presumed to not need a
+ * destructor called on it.  If we need to add a destructor, we can
+ * add that functionality later.  Note that we may not clear tags or
+ * slots from the tree as an RCU walker may still have a pointer into
+ * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
+ * but we'll still have to clear those in rcu_free.
+ */
+static void radix_tree_free_nodes(struct radix_tree_node *node)
+{
+	unsigned offset = 0;
+	struct radix_tree_node *child = entry_to_node(node);
+
+	for (;;) {
+		void *entry = child->slots[offset];
+		if (radix_tree_is_internal_node(entry) &&
+					!is_sibling_entry(child, entry)) {
+			child = entry_to_node(entry);
+			offset = 0;
+			continue;
+		}
+		offset++;
+		while (offset == RADIX_TREE_MAP_SIZE) {
+			struct radix_tree_node *old = child;
+			offset = child->offset + 1;
+			child = child->parent;
+			radix_tree_node_free(old);
+			if (old == entry_to_node(node))
+				return;
+		}
+	}
+}
+
+static inline int insert_entries(struct radix_tree_node *node, void **slot,
+				void *item, unsigned order, bool replace)
+{
+	struct radix_tree_node *child;
+	unsigned i, n, tag, offset, tags = 0;
+
+	if (node) {
+		if (order > node->shift)
+			n = 1 << (order - node->shift);
+		else
+			n = 1;
+		offset = get_slot_offset(node, slot);
+	} else {
+		n = 1;
+		offset = 0;
+	}
+
+	if (n > 1) {
 		offset = offset & ~(n - 1);
 		slot = &node->slots[offset];
-		child = node_to_entry(slot);
-		for (i = 0; i < n; i++) {
-			if (slot[i])
+	}
+	child = node_to_entry(slot);
+
+	for (i = 0; i < n; i++) {
+		if (slot[i]) {
+			if (replace) {
+				node->count--;
+				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+					if (tag_get(node, tag, offset + i))
+						tags |= 1 << tag;
+			} else
 				return -EEXIST;
 		}
+	}
 
-		for (i = 1; i < n; i++) {
+	for (i = 0; i < n; i++) {
+		struct radix_tree_node *old = slot[i];
+		if (i) {
 			rcu_assign_pointer(slot[i], child);
-			node->count++;
+			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+				if (tags & (1 << tag))
+					tag_clear(node, tag, offset + i);
+		} else {
+			rcu_assign_pointer(slot[i], item);
+			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+				if (tags & (1 << tag))
+					tag_set(node, tag, offset);
 		}
+		if (radix_tree_is_internal_node(old) &&
+					!is_sibling_entry(node, old) &&
+					(old != RADIX_TREE_RETRY))
+			radix_tree_free_nodes(old);
+		if (radix_tree_exceptional_entry(old))
+			node->exceptional--;
 	}
-#endif
-
-	if (nodep)
-		*nodep = node;
-	if (slotp)
-		*slotp = slot;
-	return 0;
+	if (node) {
+		node->count += n;
+		if (radix_tree_exceptional_entry(item))
+			node->exceptional += n;
+	}
+	return n;
+}
+#else
+static inline int insert_entries(struct radix_tree_node *node, void **slot,
+				void *item, unsigned order, bool replace)
+{
+	if (*slot)
+		return -EEXIST;
+	rcu_assign_pointer(*slot, item);
+	if (node) {
+		node->count++;
+		if (radix_tree_exceptional_entry(item))
+			node->exceptional++;
+	}
+	return 1;
 }
+#endif
 
 /**
  *	__radix_tree_insert    -    insert into a radix tree
@@ -744,15 +874,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
 	error = __radix_tree_create(root, index, order, &node, &slot);
 	if (error)
 		return error;
-	if (*slot != NULL)
-		return -EEXIST;
-	rcu_assign_pointer(*slot, item);
+
+	error = insert_entries(node, slot, item, order, false);
+	if (error < 0)
+		return error;
 
 	if (node) {
 		unsigned offset = get_slot_offset(node, slot);
-		node->count++;
-		if (radix_tree_exceptional_entry(item))
-			node->exceptional++;
 		BUG_ON(tag_get(node, 0, offset));
 		BUG_ON(tag_get(node, 1, offset));
 		BUG_ON(tag_get(node, 2, offset));
@@ -850,6 +978,24 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
+static inline int slot_count(struct radix_tree_node *node,
+						void **slot)
+{
+	int n = 1;
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+	void *ptr = node_to_entry(slot);
+	unsigned offset = get_slot_offset(node, slot);
+	int i;
+
+	for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
+		if (node->slots[offset + i] != ptr)
+			break;
+		n++;
+	}
+#endif
+	return n;
+}
+
 static void replace_slot(struct radix_tree_root *root,
 			 struct radix_tree_node *node,
 			 void **slot, void *item,
@@ -868,12 +1014,35 @@ static void replace_slot(struct radix_tree_root *root,
 
 	if (node) {
 		node->count += count;
-		node->exceptional += exceptional;
+		if (exceptional) {
+			exceptional *= slot_count(node, slot);
+			node->exceptional += exceptional;
+		}
 	}
 
 	rcu_assign_pointer(*slot, item);
 }
 
+static inline void delete_sibling_entries(struct radix_tree_node *node,
+						void **slot)
+{
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+	bool exceptional = radix_tree_exceptional_entry(*slot);
+	void *ptr = node_to_entry(slot);
+	unsigned offset = get_slot_offset(node, slot);
+	int i;
+
+	for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
+		if (node->slots[offset + i] != ptr)
+			break;
+		node->slots[offset + i] = NULL;
+		node->count--;
+		if (exceptional)
+			node->exceptional--;
+	}
+#endif
+}
+
 /**
  * __radix_tree_replace		- replace item in a slot
  * @root:		radix tree root
@@ -891,6 +1060,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
 			  void **slot, void *item,
 			  radix_tree_update_node_t update_node, void *private)
 {
+	if (!item)
+		delete_sibling_entries(node, slot);
 	/*
 	 * This function supports replacing exceptional entries and
 	 * deleting entries, but that needs accounting against the
@@ -921,7 +1092,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
  * NOTE: This cannot be used to switch between non-entries (empty slots),
  * regular entries, and exceptional entries, as that requires accounting
  * inside the radix tree node. When switching from one type of entry or
- * deleting, use __radix_tree_lookup() and __radix_tree_replace().
+ * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
+ * radix_tree_iter_replace().
  */
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item)
@@ -929,6 +1101,164 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
 	replace_slot(root, NULL, slot, item, true);
 }
 
+/**
+ * radix_tree_iter_replace - replace item in a slot
+ * @root:	radix tree root
+ * @iter:	iterator state
+ * @slot:	pointer to slot
+ * @item:	new item to store in the slot.
+ *
+ * For use with radix_tree_split() and radix_tree_for_each_slot().
+ * Caller must hold tree write locked across split and replacement.
+ */
+void radix_tree_iter_replace(struct radix_tree_root *root,
+		const struct radix_tree_iter *iter, void **slot, void *item)
+{
+	__radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
+}
+
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+/**
+ * radix_tree_join - replace multiple entries with one multiorder entry
+ * @root: radix tree root
+ * @index: an index inside the new entry
+ * @order: order of the new entry
+ * @item: new entry
+ *
+ * Call this function to replace several entries with one larger entry.
+ * The existing entries are presumed to not need freeing as a result of
+ * this call.
+ *
+ * The replacement entry will have all the tags set on it that were set
+ * on any of the entries it is replacing.
+ */
+int radix_tree_join(struct radix_tree_root *root, unsigned long index,
+			unsigned order, void *item)
+{
+	struct radix_tree_node *node;
+	void **slot;
+	int error;
+
+	BUG_ON(radix_tree_is_internal_node(item));
+
+	error = __radix_tree_create(root, index, order, &node, &slot);
+	if (!error)
+		error = insert_entries(node, slot, item, order, true);
+	if (error > 0)
+		error = 0;
+
+	return error;
+}
+
+/**
+ * radix_tree_split - Split an entry into smaller entries
+ * @root: radix tree root
+ * @index: An index within the large entry
+ * @order: Order of new entries
+ *
+ * Call this function as the first step in replacing a multiorder entry
+ * with several entries of lower order.  After this function returns,
+ * loop over the relevant portion of the tree using radix_tree_for_each_slot()
+ * and call radix_tree_iter_replace() to set up each new entry.
+ *
+ * The tags from this entry are replicated to all the new entries.
+ *
+ * The radix tree should be locked against modification during the entire
+ * replacement operation.  Lock-free lookups will see RADIX_TREE_RETRY which
+ * should prompt RCU walkers to restart the lookup from the root.
+ */
+int radix_tree_split(struct radix_tree_root *root, unsigned long index,
+				unsigned order)
+{
+	struct radix_tree_node *parent, *node, *child;
+	void **slot;
+	unsigned int offset, end;
+	unsigned n, tag, tags = 0;
+
+	if (!__radix_tree_lookup(root, index, &parent, &slot))
+		return -ENOENT;
+	if (!parent)
+		return -ENOENT;
+
+	offset = get_slot_offset(parent, slot);
+
+	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+		if (tag_get(parent, tag, offset))
+			tags |= 1 << tag;
+
+	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
+		if (!is_sibling_entry(parent, parent->slots[end]))
+			break;
+		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+			if (tags & (1 << tag))
+				tag_set(parent, tag, end);
+		/* rcu_assign_pointer ensures tags are set before RETRY */
+		rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
+	}
+	rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
+	parent->exceptional -= (end - offset);
+
+	if (order == parent->shift)
+		return 0;
+	if (order > parent->shift) {
+		while (offset < end)
+			offset += insert_entries(parent, &parent->slots[offset],
+					RADIX_TREE_RETRY, order, true);
+		return 0;
+	}
+
+	node = parent;
+
+	for (;;) {
+		if (node->shift > order) {
+			child = radix_tree_node_alloc(root, node,
+					node->shift - RADIX_TREE_MAP_SHIFT,
+					offset, 0, 0);
+			if (!child)
+				goto nomem;
+			if (node != parent) {
+				node->count++;
+				node->slots[offset] = node_to_entry(child);
+				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+					if (tags & (1 << tag))
+						tag_set(node, tag, offset);
+			}
+
+			node = child;
+			offset = 0;
+			continue;
+		}
+
+		n = insert_entries(node, &node->slots[offset],
+					RADIX_TREE_RETRY, order, false);
+		BUG_ON(n > RADIX_TREE_MAP_SIZE);
+
+		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+			if (tags & (1 << tag))
+				tag_set(node, tag, offset);
+		offset += n;
+
+		while (offset == RADIX_TREE_MAP_SIZE) {
+			if (node == parent)
+				break;
+			offset = node->offset;
+			child = node;
+			node = node->parent;
+			rcu_assign_pointer(node->slots[offset],
+						node_to_entry(child));
+			offset++;
+		}
+		if ((node == parent) && (offset == end))
+			return 0;
+	}
+
+ nomem:
+	/* Shouldn't happen; did user forget to preload? */
+	/* TODO: free all the allocated nodes */
+	WARN_ON(1);
+	return -ENOMEM;
+}
+#endif
+
 /**
  *	radix_tree_tag_set - set a tag on a radix tree node
  *	@root:		radix tree root
@@ -990,6 +1320,34 @@ static void node_tag_clear(struct radix_tree_root *root,
 		root_tag_clear(root, tag);
 }
 
+static void node_tag_set(struct radix_tree_root *root,
+				struct radix_tree_node *node,
+				unsigned int tag, unsigned int offset)
+{
+	while (node) {
+		if (tag_get(node, tag, offset))
+			return;
+		tag_set(node, tag, offset);
+		offset = node->offset;
+		node = node->parent;
+	}
+
+	if (!root_tag_get(root, tag))
+		root_tag_set(root, tag);
+}
+
+/**
+ * radix_tree_iter_tag_set - set a tag on the current iterator entry
+ * @root:	radix tree root
+ * @iter:	iterator state
+ * @tag:	tag to set
+ */
+void radix_tree_iter_tag_set(struct radix_tree_root *root,
+			const struct radix_tree_iter *iter, unsigned int tag)
+{
+	node_tag_set(root, iter->node, tag, iter_offset(iter));
+}
+
 /**
  *	radix_tree_tag_clear - clear a tag on a radix tree node
  *	@root:		radix tree root
@@ -1085,6 +1443,121 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter,
 #endif
 }
 
+/* Construct iter->tags bit-mask from node->tags[tag] array */
+static void set_iter_tags(struct radix_tree_iter *iter,
+				struct radix_tree_node *node, unsigned offset,
+				unsigned tag)
+{
+	unsigned tag_long = offset / BITS_PER_LONG;
+	unsigned tag_bit  = offset % BITS_PER_LONG;
+
+	iter->tags = node->tags[tag][tag_long] >> tag_bit;
+
+	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
+	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
+		/* Pick tags from next element */
+		if (tag_bit)
+			iter->tags |= node->tags[tag][tag_long + 1] <<
+						(BITS_PER_LONG - tag_bit);
+		/* Clip chunk size, here only BITS_PER_LONG tags */
+		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
+	}
+}
+
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+static void **skip_siblings(struct radix_tree_node **nodep,
+			void **slot, struct radix_tree_iter *iter)
+{
+	void *sib = node_to_entry(slot - 1);
+
+	while (iter->index < iter->next_index) {
+		*nodep = rcu_dereference_raw(*slot);
+		if (*nodep && *nodep != sib)
+			return slot;
+		slot++;
+		iter->index = __radix_tree_iter_add(iter, 1);
+		iter->tags >>= 1;
+	}
+
+	*nodep = NULL;
+	return NULL;
+}
+
+void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter,
+					unsigned flags)
+{
+	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
+	struct radix_tree_node *node = rcu_dereference_raw(*slot);
+
+	slot = skip_siblings(&node, slot, iter);
+
+	while (radix_tree_is_internal_node(node)) {
+		unsigned offset;
+		unsigned long next_index;
+
+		if (node == RADIX_TREE_RETRY)
+			return slot;
+		node = entry_to_node(node);
+		iter->node = node;
+		iter->shift = node->shift;
+
+		if (flags & RADIX_TREE_ITER_TAGGED) {
+			offset = radix_tree_find_next_bit(node, tag, 0);
+			if (offset == RADIX_TREE_MAP_SIZE)
+				return NULL;
+			slot = &node->slots[offset];
+			iter->index = __radix_tree_iter_add(iter, offset);
+			set_iter_tags(iter, node, offset, tag);
+			node = rcu_dereference_raw(*slot);
+		} else {
+			offset = 0;
+			slot = &node->slots[0];
+			for (;;) {
+				node = rcu_dereference_raw(*slot);
+				if (node)
+					break;
+				slot++;
+				offset++;
+				if (offset == RADIX_TREE_MAP_SIZE)
+					return NULL;
+			}
+			iter->index = __radix_tree_iter_add(iter, offset);
+		}
+		if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
+			goto none;
+		next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
+		if (next_index < iter->next_index)
+			iter->next_index = next_index;
+	}
+
+	return slot;
+ none:
+	iter->next_index = 0;
+	return NULL;
+}
+EXPORT_SYMBOL(__radix_tree_next_slot);
+#else
+static void **skip_siblings(struct radix_tree_node **nodep,
+			void **slot, struct radix_tree_iter *iter)
+{
+	return slot;
+}
+#endif
+
+void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter)
+{
+	struct radix_tree_node *node;
+
+	slot++;
+	iter->index = __radix_tree_iter_add(iter, 1);
+	node = rcu_dereference_raw(*slot);
+	skip_siblings(&node, slot, iter);
+	iter->next_index = iter->index;
+	iter->tags = 0;
+	return NULL;
+}
+EXPORT_SYMBOL(radix_tree_iter_resume);
+
 /**
  * radix_tree_next_chunk - find next chunk of slots for iteration
  *
@@ -1110,7 +1583,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
 	 *
 	 * This condition also used by radix_tree_next_slot() to stop
-	 * contiguous iterating, and forbid swithing to the next chunk.
+	 * contiguous iterating, and forbid switching to the next chunk.
 	 */
 	index = iter->next_index;
 	if (!index && iter->index)
@@ -1128,6 +1601,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 		iter->index = index;
 		iter->next_index = maxindex + 1;
 		iter->tags = 1;
+		iter->node = NULL;
 		__set_iter_shift(iter, 0);
 		return (void **)&root->rnode;
 	}
@@ -1143,9 +1617,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 				return NULL;
 
 			if (flags & RADIX_TREE_ITER_TAGGED)
-				offset = radix_tree_find_next_bit(
-						node->tags[tag],
-						RADIX_TREE_MAP_SIZE,
+				offset = radix_tree_find_next_bit(node, tag,
 						offset + 1);
 			else
 				while (++offset	< RADIX_TREE_MAP_SIZE) {
@@ -1165,153 +1637,25 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 			child = rcu_dereference_raw(node->slots[offset]);
 		}
 
-		if ((child == NULL) || (child == RADIX_TREE_RETRY))
+		if (!child)
 			goto restart;
+		if (child == RADIX_TREE_RETRY)
+			break;
 	} while (radix_tree_is_internal_node(child));
 
 	/* Update the iterator state */
 	iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
 	iter->next_index = (index | node_maxindex(node)) + 1;
+	iter->node = node;
 	__set_iter_shift(iter, node->shift);
 
-	/* Construct iter->tags bit-mask from node->tags[tag] array */
-	if (flags & RADIX_TREE_ITER_TAGGED) {
-		unsigned tag_long, tag_bit;
-
-		tag_long = offset / BITS_PER_LONG;
-		tag_bit  = offset % BITS_PER_LONG;
-		iter->tags = node->tags[tag][tag_long] >> tag_bit;
-		/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
-		if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
-			/* Pick tags from next element */
-			if (tag_bit)
-				iter->tags |= node->tags[tag][tag_long + 1] <<
-						(BITS_PER_LONG - tag_bit);
-			/* Clip chunk size, here only BITS_PER_LONG tags */
-			iter->next_index = index + BITS_PER_LONG;
-		}
-	}
+	if (flags & RADIX_TREE_ITER_TAGGED)
+		set_iter_tags(iter, node, offset, tag);
 
 	return node->slots + offset;
 }
 EXPORT_SYMBOL(radix_tree_next_chunk);
 
-/**
- * radix_tree_range_tag_if_tagged - for each item in given range set given
- *				   tag if item has another tag set
- * @root:		radix tree root
- * @first_indexp:	pointer to a starting index of a range to scan
- * @last_index:		last index of a range to scan
- * @nr_to_tag:		maximum number items to tag
- * @iftag:		tag index to test
- * @settag:		tag index to set if tested tag is set
- *
- * This function scans range of radix tree from first_index to last_index
- * (inclusive).  For each item in the range if iftag is set, the function sets
- * also settag. The function stops either after tagging nr_to_tag items or
- * after reaching last_index.
- *
- * The tags must be set from the leaf level only and propagated back up the
- * path to the root. We must do this so that we resolve the full path before
- * setting any tags on intermediate nodes. If we set tags as we descend, then
- * we can get to the leaf node and find that the index that has the iftag
- * set is outside the range we are scanning. This reults in dangling tags and
- * can lead to problems with later tag operations (e.g. livelocks on lookups).
- *
- * The function returns the number of leaves where the tag was set and sets
- * *first_indexp to the first unscanned index.
- * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
- * be prepared to handle that.
- */
-unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
-		unsigned long *first_indexp, unsigned long last_index,
-		unsigned long nr_to_tag,
-		unsigned int iftag, unsigned int settag)
-{
-	struct radix_tree_node *parent, *node, *child;
-	unsigned long maxindex;
-	unsigned long tagged = 0;
-	unsigned long index = *first_indexp;
-
-	radix_tree_load_root(root, &child, &maxindex);
-	last_index = min(last_index, maxindex);
-	if (index > last_index)
-		return 0;
-	if (!nr_to_tag)
-		return 0;
-	if (!root_tag_get(root, iftag)) {
-		*first_indexp = last_index + 1;
-		return 0;
-	}
-	if (!radix_tree_is_internal_node(child)) {
-		*first_indexp = last_index + 1;
-		root_tag_set(root, settag);
-		return 1;
-	}
-
-	node = entry_to_node(child);
-
-	for (;;) {
-		unsigned offset = radix_tree_descend(node, &child, index);
-		if (!child)
-			goto next;
-		if (!tag_get(node, iftag, offset))
-			goto next;
-		/* Sibling slots never have tags set on them */
-		if (radix_tree_is_internal_node(child)) {
-			node = entry_to_node(child);
-			continue;
-		}
-
-		/* tag the leaf */
-		tagged++;
-		tag_set(node, settag, offset);
-
-		/* walk back up the path tagging interior nodes */
-		parent = node;
-		for (;;) {
-			offset = parent->offset;
-			parent = parent->parent;
-			if (!parent)
-				break;
-			/* stop if we find a node with the tag already set */
-			if (tag_get(parent, settag, offset))
-				break;
-			tag_set(parent, settag, offset);
-		}
- next:
-		/* Go to next entry in node */
-		index = ((index >> node->shift) + 1) << node->shift;
-		/* Overflow can happen when last_index is ~0UL... */
-		if (index > last_index || !index)
-			break;
-		offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
-		while (offset == 0) {
-			/*
-			 * We've fully scanned this node. Go up. Because
-			 * last_index is guaranteed to be in the tree, what
-			 * we do below cannot wander astray.
-			 */
-			node = node->parent;
-			offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
-		}
-		if (is_sibling_entry(node, node->slots[offset]))
-			goto next;
-		if (tagged >= nr_to_tag)
-			break;
-	}
-	/*
-	 * We need not to tag the root tag if there is no tag which is set with
-	 * settag within the range from *first_indexp to last_index.
-	 */
-	if (tagged > 0)
-		root_tag_set(root, settag);
-	*first_indexp = index;
-
-	return tagged;
-}
-EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
-
 /**
  *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
  *	@root:		radix tree root
@@ -1477,105 +1821,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
 
-#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
-#include <linux/sched.h> /* for cond_resched() */
-
-struct locate_info {
-	unsigned long found_index;
-	bool stop;
-};
-
-/*
- * This linear search is at present only useful to shmem_unuse_inode().
- */
-static unsigned long __locate(struct radix_tree_node *slot, void *item,
-			      unsigned long index, struct locate_info *info)
-{
-	unsigned long i;
-
-	do {
-		unsigned int shift = slot->shift;
-
-		for (i = (index >> shift) & RADIX_TREE_MAP_MASK;
-		     i < RADIX_TREE_MAP_SIZE;
-		     i++, index += (1UL << shift)) {
-			struct radix_tree_node *node =
-					rcu_dereference_raw(slot->slots[i]);
-			if (node == RADIX_TREE_RETRY)
-				goto out;
-			if (!radix_tree_is_internal_node(node)) {
-				if (node == item) {
-					info->found_index = index;
-					info->stop = true;
-					goto out;
-				}
-				continue;
-			}
-			node = entry_to_node(node);
-			if (is_sibling_entry(slot, node))
-				continue;
-			slot = node;
-			break;
-		}
-	} while (i < RADIX_TREE_MAP_SIZE);
-
-out:
-	if ((index == 0) && (i == RADIX_TREE_MAP_SIZE))
-		info->stop = true;
-	return index;
-}
-
-/**
- *	radix_tree_locate_item - search through radix tree for item
- *	@root:		radix tree root
- *	@item:		item to be found
- *
- *	Returns index where item was found, or -1 if not found.
- *	Caller must hold no lock (since this time-consuming function needs
- *	to be preemptible), and must check afterwards if item is still there.
- */
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
-{
-	struct radix_tree_node *node;
-	unsigned long max_index;
-	unsigned long cur_index = 0;
-	struct locate_info info = {
-		.found_index = -1,
-		.stop = false,
-	};
-
-	do {
-		rcu_read_lock();
-		node = rcu_dereference_raw(root->rnode);
-		if (!radix_tree_is_internal_node(node)) {
-			rcu_read_unlock();
-			if (node == item)
-				info.found_index = 0;
-			break;
-		}
-
-		node = entry_to_node(node);
-
-		max_index = node_maxindex(node);
-		if (cur_index > max_index) {
-			rcu_read_unlock();
-			break;
-		}
-
-		cur_index = __locate(node, item, cur_index, &info);
-		rcu_read_unlock();
-		cond_resched();
-	} while (!info.stop && cur_index <= max_index);
-
-	return info.found_index;
-}
-#else
-unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
-{
-	return -1;
-}
-#endif /* CONFIG_SHMEM && CONFIG_SWAP */
-
 /**
  *	__radix_tree_delete_node    -    try to free node after clearing a slot
  *	@root:		radix tree root
@@ -1591,20 +1836,6 @@ void __radix_tree_delete_node(struct radix_tree_root *root,
 	delete_node(root, node, NULL, NULL);
 }
 
-static inline void delete_sibling_entries(struct radix_tree_node *node,
-					void *ptr, unsigned offset)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	int i;
-	for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
-		if (node->slots[offset + i] != ptr)
-			break;
-		node->slots[offset + i] = NULL;
-		node->count--;
-	}
-#endif
-}
-
 /**
  *	radix_tree_delete_item    -    delete an item from a radix tree
  *	@root:		radix tree root
@@ -1644,7 +1875,6 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
 	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 		node_tag_clear(root, node, tag, offset);
 
-	delete_sibling_entries(node, node_to_entry(slot), offset);
 	__radix_tree_replace(root, node, slot, NULL, NULL, NULL);
 
 	return entry;

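A minimal usage sketch (not part of the patch) of the multiorder split/replace API added to lib/radix-tree.c above, following the pattern the radix_tree_split() kernel-doc describes: split an entry of order old_order into order-0 slots, then walk them with radix_tree_for_each_slot() and install replacements with radix_tree_iter_replace(). The helper name, the new_entries array, and the assumption that the caller has preloaded nodes and holds the tree lock are illustrative only.

#include <linux/radix-tree.h>

/*
 * Illustrative helper, assumed for this example: the caller holds the
 * tree write-locked across the split and the replacements, and has
 * preloaded nodes (radix_tree_split_preload(), added elsewhere in this
 * series) so the -ENOMEM path is not normally hit.
 */
static int split_to_order0(struct radix_tree_root *root, unsigned long index,
			   unsigned int old_order, void **new_entries)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned long start = index & ~((1UL << old_order) - 1);
	unsigned long nr_new = 1UL << old_order;
	unsigned long i = 0;
	int err;

	/* After this, the old entry's slots hold RADIX_TREE_RETRY */
	err = radix_tree_split(root, index, 0);
	if (err)
		return err;

	/* Each yielded slot in the split range is replaced in turn */
	radix_tree_for_each_slot(slot, root, &iter, start) {
		radix_tree_iter_replace(root, &iter, slot, new_entries[i]);
		if (++i == nr_new)
			break;
	}
	return 0;
}

radix_tree_join() above goes the other way: a single call replaces every existing entry covered by the new multiorder entry and inherits their tags; the call itself does not free the entries it replaces.
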
+ 14 - 3
mm/compaction.c

@@ -818,6 +818,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		    page_count(page) > page_mapcount(page))
 			goto isolate_fail;
 
+		/*
+		 * Only allow migration of anonymous pages in GFP_NOFS
+		 * context, because those do not depend on fs locks.
+		 */
+		if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
+			goto isolate_fail;
+
 		/* If we already hold the lock, we can skip some rechecking */
 		if (!locked) {
 			locked = compact_trylock_irqsave(zone_lru_lock(zone),
@@ -1677,14 +1684,16 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
 		enum compact_priority prio)
 {
-	int may_enter_fs = gfp_mask & __GFP_FS;
 	int may_perform_io = gfp_mask & __GFP_IO;
 	struct zoneref *z;
 	struct zone *zone;
 	enum compact_result rc = COMPACT_SKIPPED;
 
-	/* Check if the GFP flags allow compaction */
-	if (!may_enter_fs || !may_perform_io)
+	/*
+	 * Check if the GFP flags allow compaction - GFP_NOIO is a really
+	 * tricky context because migration might require IO.
+	 */
+	if (!may_perform_io)
 		return COMPACT_SKIPPED;
 
 	trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
@@ -1751,6 +1760,7 @@ static void compact_node(int nid)
 		.mode = MIGRATE_SYNC,
 		.ignore_skip_hint = true,
 		.whole_zone = true,
+		.gfp_mask = GFP_KERNEL,
 	};
 
 
@@ -1876,6 +1886,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 		.classzone_idx = pgdat->kcompactd_classzone_idx,
 		.mode = MIGRATE_SYNC_LIGHT,
 		.ignore_skip_hint = true,
+		.gfp_mask = GFP_KERNEL,
 
 	};
 	trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,

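The compaction change above can be summarised as two predicates; the sketch below (helper names are hypothetical, not from the patch) restates the policy: allocations without __GFP_IO still skip compaction entirely because migration might require IO, while GFP_NOFS compaction proceeds but only isolates anonymous pages, since those do not take fs locks.

#include <linux/gfp.h>
#include <linux/mm.h>

/* Hypothetical restatement of the checks added in mm/compaction.c above */
static bool compaction_allowed(gfp_t gfp_mask)
{
	/* GFP_NOIO remains a blanket skip: migration may need IO */
	return !!(gfp_mask & __GFP_IO);
}

static bool may_isolate_for_migration(gfp_t gfp_mask, struct page *page)
{
	/* Without __GFP_FS, only anonymous pages are eligible */
	if (!(gfp_mask & __GFP_FS) && page_mapping(page))
		return false;
	return true;
}
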
+ 7 - 7
mm/filemap.c

@@ -2164,12 +2164,12 @@ page_not_uptodate:
 }
 EXPORT_SYMBOL(filemap_fault);
 
-void filemap_map_pages(struct fault_env *fe,
+void filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
 	struct radix_tree_iter iter;
 	void **slot;
-	struct file *file = fe->vma->vm_file;
+	struct file *file = vmf->vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
 	pgoff_t last_pgoff = start_pgoff;
 	loff_t size;
@@ -2225,11 +2225,11 @@ repeat:
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
 
-		fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
-		if (fe->pte)
-			fe->pte += iter.index - last_pgoff;
+		vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+		if (vmf->pte)
+			vmf->pte += iter.index - last_pgoff;
 		last_pgoff = iter.index;
-		if (alloc_set_pte(fe, NULL, page))
+		if (alloc_set_pte(vmf, NULL, page))
 			goto unlock;
 		unlock_page(page);
 		goto next;
@@ -2239,7 +2239,7 @@ skip:
 		put_page(page);
 next:
 		/* Huge page is mapped? No need to proceed. */
-		if (pmd_trans_huge(*fe->pmd))
+		if (pmd_trans_huge(*vmf->pmd))
 			break;
 		if (iter.index == end_pgoff)
 			break;

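For context on the fault_env -> vm_fault conversion above, a hypothetical ->map_pages handler is sketched below, mirroring the filemap_map_pages() signature shown in the hunk; the example_ names are assumptions, and most filesystems would simply delegate to the generic helper like this.

#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical handler: the fault state (vma, address, pte, pmd) now
 * arrives in one struct vm_fault rather than a struct fault_env. */
static void example_map_pages(struct vm_fault *vmf,
			      pgoff_t start_pgoff, pgoff_t end_pgoff)
{
	filemap_map_pages(vmf, start_pgoff, end_pgoff);
}

/* Wiring is unchanged; only the handler's argument type differs */
static const struct vm_operations_struct example_vm_ops = {
	.map_pages	= example_map_pages,
};
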
+ 12 - 8
mm/gup.c

@@ -865,9 +865,10 @@ EXPORT_SYMBOL(get_user_pages_locked);
  * caller if required (just like with __get_user_pages). "FOLL_GET"
  * is set implicitly if "pages" is non-NULL.
  */
-__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
-					       unsigned long start, unsigned long nr_pages,
-					       struct page **pages, unsigned int gup_flags)
+static __always_inline long __get_user_pages_unlocked(struct task_struct *tsk,
+		struct mm_struct *mm, unsigned long start,
+		unsigned long nr_pages, struct page **pages,
+		unsigned int gup_flags)
 {
 	long ret;
 	int locked = 1;
@@ -879,7 +880,6 @@ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct m
 		up_read(&mm->mmap_sem);
 	return ret;
 }
-EXPORT_SYMBOL(__get_user_pages_unlocked);
 
 /*
  * get_user_pages_unlocked() is suitable to replace the form:
@@ -917,6 +917,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
  *		only intends to ensure the pages are faulted in.
  * @vmas:	array of pointers to vmas corresponding to each page.
  *		Or NULL if the caller does not require them.
+ * @locked:	pointer to lock flag indicating whether lock is held and
+ *		subsequently whether VM_FAULT_RETRY functionality can be
+ *		utilised. Lock must initially be held.
  *
  * Returns number of pages pinned. This may be fewer than the number
  * requested. If nr_pages is 0 or negative, returns 0. If no pages
@@ -960,10 +963,10 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,
-		struct vm_area_struct **vmas)
+		struct vm_area_struct **vmas, int *locked)
 {
 	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
-				       NULL, false,
+				       locked, true,
 				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
@@ -971,8 +974,9 @@ EXPORT_SYMBOL(get_user_pages_remote);
 /*
  * This is the same as get_user_pages_remote(), just with a
  * less-flexible calling convention where we assume that the task
- * and mm being operated on are the current task's.  We also
- * obviously don't pass FOLL_REMOTE in here.
+ * and mm being operated on are the current task's and don't allow
+ * passing of a locked parameter.  We also obviously don't pass
+ * FOLL_REMOTE in here.
  */
 long get_user_pages(unsigned long start, unsigned long nr_pages,
 		unsigned int gup_flags, struct page **pages,

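A hypothetical caller of the updated get_user_pages_remote() above, sketching how the new locked argument is intended to be used per the added @locked documentation: the caller takes mmap_sem, and the flag reports whether the call dropped it via VM_FAULT_RETRY. The function name and the FOLL_WRITE choice are assumptions.

#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/sched.h>

static long pin_one_remote_page(struct task_struct *tsk, struct mm_struct *mm,
				unsigned long addr, struct page **pagep)
{
	int locked = 1;		/* lock must initially be held */
	long ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages_remote(tsk, mm, addr, 1, FOLL_WRITE, pagep,
				    NULL, &locked);
	if (locked)		/* may have been released on VM_FAULT_RETRY */
		up_read(&mm->mmap_sem);
	return ret;
}
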
Some files were not shown because too many files changed in this diff