
Merge branch 'akpm' (incoming from Andrew)

Merge first patch-bomb from Andrew Morton:

 - a couple of misc things

 - inotify/fsnotify work from Jan

 - ocfs2 updates (partial)

 - about half of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits)
  mm/migrate: remove unused function, fail_migrate_page()
  mm/migrate: remove putback_lru_pages, fix comment on putback_movable_pages
  mm/migrate: correct failure handling if !hugepage_migration_support()
  mm/migrate: add comment about permanent failure path
  mm, page_alloc: warn for non-blockable __GFP_NOFAIL allocation failure
  mm: compaction: reset scanner positions immediately when they meet
  mm: compaction: do not mark unmovable pageblocks as skipped in async compaction
  mm: compaction: detect when scanners meet in isolate_freepages
  mm: compaction: reset cached scanner pfn's before reading them
  mm: compaction: encapsulate defer reset logic
  mm: compaction: trace compaction begin and end
  memcg, oom: lock mem_cgroup_print_oom_info
  sched: add tracepoints related to NUMA task migration
  mm: numa: do not automatically migrate KSM pages
  mm: numa: trace tasks that fail migration due to rate limiting
  mm: numa: limit scope of lock for NUMA migrate rate limiting
  mm: numa: make NUMA-migrate related functions static
  lib/show_mem.c: show num_poisoned_pages when oom
  mm/hwpoison: add '#' to hwpoison_inject
  mm/memblock: use WARN_ONCE when MAX_NUMNODES passed as input parameter
  ...
Linus Torvalds 11 years ago
parent
commit
df32e43a54
100 changed files with 1509 additions and 1627 deletions
  1. Documentation/filesystems/proc.txt (+9 -0)
  2. Documentation/sysctl/vm.txt (+12 -0)
  3. Documentation/vm/overcommit-accounting (+4 -3)
  4. arch/arm/include/asm/dma.h (+2 -2)
  5. arch/arm/kernel/devtree.c (+1 -1)
  6. arch/arm/kernel/setup.c (+1 -1)
  7. arch/arm/mach-omap2/omap_hwmod.c (+2 -6)
  8. arch/arm/mm/init.c (+1 -4)
  9. arch/ia64/mm/contig.c (+0 -68)
  10. arch/ia64/mm/discontig.c (+0 -63)
  11. arch/ia64/mm/init.c (+48 -0)
  12. arch/metag/mm/init.c (+2 -1)
  13. arch/metag/mm/numa.c (+2 -1)
  14. arch/microblaze/mm/init.c (+2 -1)
  15. arch/parisc/mm/init.c (+17 -42)
  16. arch/powerpc/mm/mem.c (+1 -1)
  17. arch/powerpc/mm/numa.c (+5 -3)
  18. arch/score/Kconfig (+0 -1)
  19. arch/sh/kernel/kgdb.c (+1 -0)
  20. arch/sh/kernel/setup.c (+2 -2)
  21. arch/sparc/mm/init_64.c (+3 -2)
  22. arch/unicore32/mm/init.c (+0 -3)
  23. arch/x86/include/asm/page_types.h (+2 -2)
  24. arch/x86/kernel/check.c (+1 -1)
  25. arch/x86/kernel/e820.c (+1 -1)
  26. arch/x86/kernel/setup.c (+1 -1)
  27. arch/x86/mm/init_32.c (+1 -1)
  28. arch/x86/mm/init_64.c (+1 -1)
  29. arch/x86/mm/memtest.c (+1 -1)
  30. arch/x86/mm/numa.c (+50 -2)
  31. arch/x86/mm/srat.c (+5 -0)
  32. drivers/char/mem.c (+0 -1)
  33. drivers/firmware/memmap.c (+1 -1)
  34. drivers/iommu/intel-iommu.c (+1 -1)
  35. fs/compat_ioctl.c (+2 -1)
  36. fs/notify/dnotify/dnotify.c (+8 -26)
  37. fs/notify/fanotify/fanotify.c (+111 -113)
  38. fs/notify/fanotify/fanotify.h (+23 -0)
  39. fs/notify/fanotify/fanotify_user.c (+20 -21)
  40. fs/notify/fsnotify.c (+12 -30)
  41. fs/notify/group.c (+1 -0)
  42. fs/notify/inotify/inotify.h (+16 -5)
  43. fs/notify/inotify/inotify_fsnotify.c (+56 -93)
  44. fs/notify/inotify/inotify_user.c (+38 -81)
  45. fs/notify/notification.c (+28 -306)
  46. fs/ocfs2/Makefile (+0 -1)
  47. fs/ocfs2/alloc.c (+3 -7)
  48. fs/ocfs2/cluster/Makefile (+1 -1)
  49. fs/ocfs2/cluster/nodemanager.c (+1 -3)
  50. fs/ocfs2/cluster/ver.c (+0 -42)
  51. fs/ocfs2/cluster/ver.h (+0 -31)
  52. fs/ocfs2/dlm/Makefile (+1 -1)
  53. fs/ocfs2/dlm/dlmdomain.c (+1 -4)
  54. fs/ocfs2/dlm/dlmver.c (+0 -42)
  55. fs/ocfs2/dlm/dlmver.h (+0 -31)
  56. fs/ocfs2/dlmfs/Makefile (+1 -1)
  57. fs/ocfs2/dlmfs/dlmfs.c (+1 -3)
  58. fs/ocfs2/dlmfs/dlmfsver.c (+0 -42)
  59. fs/ocfs2/dlmfs/dlmfsver.h (+0 -31)
  60. fs/ocfs2/dlmglue.c (+3 -1)
  61. fs/ocfs2/file.c (+2 -1)
  62. fs/ocfs2/ioctl.c (+7 -0)
  63. fs/ocfs2/move_extents.c (+0 -77)
  64. fs/ocfs2/ocfs2.h (+1 -0)
  65. fs/ocfs2/stack_o2cb.c (+2 -1)
  66. fs/ocfs2/stack_user.c (+268 -40)
  67. fs/ocfs2/stackglue.c (+11 -5)
  68. fs/ocfs2/stackglue.h (+12 -3)
  69. fs/ocfs2/suballoc.c (+2 -10)
  70. fs/ocfs2/suballoc.h (+12 -0)
  71. fs/ocfs2/super.c (+12 -8)
  72. fs/ocfs2/ver.c (+0 -43)
  73. fs/ocfs2/ver.h (+0 -31)
  74. fs/posix_acl.c (+79 -5)
  75. fs/proc/meminfo.c (+37 -0)
  76. fs/ramfs/inode.c (+1 -1)
  77. fs/read_write.c (+0 -4)
  78. fs/super.c (+2 -1)
  79. include/linux/bootmem.h (+152 -1)
  80. include/linux/compaction.h (+16 -0)
  81. include/linux/dma-debug.h (+6 -0)
  82. include/linux/fsnotify_backend.h (+27 -91)
  83. include/linux/huge_mm.h (+23 -0)
  84. include/linux/hugetlb.h (+1 -6)
  85. include/linux/init_task.h (+2 -0)
  86. include/linux/ksm.h (+3 -12)
  87. include/linux/memblock.h (+49 -5)
  88. include/linux/mempolicy.h (+0 -32)
  89. include/linux/migrate.h (+0 -6)
  90. include/linux/mm.h (+61 -9)
  91. include/linux/mman.h (+1 -0)
  92. include/linux/mmzone.h (+7 -4)
  93. include/linux/posix_acl.h (+6 -72)
  94. include/linux/rmap.h (+22 -5)
  95. include/linux/sched.h (+12 -0)
  96. include/trace/events/compaction.h (+42 -0)
  97. include/trace/events/migrate.h (+26 -0)
  98. include/trace/events/sched.h (+87 -0)
  99. init/main.c (+6 -4)
  100. kernel/audit_tree.c (+5 -15)

+ 9 - 0
Documentation/filesystems/proc.txt

@@ -767,6 +767,7 @@ The "Locked" indicates whether the mapping is locked in memory or not.
 
 MemTotal:     16344972 kB
 MemFree:      13634064 kB
+MemAvailable: 14836172 kB
 Buffers:          3656 kB
 Cached:        1195708 kB
 SwapCached:          0 kB
@@ -799,6 +800,14 @@ AnonHugePages:   49152 kB
     MemTotal: Total usable ram (i.e. physical ram minus a few reserved
               bits and the kernel binary code)
      MemFree: The sum of LowFree+HighFree
+MemAvailable: An estimate of how much memory is available for starting new
+              applications, without swapping. Calculated from MemFree,
+              SReclaimable, the size of the file LRU lists, and the low
+              watermarks in each zone.
+              The estimate takes into account that the system needs some
+              page cache to function well, and that not all reclaimable
+              slab will be reclaimable, due to items being in use. The
+              impact of those factors will vary from system to system.
      Buffers: Relatively temporary storage for raw disk blocks
               shouldn't get tremendously large (20MB or so)
       Cached: in-memory cache for files read from the disk (the

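As a rough, user-space illustration of the MemAvailable field documented in the hunk above (this sketch is not part of the commit): only the field name and the /proc/meminfo path come from the diff, everything else is an assumption about how a monitoring tool might consume the value.

#include <stdio.h>

/* Return MemAvailable in kB, or -1 if this kernel does not export it. */
static long read_mem_available(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	char line[128];
	long kb = -1;

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "MemAvailable: %ld kB", &kb) == 1)
			break;
	}
	fclose(f);
	return kb;
}

int main(void)
{
	long kb = read_mem_available();

	if (kb < 0)
		printf("MemAvailable not reported by this kernel\n");
	else
		printf("MemAvailable: %ld kB\n", kb);
	return 0;
}

The -1 fallback matters because kernels without this series simply do not print the line, so callers should not assume it is present.
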
+ 12 - 0
Documentation/sysctl/vm.txt

@@ -47,6 +47,7 @@ Currently, these files are in /proc/sys/vm:
 - numa_zonelist_order
 - oom_dump_tasks
 - oom_kill_allocating_task
+- overcommit_kbytes
 - overcommit_memory
 - overcommit_ratio
 - page-cluster
@@ -574,6 +575,17 @@ The default value is 0.
 
 ==============================================================
 
+overcommit_kbytes:
+
+When overcommit_memory is set to 2, the committed address space is not
+permitted to exceed swap plus this amount of physical RAM. See below.
+
+Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
+of them may be specified at a time. Setting one disables the other (which
+then appears as 0 when read).
+
+==============================================================
+
 overcommit_memory:
 
 This value contains a flag that enables memory overcommitment.

+ 4 - 3
Documentation/vm/overcommit-accounting

@@ -14,8 +14,8 @@ The Linux kernel supports the following overcommit handling modes
 
 2	-	Don't overcommit. The total address space commit
 		for the system is not permitted to exceed swap + a
-		configurable percentage (default is 50) of physical RAM.
-		Depending on the percentage you use, in most situations
+		configurable amount (default is 50%) of physical RAM.
+		Depending on the amount you use, in most situations
 		this means a process will not be killed while accessing
 		pages but will receive errors on memory allocation as
 		appropriate.
@@ -26,7 +26,8 @@ The Linux kernel supports the following overcommit handling modes
 
 The overcommit policy is set via the sysctl `vm.overcommit_memory'.
 
-The overcommit percentage is set via `vm.overcommit_ratio'.
+The overcommit amount can be set via `vm.overcommit_ratio' (percentage)
+or `vm.overcommit_kbytes' (absolute value).
 
 The current overcommit limit and amount committed are viewable in
 /proc/meminfo as CommitLimit and Committed_AS respectively.

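To make the relationship between the two knobs concrete, here is a small, hedged C sketch of the commit limit described above for overcommit_memory == 2: swap plus overcommit_kbytes when it is set, otherwise swap plus overcommit_ratio percent of RAM. It deliberately ignores any further corrections the kernel applies (for example hugetlb reservations), so treat it as an approximation rather than the kernel's exact implementation; the values in main() are hypothetical.

#include <stdio.h>

/*
 * Approximate CommitLimit (in kB) under overcommit_memory == 2, following
 * the documentation above.  Real kernels apply additional adjustments that
 * are ignored here.
 */
static unsigned long commit_limit_kb(unsigned long ram_kb,
				     unsigned long swap_kb,
				     unsigned long overcommit_kbytes,
				     unsigned int overcommit_ratio)
{
	if (overcommit_kbytes)
		return swap_kb + overcommit_kbytes;
	return swap_kb + ram_kb / 100 * overcommit_ratio;
}

int main(void)
{
	/* hypothetical machine: 16 GiB RAM, 4 GiB swap */
	unsigned long ram_kb = 16UL * 1024 * 1024;
	unsigned long swap_kb = 4UL * 1024 * 1024;

	printf("ratio=50%%     -> %lu kB\n",
	       commit_limit_kb(ram_kb, swap_kb, 0, 50));
	printf("kbytes=8 GiB  -> %lu kB\n",
	       commit_limit_kb(ram_kb, swap_kb, 8UL * 1024 * 1024, 50));
	return 0;
}

In modes 0 and 1 this limit is not enforced, which is why the documentation above only discusses mode 2.
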
+ 2 - 2
arch/arm/include/asm/dma.h

@@ -8,8 +8,8 @@
 #define MAX_DMA_ADDRESS	0xffffffffUL
 #else
 #define MAX_DMA_ADDRESS	({ \
-	extern unsigned long arm_dma_zone_size; \
-	arm_dma_zone_size ? \
+	extern phys_addr_t arm_dma_zone_size; \
+	arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
 		(PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
 #endif
 

+ 1 - 1
arch/arm/kernel/devtree.c

@@ -33,7 +33,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 
 void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return alloc_bootmem_align(size, align);
+	return memblock_virt_alloc(size, align);
 }
 
 void __init arm_dt_memblock_reserve(void)

+ 1 - 1
arch/arm/kernel/setup.c

@@ -717,7 +717,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 	kernel_data.end     = virt_to_phys(_end - 1);
 
 	for_each_memblock(memory, region) {
-		res = alloc_bootmem_low(sizeof(*res));
+		res = memblock_virt_alloc(sizeof(*res), 0);
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;

+ 2 - 6
arch/arm/mach-omap2/omap_hwmod.c

@@ -2791,9 +2791,7 @@ static int __init _alloc_links(struct omap_hwmod_link **ml,
 	sz = sizeof(struct omap_hwmod_link) * LINKS_PER_OCP_IF;
 
 	*sl = NULL;
-	*ml = alloc_bootmem(sz);
-
-	memset(*ml, 0, sz);
+	*ml = memblock_virt_alloc(sz, 0);
 
 	*sl = (void *)(*ml) + sizeof(struct omap_hwmod_link);
 
@@ -2912,9 +2910,7 @@ static int __init _alloc_linkspace(struct omap_hwmod_ocp_if **ois)
 	pr_debug("omap_hwmod: %s: allocating %d byte linkspace (%d links)\n",
 		 __func__, sz, max_ls);
 
-	linkspace = alloc_bootmem(sz);
-
-	memset(linkspace, 0, sz);
+	linkspace = memblock_virt_alloc(sz, 0);
 
 	return 0;
 }

+ 1 - 4
arch/arm/mm/init.c

@@ -92,9 +92,6 @@ void show_mem(unsigned int filter)
 	printk("Mem-info:\n");
 	show_free_areas(filter);
 
-	if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-		return;
-
 	for_each_bank (i, mi) {
 		struct membank *bank = &mi->bank[i];
 		unsigned int pfn1, pfn2;
@@ -461,7 +458,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * free the section of the memmap array.
 	 */
 	if (pg < pgend)
-		free_bootmem(pg, pgend - pg);
+		memblock_free_early(pg, pgend - pg);
 }
 
 /*

+ 0 - 68
arch/ia64/mm/contig.c

@@ -31,74 +31,6 @@
 static unsigned long max_gap;
 #endif
 
-/**
- * show_mem - give short summary of memory stats
- *
- * Shows a simple page count of reserved and used pages in the system.
- * For discontig machines, it does this on a per-pgdat basis.
- */
-void show_mem(unsigned int filter)
-{
-	int i, total_reserved = 0;
-	int total_shared = 0, total_cached = 0;
-	unsigned long total_present = 0;
-	pg_data_t *pgdat;
-
-	printk(KERN_INFO "Mem-info:\n");
-	show_free_areas(filter);
-	printk(KERN_INFO "Node memory in pages:\n");
-	if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-		return;
-	for_each_online_pgdat(pgdat) {
-		unsigned long present;
-		unsigned long flags;
-		int shared = 0, cached = 0, reserved = 0;
-		int nid = pgdat->node_id;
-
-		if (skip_free_areas_node(filter, nid))
-			continue;
-		pgdat_resize_lock(pgdat, &flags);
-		present = pgdat->node_present_pages;
-		for(i = 0; i < pgdat->node_spanned_pages; i++) {
-			struct page *page;
-			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
-				touch_nmi_watchdog();
-			if (pfn_valid(pgdat->node_start_pfn + i))
-				page = pfn_to_page(pgdat->node_start_pfn + i);
-			else {
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-				if (max_gap < LARGE_GAP)
-					continue;
-#endif
-				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
-				continue;
-			}
-			if (PageReserved(page))
-				reserved++;
-			else if (PageSwapCache(page))
-				cached++;
-			else if (page_count(page))
-				shared += page_count(page)-1;
-		}
-		pgdat_resize_unlock(pgdat, &flags);
-		total_present += present;
-		total_reserved += reserved;
-		total_cached += cached;
-		total_shared += shared;
-		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
-		       "shrd: %10d, swpd: %10d\n", nid,
-		       present, reserved, shared, cached);
-	}
-	printk(KERN_INFO "%ld pages of RAM\n", total_present);
-	printk(KERN_INFO "%d reserved pages\n", total_reserved);
-	printk(KERN_INFO "%d pages shared\n", total_shared);
-	printk(KERN_INFO "%d pages swap cached\n", total_cached);
-	printk(KERN_INFO "Total of %ld pages in page table cache\n",
-	       quicklist_total_size());
-	printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
-}
-
-
 /* physical address where the bootmem map is located */
 unsigned long bootmap_start;
 

+ 0 - 63
arch/ia64/mm/discontig.c

@@ -607,69 +607,6 @@ void *per_cpu_init(void)
 }
 #endif /* CONFIG_SMP */
 
-/**
- * show_mem - give short summary of memory stats
- *
- * Shows a simple page count of reserved and used pages in the system.
- * For discontig machines, it does this on a per-pgdat basis.
- */
-void show_mem(unsigned int filter)
-{
-	int i, total_reserved = 0;
-	int total_shared = 0, total_cached = 0;
-	unsigned long total_present = 0;
-	pg_data_t *pgdat;
-
-	printk(KERN_INFO "Mem-info:\n");
-	show_free_areas(filter);
-	if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-		return;
-	printk(KERN_INFO "Node memory in pages:\n");
-	for_each_online_pgdat(pgdat) {
-		unsigned long present;
-		unsigned long flags;
-		int shared = 0, cached = 0, reserved = 0;
-		int nid = pgdat->node_id;
-
-		if (skip_free_areas_node(filter, nid))
-			continue;
-		pgdat_resize_lock(pgdat, &flags);
-		present = pgdat->node_present_pages;
-		for(i = 0; i < pgdat->node_spanned_pages; i++) {
-			struct page *page;
-			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
-				touch_nmi_watchdog();
-			if (pfn_valid(pgdat->node_start_pfn + i))
-				page = pfn_to_page(pgdat->node_start_pfn + i);
-			else {
-				i = vmemmap_find_next_valid_pfn(nid, i) - 1;
-				continue;
-			}
-			if (PageReserved(page))
-				reserved++;
-			else if (PageSwapCache(page))
-				cached++;
-			else if (page_count(page))
-				shared += page_count(page)-1;
-		}
-		pgdat_resize_unlock(pgdat, &flags);
-		total_present += present;
-		total_reserved += reserved;
-		total_cached += cached;
-		total_shared += shared;
-		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
-		       "shrd: %10d, swpd: %10d\n", nid,
-		       present, reserved, shared, cached);
-	}
-	printk(KERN_INFO "%ld pages of RAM\n", total_present);
-	printk(KERN_INFO "%d reserved pages\n", total_reserved);
-	printk(KERN_INFO "%d pages shared\n", total_shared);
-	printk(KERN_INFO "%d pages swap cached\n", total_cached);
-	printk(KERN_INFO "Total of %ld pages in page table cache\n",
-	       quicklist_total_size());
-	printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
-}
-
 /**
  * call_pernode_memory - use SRAT to call callback functions with node info
  * @start: physical start of range

+ 48 - 0
arch/ia64/mm/init.c

@@ -684,3 +684,51 @@ per_linux32_init(void)
 }
 
 __initcall(per_linux32_init);
+
+/**
+ * show_mem - give short summary of memory stats
+ *
+ * Shows a simple page count of reserved and used pages in the system.
+ * For discontig machines, it does this on a per-pgdat basis.
+ */
+void show_mem(unsigned int filter)
+{
+	int total_reserved = 0;
+	unsigned long total_present = 0;
+	pg_data_t *pgdat;
+
+	printk(KERN_INFO "Mem-info:\n");
+	show_free_areas(filter);
+	printk(KERN_INFO "Node memory in pages:\n");
+	for_each_online_pgdat(pgdat) {
+		unsigned long present;
+		unsigned long flags;
+		int reserved = 0;
+		int nid = pgdat->node_id;
+		int zoneid;
+
+		if (skip_free_areas_node(filter, nid))
+			continue;
+		pgdat_resize_lock(pgdat, &flags);
+
+		for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+			struct zone *zone = &pgdat->node_zones[zoneid];
+			if (!populated_zone(zone))
+				continue;
+
+			reserved += zone->present_pages - zone->managed_pages;
+		}
+		present = pgdat->node_present_pages;
+
+		pgdat_resize_unlock(pgdat, &flags);
+		total_present += present;
+		total_reserved += reserved;
+		printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, ",
+		       nid, present, reserved);
+	}
+	printk(KERN_INFO "%ld pages of RAM\n", total_present);
+	printk(KERN_INFO "%d reserved pages\n", total_reserved);
+	printk(KERN_INFO "Total of %ld pages in page table cache\n",
+	       quicklist_total_size());
+	printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
+}

+ 2 - 1
arch/metag/mm/init.c

@@ -204,7 +204,8 @@ static void __init do_init_bootmem(void)
 		start_pfn = memblock_region_memory_base_pfn(reg);
 		end_pfn = memblock_region_memory_end_pfn(reg);
 		memblock_set_node(PFN_PHYS(start_pfn),
-				  PFN_PHYS(end_pfn - start_pfn), 0);
+				  PFN_PHYS(end_pfn - start_pfn),
+				  &memblock.memory, 0);
 	}
 
 	/* All of system RAM sits in node 0 for the non-NUMA case */

+ 2 - 1
arch/metag/mm/numa.c

@@ -42,7 +42,8 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	memblock_add(start, end - start);
 
 	memblock_set_node(PFN_PHYS(start_pfn),
-			  PFN_PHYS(end_pfn - start_pfn), nid);
+			  PFN_PHYS(end_pfn - start_pfn),
+			  &memblock.memory, nid);
 
 	/* Node-local pgdat */
 	pgdat_paddr = memblock_alloc_base(sizeof(struct pglist_data),

+ 2 - 1
arch/microblaze/mm/init.c

@@ -192,7 +192,8 @@ void __init setup_memory(void)
 		start_pfn = memblock_region_memory_base_pfn(reg);
 		end_pfn = memblock_region_memory_end_pfn(reg);
 		memblock_set_node(start_pfn << PAGE_SHIFT,
-					(end_pfn - start_pfn) << PAGE_SHIFT, 0);
+				  (end_pfn - start_pfn) << PAGE_SHIFT,
+				  &memblock.memory, 0);
 	}
 
 	/* free bootmem is whole main memory */

+ 17 - 42
arch/parisc/mm/init.c

@@ -645,55 +645,30 @@ EXPORT_SYMBOL(empty_zero_page);
 
 void show_mem(unsigned int filter)
 {
-	int i,free = 0,total = 0,reserved = 0;
-	int shared = 0, cached = 0;
+	int total = 0,reserved = 0;
+	pg_data_t *pgdat;
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas(filter);
-	if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-		return;
-#ifndef CONFIG_DISCONTIGMEM
-	i = max_mapnr;
-	while (i-- > 0) {
-		total++;
-		if (PageReserved(mem_map+i))
-			reserved++;
-		else if (PageSwapCache(mem_map+i))
-			cached++;
-		else if (!page_count(&mem_map[i]))
-			free++;
-		else
-			shared += page_count(&mem_map[i]) - 1;
-	}
-#else
-	for (i = 0; i < npmem_ranges; i++) {
-		int j;
 
-		for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
-			struct page *p;
-			unsigned long flags;
-
-			pgdat_resize_lock(NODE_DATA(i), &flags);
-			p = nid_page_nr(i, j) - node_start_pfn(i);
-
-			total++;
-			if (PageReserved(p))
-				reserved++;
-			else if (PageSwapCache(p))
-				cached++;
-			else if (!page_count(p))
-				free++;
-			else
-				shared += page_count(p) - 1;
-			pgdat_resize_unlock(NODE_DATA(i), &flags);
-        	}
+	for_each_online_pgdat(pgdat) {
+		unsigned long flags;
+		int zoneid;
+
+		pgdat_resize_lock(pgdat, &flags);
+		for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+			struct zone *zone = &pgdat->node_zones[zoneid];
+			if (!populated_zone(zone))
+				continue;
+
+			total += zone->present_pages;
+			reserved = zone->present_pages - zone->managed_pages;
+		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
-#endif
+
 	printk(KERN_INFO "%d pages of RAM\n", total);
 	printk(KERN_INFO "%d reserved pages\n", reserved);
-	printk(KERN_INFO "%d pages shared\n", shared);
-	printk(KERN_INFO "%d pages swap cached\n", cached);
-
 
 #ifdef CONFIG_DISCONTIGMEM
 	{

+ 1 - 1
arch/powerpc/mm/mem.c

@@ -209,7 +209,7 @@ void __init do_init_bootmem(void)
 	/* Place all memblock_regions in the same node and merge contiguous
 	 * memblock_regions
 	 */
-	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
 
 	/* Add all physical memory to the bootmem map, mark each area
 	 * present.

+ 5 - 3
arch/powerpc/mm/numa.c

@@ -670,7 +670,8 @@ static void __init parse_drconf_memory(struct device_node *memory)
 			node_set_online(nid);
 			sz = numa_enforce_memory_limit(base, size);
 			if (sz)
-				memblock_set_node(base, sz, nid);
+				memblock_set_node(base, sz,
+						  &memblock.memory, nid);
 		} while (--ranges);
 	}
 }
@@ -760,7 +761,7 @@ new_range:
 				continue;
 		}
 
-		memblock_set_node(start, size, nid);
+		memblock_set_node(start, size, &memblock.memory, nid);
 
 		if (--ranges)
 			goto new_range;
@@ -797,7 +798,8 @@ static void __init setup_nonnuma(void)
 
 		fake_numa_create_new_node(end_pfn, &nid);
 		memblock_set_node(PFN_PHYS(start_pfn),
-				  PFN_PHYS(end_pfn - start_pfn), nid);
+				  PFN_PHYS(end_pfn - start_pfn),
+				  &memblock.memory, nid);
 		node_set_online(nid);
 	}
 }

+ 0 - 1
arch/score/Kconfig

@@ -2,7 +2,6 @@ menu "Machine selection"
 
 config SCORE
        def_bool y
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
        select GENERIC_IOMAP
        select GENERIC_ATOMIC64

+ 1 - 0
arch/sh/kernel/kgdb.c

@@ -13,6 +13,7 @@
 #include <linux/kdebug.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/sched.h>
 #include <asm/cacheflush.h>
 #include <asm/traps.h>
 

+ 2 - 2
arch/sh/kernel/setup.c

@@ -230,8 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
 	pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
 			 PAGE_KERNEL);
 
-	memblock_set_node(PFN_PHYS(start_pfn),
-			  PFN_PHYS(end_pfn - start_pfn), nid);
+	memblock_set_node(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
+			  &memblock.memory, nid);
 }
 
 void __init __weak plat_early_device_setup(void)

+ 3 - 2
arch/sparc/mm/init_64.c

@@ -1021,7 +1021,8 @@ static void __init add_node_ranges(void)
 				"start[%lx] end[%lx]\n",
 				nid, start, this_end);
 
-			memblock_set_node(start, this_end - start, nid);
+			memblock_set_node(start, this_end - start,
+					  &memblock.memory, nid);
 			start = this_end;
 		}
 	}
@@ -1325,7 +1326,7 @@ static void __init bootmem_init_nonnuma(void)
 	       (top_of_ram - total_ram) >> 20);
 
 	init_node_masks_nonnuma();
-	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
 	allocate_node_data(0);
 	node_set_online(0);
 }

+ 0 - 3
arch/unicore32/mm/init.c

@@ -66,9 +66,6 @@ void show_mem(unsigned int filter)
 	printk(KERN_DEFAULT "Mem-info:\n");
 	show_free_areas(filter);
 
-	if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
-		return;
-
 	for_each_bank(i, mi) {
 		struct membank *bank = &mi->bank[i];
 		unsigned int pfn1, pfn2;

+ 2 - 2
arch/x86/include/asm/page_types.h

@@ -51,9 +51,9 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
-static inline phys_addr_t get_max_mapped(void)
+static inline phys_addr_t get_max_low_mapped(void)
 {
-	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
+	return (phys_addr_t)max_low_pfn_mapped << PAGE_SHIFT;
 }
 
 bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);

+ 1 - 1
arch/x86/kernel/check.c

@@ -91,7 +91,7 @@ void __init setup_bios_corruption_check(void)
 
 	corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
 
-	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
+	for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) {
 		start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
 				PAGE_SIZE, corruption_check_size);
 		end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),

+ 1 - 1
arch/x86/kernel/e820.c

@@ -1120,7 +1120,7 @@ void __init memblock_find_dma_reserve(void)
 		nr_pages += end_pfn - start_pfn;
 	}
 
-	for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
+	for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) {
 		start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
 		end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
 		if (start_pfn < end_pfn)

+ 1 - 1
arch/x86/kernel/setup.c

@@ -1119,7 +1119,7 @@ void __init setup_arch(char **cmdline_p)
 
 	setup_real_mode();
 
-	memblock_set_current_limit(get_max_mapped());
+	memblock_set_current_limit(get_max_low_mapped());
 	dma_contiguous_reserve(0);
 
 	/*

+ 1 - 1
arch/x86/mm/init_32.c

@@ -665,7 +665,7 @@ void __init initmem_init(void)
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
-	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
 	sparse_memory_present_with_active_regions(0);
 
 #ifdef CONFIG_FLATMEM

+ 1 - 1
arch/x86/mm/init_64.c

@@ -643,7 +643,7 @@ kernel_physical_mapping_init(unsigned long start,
 #ifndef CONFIG_NUMA
 void __init initmem_init(void)
 {
-	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
+	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
 }
 #endif
 

+ 1 - 1
arch/x86/mm/memtest.c

@@ -74,7 +74,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end)
 	u64 i;
 	phys_addr_t this_start, this_end;
 
-	for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
+	for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) {
 		this_start = clamp_t(phys_addr_t, this_start, start, end);
 		this_end = clamp_t(phys_addr_t, this_end, start, end);
 		if (this_start < this_end) {

+ 50 - 2
arch/x86/mm/numa.c

@@ -491,7 +491,16 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 	for (i = 0; i < mi->nr_blks; i++) {
 		struct numa_memblk *mb = &mi->blk[i];
-		memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
+		memblock_set_node(mb->start, mb->end - mb->start,
+				  &memblock.memory, mb->nid);
+
+		/*
+		 * At this time, all memory regions reserved by memblock are
+		 * used by the kernel. Set the nid in memblock.reserved will
+		 * used by the kernel. Setting the nid in memblock.reserved will
+		 */
+		memblock_set_node(mb->start, mb->end - mb->start,
+				  &memblock.reserved, mb->nid);
 	}
 
 	/*
@@ -553,6 +562,30 @@ static void __init numa_init_array(void)
 	}
 }
 
+static void __init numa_clear_kernel_node_hotplug(void)
+{
+	int i, nid;
+	nodemask_t numa_kernel_nodes;
+	unsigned long start, end;
+	struct memblock_type *type = &memblock.reserved;
+
+	/* Mark all kernel nodes. */
+	for (i = 0; i < type->cnt; i++)
+		node_set(type->regions[i].nid, numa_kernel_nodes);
+
+	/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+	for (i = 0; i < numa_meminfo.nr_blks; i++) {
+		nid = numa_meminfo.blk[i].nid;
+		if (!node_isset(nid, numa_kernel_nodes))
+			continue;
+
+		start = numa_meminfo.blk[i].start;
+		end = numa_meminfo.blk[i].end;
+
+		memblock_clear_hotplug(start, end - start);
+	}
+}
+
 static int __init numa_init(int (*init_func)(void))
 {
 	int i;
@@ -565,7 +598,12 @@ static int __init numa_init(int (*init_func)(void))
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
 	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-	WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
+	WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
+				  MAX_NUMNODES));
+	WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
+				  MAX_NUMNODES));
+	/* In case that parsing SRAT failed. */
+	WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
 	numa_reset_distance();
 
 	ret = init_func();
@@ -601,6 +639,16 @@ static int __init numa_init(int (*init_func)(void))
 			numa_clear_node(i);
 	}
 	numa_init_array();
+
+	/*
+	 * At very early time, the kernel has to use some memory such as
+	 * loading the kernel image. We cannot prevent this anyway. So any
+	 * node the kernel resides in should be un-hotpluggable.
+	 *
+	 * And when we come here, numa_init() won't fail.
+	 */
+	numa_clear_kernel_node_hotplug();
+
 	return 0;
 }
 

+ 5 - 0
arch/x86/mm/srat.c

@@ -181,6 +181,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		(unsigned long long) start, (unsigned long long) end - 1,
 		hotpluggable ? " hotplug" : "");
 
+	/* Mark hotplug range in memblock. */
+	if (hotpluggable && memblock_mark_hotplug(start, ma->length))
+		pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
+			(unsigned long long)start, (unsigned long long)end - 1);
+
 	return 0;
 out_err_bad_srat:
 	bad_srat();

+ 0 - 1
drivers/char/mem.c

@@ -22,7 +22,6 @@
 #include <linux/device.h>
 #include <linux/highmem.h>
 #include <linux/backing-dev.h>
-#include <linux/bootmem.h>
 #include <linux/splice.h>
 #include <linux/pfn.h>
 #include <linux/export.h>

+ 1 - 1
drivers/firmware/memmap.c

@@ -324,7 +324,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
-	entry = alloc_bootmem(sizeof(struct firmware_map_entry));
+	entry = memblock_virt_alloc(sizeof(struct firmware_map_entry), 0);
 	if (WARN_ON(!entry))
 		return -ENOMEM;
 

+ 1 - 1
drivers/iommu/intel-iommu.c

@@ -917,7 +917,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
 
 		/* If range covers entire pagetable, free it */
 		if (!(start_pfn > level_pfn ||
-		      last_pfn < level_pfn + level_size(level))) {
+		      last_pfn < level_pfn + level_size(level) - 1)) {
 			dma_clear_pte(pte);
 			domain_flush_cache(domain, pte, sizeof(*pte));
 			free_pgtable_page(level_pte);

+ 2 - 1
fs/compat_ioctl.c

@@ -680,7 +680,8 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
 	struct i2c_msg			__user *tmsgs;
 	struct i2c_msg32		__user *umsgs;
 	compat_caddr_t			datap;
-	int				nmsgs, i;
+	u32				nmsgs;
+	int				i;
 
 	if (get_user(nmsgs, &udata->nmsgs))
 		return -EFAULT;

+ 8 - 26
fs/notify/dnotify/dnotify.c

@@ -82,20 +82,23 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
  * events.
  */
 static int dnotify_handle_event(struct fsnotify_group *group,
+				struct inode *inode,
 				struct fsnotify_mark *inode_mark,
 				struct fsnotify_mark *vfsmount_mark,
-				struct fsnotify_event *event)
+				u32 mask, void *data, int data_type,
+				const unsigned char *file_name)
 {
 	struct dnotify_mark *dn_mark;
-	struct inode *to_tell;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
 	struct fown_struct *fown;
-	__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
+	__u32 test_mask = mask & ~FS_EVENT_ON_CHILD;
 
-	BUG_ON(vfsmount_mark);
+	/* not a dir, dnotify doesn't care */
+	if (!S_ISDIR(inode->i_mode))
+		return 0;
 
-	to_tell = event->to_tell;
+	BUG_ON(vfsmount_mark);
 
 	dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
 
@@ -122,23 +125,6 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 	return 0;
 }
 
-/*
- * Given an inode and mask determine if dnotify would be interested in sending
- * userspace notification for that pair.
- */
-static bool dnotify_should_send_event(struct fsnotify_group *group,
-				      struct inode *inode,
-				      struct fsnotify_mark *inode_mark,
-				      struct fsnotify_mark *vfsmount_mark,
-				      __u32 mask, void *data, int data_type)
-{
-	/* not a dir, dnotify doesn't care */
-	if (!S_ISDIR(inode->i_mode))
-		return false;
-
-	return true;
-}
-
 static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
 {
 	struct dnotify_mark *dn_mark = container_of(fsn_mark,
@@ -152,10 +138,6 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
 
 static struct fsnotify_ops dnotify_fsnotify_ops = {
 	.handle_event = dnotify_handle_event,
-	.should_send_event = dnotify_should_send_event,
-	.free_group_priv = NULL,
-	.freeing_mark = NULL,
-	.free_event_priv = NULL,
 };
 
 /*

+ 111 - 113
fs/notify/fanotify/fanotify.c

@@ -9,31 +9,27 @@
 #include <linux/types.h>
 #include <linux/wait.h>
 
-static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
+#include "fanotify.h"
+
+static bool should_merge(struct fsnotify_event *old_fsn,
+			 struct fsnotify_event *new_fsn)
 {
-	pr_debug("%s: old=%p new=%p\n", __func__, old, new);
+	struct fanotify_event_info *old, *new;
 
-	if (old->to_tell == new->to_tell &&
-	    old->data_type == new->data_type &&
-	    old->tgid == new->tgid) {
-		switch (old->data_type) {
-		case (FSNOTIFY_EVENT_PATH):
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-			/* dont merge two permission events */
-			if ((old->mask & FAN_ALL_PERM_EVENTS) &&
-			    (new->mask & FAN_ALL_PERM_EVENTS))
-				return false;
+	/* dont merge two permission events */
+	if ((old_fsn->mask & FAN_ALL_PERM_EVENTS) &&
+	    (new_fsn->mask & FAN_ALL_PERM_EVENTS))
+		return false;
 #endif
-			if ((old->path.mnt == new->path.mnt) &&
-			    (old->path.dentry == new->path.dentry))
-				return true;
-			break;
-		case (FSNOTIFY_EVENT_NONE):
-			return true;
-		default:
-			BUG();
-		};
-	}
+	pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
+	old = FANOTIFY_E(old_fsn);
+	new = FANOTIFY_E(new_fsn);
+
+	if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
+	    old->path.mnt == new->path.mnt &&
+	    old->path.dentry == new->path.dentry)
+		return true;
 	return false;
 }
 
@@ -41,59 +37,28 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 static struct fsnotify_event *fanotify_merge(struct list_head *list,
 					     struct fsnotify_event *event)
 {
-	struct fsnotify_event_holder *test_holder;
-	struct fsnotify_event *test_event = NULL;
-	struct fsnotify_event *new_event;
+	struct fsnotify_event *test_event;
+	bool do_merge = false;
 
 	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
 
-
-	list_for_each_entry_reverse(test_holder, list, event_list) {
-		if (should_merge(test_holder->event, event)) {
-			test_event = test_holder->event;
+	list_for_each_entry_reverse(test_event, list, list) {
+		if (should_merge(test_event, event)) {
+			do_merge = true;
 			break;
 		}
 	}
 
-	if (!test_event)
+	if (!do_merge)
 		return NULL;
 
-	fsnotify_get_event(test_event);
-
-	/* if they are exactly the same we are done */
-	if (test_event->mask == event->mask)
-		return test_event;
-
-	/*
-	 * if the refcnt == 2 this is the only queue
-	 * for this event and so we can update the mask
-	 * in place.
-	 */
-	if (atomic_read(&test_event->refcnt) == 2) {
-		test_event->mask |= event->mask;
-		return test_event;
-	}
-
-	new_event = fsnotify_clone_event(test_event);
-
-	/* done with test_event */
-	fsnotify_put_event(test_event);
-
-	/* couldn't allocate memory, merge was not possible */
-	if (unlikely(!new_event))
-		return ERR_PTR(-ENOMEM);
-
-	/* build new event and replace it on the list */
-	new_event->mask = (test_event->mask | event->mask);
-	fsnotify_replace_event(test_holder, new_event);
-
-	/* we hold a reference on new_event from clone_event */
-	return new_event;
+	test_event->mask |= event->mask;
+	return test_event;
 }
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 static int fanotify_get_response_from_access(struct fsnotify_group *group,
-					     struct fsnotify_event *event)
+					     struct fanotify_event_info *event)
 {
 	int ret;
 
@@ -106,7 +71,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 		return 0;
 
 	/* userspace responded, convert to something usable */
-	spin_lock(&event->lock);
 	switch (event->response) {
 	case FAN_ALLOW:
 		ret = 0;
@@ -116,7 +80,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 		ret = -EPERM;
 	}
 	event->response = 0;
-	spin_unlock(&event->lock);
 
 	pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
 		 group, event, ret);
@@ -125,58 +88,17 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 }
 #endif
 
-static int fanotify_handle_event(struct fsnotify_group *group,
-				 struct fsnotify_mark *inode_mark,
-				 struct fsnotify_mark *fanotify_mark,
-				 struct fsnotify_event *event)
-{
-	int ret = 0;
-	struct fsnotify_event *notify_event = NULL;
-
-	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
-	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
-	BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
-	BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
-	BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
-	BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
-	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
-	BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
-	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
-	BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
-
-	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
-
-	notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
-	if (IS_ERR(notify_event))
-		return PTR_ERR(notify_event);
-
-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-	if (event->mask & FAN_ALL_PERM_EVENTS) {
-		/* if we merged we need to wait on the new event */
-		if (notify_event)
-			event = notify_event;
-		ret = fanotify_get_response_from_access(group, event);
-	}
-#endif
-
-	if (notify_event)
-		fsnotify_put_event(notify_event);
-
-	return ret;
-}
-
-static bool fanotify_should_send_event(struct fsnotify_group *group,
-				       struct inode *to_tell,
-				       struct fsnotify_mark *inode_mark,
+static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
 				       struct fsnotify_mark *vfsmnt_mark,
-				       __u32 event_mask, void *data, int data_type)
+				       u32 event_mask,
+				       void *data, int data_type)
 {
 	__u32 marks_mask, marks_ignored_mask;
 	struct path *path = data;
 
-	pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
-		 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
-		 inode_mark, vfsmnt_mark, event_mask, data, data_type);
+	pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p"
+		 " data_type=%d\n", __func__, inode_mark, vfsmnt_mark,
+		 event_mask, data, data_type);
 
 	/* if we don't have enough info to send an event to userspace say no */
 	if (data_type != FSNOTIFY_EVENT_PATH)
@@ -217,6 +139,74 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
 	return false;
 }
 
+static int fanotify_handle_event(struct fsnotify_group *group,
+				 struct inode *inode,
+				 struct fsnotify_mark *inode_mark,
+				 struct fsnotify_mark *fanotify_mark,
+				 u32 mask, void *data, int data_type,
+				 const unsigned char *file_name)
+{
+	int ret = 0;
+	struct fanotify_event_info *event;
+	struct fsnotify_event *fsn_event;
+	struct fsnotify_event *notify_fsn_event;
+
+	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
+	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+	BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
+	BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
+	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
+	BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
+	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+	BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
+
+	if (!fanotify_should_send_event(inode_mark, fanotify_mark, mask, data,
+					data_type))
+		return 0;
+
+	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
+		 mask);
+
+	event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
+	if (unlikely(!event))
+		return -ENOMEM;
+
+	fsn_event = &event->fse;
+	fsnotify_init_event(fsn_event, inode, mask);
+	event->tgid = get_pid(task_tgid(current));
+	if (data_type == FSNOTIFY_EVENT_PATH) {
+		struct path *path = data;
+		event->path = *path;
+		path_get(&event->path);
+	} else {
+		event->path.mnt = NULL;
+		event->path.dentry = NULL;
+	}
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	event->response = 0;
+#endif
+
+	notify_fsn_event = fsnotify_add_notify_event(group, fsn_event,
+						     fanotify_merge);
+	if (notify_fsn_event) {
+		/* Our event wasn't used in the end. Free it. */
+		fsnotify_destroy_event(group, fsn_event);
+		if (IS_ERR(notify_fsn_event))
+			return PTR_ERR(notify_fsn_event);
+		/* We need to ask about a different event after a merge... */
+		event = FANOTIFY_E(notify_fsn_event);
+		fsn_event = notify_fsn_event;
+	}
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (fsn_event->mask & FAN_ALL_PERM_EVENTS)
+		ret = fanotify_get_response_from_access(group, event);
+#endif
+	return ret;
+}
+
 static void fanotify_free_group_priv(struct fsnotify_group *group)
 {
 	struct user_struct *user;
@@ -226,10 +216,18 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
 	free_uid(user);
 }
 
+static void fanotify_free_event(struct fsnotify_event *fsn_event)
+{
+	struct fanotify_event_info *event;
+
+	event = FANOTIFY_E(fsn_event);
+	path_put(&event->path);
+	put_pid(event->tgid);
+	kmem_cache_free(fanotify_event_cachep, event);
+}
+
 const struct fsnotify_ops fanotify_fsnotify_ops = {
 	.handle_event = fanotify_handle_event,
-	.should_send_event = fanotify_should_send_event,
 	.free_group_priv = fanotify_free_group_priv,
-	.free_event_priv = NULL,
-	.freeing_mark = NULL,
+	.free_event = fanotify_free_event,
 };

+ 23 - 0
fs/notify/fanotify/fanotify.h

@@ -0,0 +1,23 @@
+#include <linux/fsnotify_backend.h>
+#include <linux/path.h>
+#include <linux/slab.h>
+
+extern struct kmem_cache *fanotify_event_cachep;
+
+struct fanotify_event_info {
+	struct fsnotify_event fse;
+	/*
+	 * We hold ref to this path so it may be dereferenced at any point
+	 * during this object's lifetime
+	 */
+	struct path path;
+	struct pid *tgid;
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	u32 response;	/* userspace answer to question */
+#endif
+};
+
+static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
+{
+	return container_of(fse, struct fanotify_event_info, fse);
+}

+ 20 - 21
fs/notify/fanotify/fanotify_user.c

@@ -19,6 +19,7 @@
 
 #include "../../mount.h"
 #include "../fdinfo.h"
+#include "fanotify.h"
 
 #define FANOTIFY_DEFAULT_MAX_EVENTS	16384
 #define FANOTIFY_DEFAULT_MAX_MARKS	8192
@@ -28,11 +29,12 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops;
 
 static struct kmem_cache *fanotify_mark_cache __read_mostly;
 static struct kmem_cache *fanotify_response_event_cache __read_mostly;
+struct kmem_cache *fanotify_event_cachep __read_mostly;
 
 struct fanotify_response_event {
 	struct list_head list;
 	__s32 fd;
-	struct fsnotify_event *event;
+	struct fanotify_event_info *event;
 };
 
 /*
@@ -61,8 +63,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 }
 
 static int create_fd(struct fsnotify_group *group,
-			struct fsnotify_event *event,
-			struct file **file)
+		     struct fanotify_event_info *event,
+		     struct file **file)
 {
 	int client_fd;
 	struct file *new_file;
@@ -73,12 +75,6 @@ static int create_fd(struct fsnotify_group *group,
 	if (client_fd < 0)
 		return client_fd;
 
-	if (event->data_type != FSNOTIFY_EVENT_PATH) {
-		WARN_ON(1);
-		put_unused_fd(client_fd);
-		return -EINVAL;
-	}
-
 	/*
 	 * we need a new file handle for the userspace program so it can read even if it was
 	 * originally opened O_WRONLY.
@@ -109,23 +105,25 @@ static int create_fd(struct fsnotify_group *group,
 }
 
 static int fill_event_metadata(struct fsnotify_group *group,
-				   struct fanotify_event_metadata *metadata,
-				   struct fsnotify_event *event,
-				   struct file **file)
+			       struct fanotify_event_metadata *metadata,
+			       struct fsnotify_event *fsn_event,
+			       struct file **file)
 {
 	int ret = 0;
+	struct fanotify_event_info *event;
 
 	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
-		 group, metadata, event);
+		 group, metadata, fsn_event);
 
 	*file = NULL;
+	event = container_of(fsn_event, struct fanotify_event_info, fse);
 	metadata->event_len = FAN_EVENT_METADATA_LEN;
 	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->reserved = 0;
-	metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
+	metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
 	metadata->pid = pid_vnr(event->tgid);
-	if (unlikely(event->mask & FAN_Q_OVERFLOW))
+	if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
 		metadata->fd = FAN_NOFD;
 	else {
 		metadata->fd = create_fd(group, event, file);
@@ -209,7 +207,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
 	if (!re)
 		return -ENOMEM;
 
-	re->event = event;
+	re->event = FANOTIFY_E(event);
 	re->fd = fd;
 
 	mutex_lock(&group->fanotify_data.access_mutex);
@@ -217,7 +215,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
 	if (atomic_read(&group->fanotify_data.bypass_perm)) {
 		mutex_unlock(&group->fanotify_data.access_mutex);
 		kmem_cache_free(fanotify_response_event_cache, re);
-		event->response = FAN_ALLOW;
+		FANOTIFY_E(event)->response = FAN_ALLOW;
 		return 0;
 	}
 		
@@ -273,7 +271,7 @@ out_close_fd:
 out:
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	if (event->mask & FAN_ALL_PERM_EVENTS) {
-		event->response = FAN_DENY;
+		FANOTIFY_E(event)->response = FAN_DENY;
 		wake_up(&group->fanotify_data.access_waitq);
 	}
 #endif
@@ -321,7 +319,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 			if (IS_ERR(kevent))
 				break;
 			ret = copy_event_to_user(group, kevent, buf);
-			fsnotify_put_event(kevent);
+			fsnotify_destroy_event(group, kevent);
 			if (ret < 0)
 				break;
 			buf += ret;
@@ -409,7 +407,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct fsnotify_group *group;
-	struct fsnotify_event_holder *holder;
+	struct fsnotify_event *fsn_event;
 	void __user *p;
 	int ret = -ENOTTY;
 	size_t send_len = 0;
@@ -421,7 +419,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar
 	switch (cmd) {
 	case FIONREAD:
 		mutex_lock(&group->notification_mutex);
-		list_for_each_entry(holder, &group->notification_list, event_list)
+		list_for_each_entry(fsn_event, &group->notification_list, list)
 			send_len += FAN_EVENT_METADATA_LEN;
 		mutex_unlock(&group->notification_mutex);
 		ret = put_user(send_len, (int __user *) p);
@@ -906,6 +904,7 @@ static int __init fanotify_user_setup(void)
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
 	fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
 						   SLAB_PANIC);
+	fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
 
 	return 0;
 }

+ 12 - 30
fs/notify/fsnotify.c

@@ -128,8 +128,7 @@ static int send_to_group(struct inode *to_tell,
 			 struct fsnotify_mark *vfsmount_mark,
 			 __u32 mask, void *data,
 			 int data_is, u32 cookie,
-			 const unsigned char *file_name,
-			 struct fsnotify_event **event)
+			 const unsigned char *file_name)
 {
 	struct fsnotify_group *group = NULL;
 	__u32 inode_test_mask = 0;
@@ -170,27 +169,17 @@ static int send_to_group(struct inode *to_tell,
 
 	pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p"
 		 " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
-		 " data=%p data_is=%d cookie=%d event=%p\n",
+		 " data=%p data_is=%d cookie=%d\n",
 		 __func__, group, to_tell, mask, inode_mark,
 		 inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
-		 data_is, cookie, *event);
+		 data_is, cookie);
 
 	if (!inode_test_mask && !vfsmount_test_mask)
 		return 0;
 
-	if (group->ops->should_send_event(group, to_tell, inode_mark,
-					  vfsmount_mark, mask, data,
-					  data_is) == false)
-		return 0;
-
-	if (!*event) {
-		*event = fsnotify_create_event(to_tell, mask, data,
-						data_is, file_name,
-						cookie, GFP_KERNEL);
-		if (!*event)
-			return -ENOMEM;
-	}
-	return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
+	return group->ops->handle_event(group, to_tell, inode_mark,
+					vfsmount_mark, mask, data, data_is,
+					file_name);
 }
 
 /*
@@ -205,7 +194,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
 	struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
 	struct fsnotify_group *inode_group, *vfsmount_group;
-	struct fsnotify_event *event = NULL;
 	struct mount *mnt;
 	int idx, ret = 0;
 	/* global tests shouldn't care about events on child only the specific event */
@@ -258,18 +246,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 
 		if (inode_group > vfsmount_group) {
 			/* handle inode */
-			ret = send_to_group(to_tell, inode_mark, NULL, mask, data,
-					    data_is, cookie, file_name, &event);
+			ret = send_to_group(to_tell, inode_mark, NULL, mask,
+					    data, data_is, cookie, file_name);
 			/* we didn't use the vfsmount_mark */
 			vfsmount_group = NULL;
 		} else if (vfsmount_group > inode_group) {
-			ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data,
-					    data_is, cookie, file_name, &event);
+			ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
+					    data, data_is, cookie, file_name);
 			inode_group = NULL;
 		} else {
 			ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
-					    mask, data, data_is, cookie, file_name,
-					    &event);
+					    mask, data, data_is, cookie,
+					    file_name);
 		}
 
 		if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
@@ -285,12 +273,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	ret = 0;
 out:
 	srcu_read_unlock(&fsnotify_mark_srcu, idx);
-	/*
-	 * fsnotify_create_event() took a reference so the event can't be cleaned
-	 * up while we are still trying to add it to lists, drop that one.
-	 */
-	if (event)
-		fsnotify_put_event(event);
 
 	return ret;
 }

+ 1 - 0
fs/notify/group.c

@@ -99,6 +99,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	INIT_LIST_HEAD(&group->marks_list);
 
 	group->ops = ops;
+	fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW);
 
 	return group;
 }

+ 16 - 5
fs/notify/inotify/inotify.h

@@ -2,11 +2,12 @@
 #include <linux/inotify.h>
 #include <linux/slab.h> /* struct kmem_cache */
 
-extern struct kmem_cache *event_priv_cachep;
-
-struct inotify_event_private_data {
-	struct fsnotify_event_private_data fsnotify_event_priv_data;
+struct inotify_event_info {
+	struct fsnotify_event fse;
 	int wd;
+	u32 sync_cookie;
+	int name_len;
+	char name[];
 };
 
 struct inotify_inode_mark {
@@ -14,8 +15,18 @@ struct inotify_inode_mark {
 	int wd;
 };
 
+static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
+{
+	return container_of(fse, struct inotify_event_info, fse);
+}
+
 extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 					   struct fsnotify_group *group);
-extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
+extern int inotify_handle_event(struct fsnotify_group *group,
+				struct inode *inode,
+				struct fsnotify_mark *inode_mark,
+				struct fsnotify_mark *vfsmount_mark,
+				u32 mask, void *data, int data_type,
+				const unsigned char *file_name);
 
 extern const struct fsnotify_ops inotify_fsnotify_ops;

+ 56 - 93
fs/notify/inotify/inotify_fsnotify.c

@@ -34,100 +34,87 @@
 #include "inotify.h"
 
 /*
- * Check if 2 events contain the same information.  We do not compare private data
- * but at this moment that isn't a problem for any know fsnotify listeners.
+ * Check if 2 events contain the same information.
  */
-static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
+static bool event_compare(struct fsnotify_event *old_fsn,
+			  struct fsnotify_event *new_fsn)
 {
-	if ((old->mask == new->mask) &&
-	    (old->to_tell == new->to_tell) &&
-	    (old->data_type == new->data_type) &&
-	    (old->name_len == new->name_len)) {
-		switch (old->data_type) {
-		case (FSNOTIFY_EVENT_INODE):
-			/* remember, after old was put on the wait_q we aren't
-			 * allowed to look at the inode any more, only thing
-			 * left to check was if the file_name is the same */
-			if (!old->name_len ||
-			    !strcmp(old->file_name, new->file_name))
-				return true;
-			break;
-		case (FSNOTIFY_EVENT_PATH):
-			if ((old->path.mnt == new->path.mnt) &&
-			    (old->path.dentry == new->path.dentry))
-				return true;
-			break;
-		case (FSNOTIFY_EVENT_NONE):
-			if (old->mask & FS_Q_OVERFLOW)
-				return true;
-			else if (old->mask & FS_IN_IGNORED)
-				return false;
-			return true;
-		};
-	}
+	struct inotify_event_info *old, *new;
+
+	if (old_fsn->mask & FS_IN_IGNORED)
+		return false;
+	old = INOTIFY_E(old_fsn);
+	new = INOTIFY_E(new_fsn);
+	if ((old_fsn->mask == new_fsn->mask) &&
+	    (old_fsn->inode == new_fsn->inode) &&
+	    (old->name_len == new->name_len) &&
+	    (!old->name_len || !strcmp(old->name, new->name)))
+		return true;
 	return false;
 }
 
 static struct fsnotify_event *inotify_merge(struct list_head *list,
 					    struct fsnotify_event *event)
 {
-	struct fsnotify_event_holder *last_holder;
 	struct fsnotify_event *last_event;
 
-	/* and the list better be locked by something too */
-	spin_lock(&event->lock);
-
-	last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
-	last_event = last_holder->event;
-	if (event_compare(last_event, event))
-		fsnotify_get_event(last_event);
-	else
-		last_event = NULL;
-
-	spin_unlock(&event->lock);
-
+	last_event = list_entry(list->prev, struct fsnotify_event, list);
+	if (!event_compare(last_event, event))
+		return NULL;
 	return last_event;
 }
 
-static int inotify_handle_event(struct fsnotify_group *group,
-				struct fsnotify_mark *inode_mark,
-				struct fsnotify_mark *vfsmount_mark,
-				struct fsnotify_event *event)
+int inotify_handle_event(struct fsnotify_group *group,
+			 struct inode *inode,
+			 struct fsnotify_mark *inode_mark,
+			 struct fsnotify_mark *vfsmount_mark,
+			 u32 mask, void *data, int data_type,
+			 const unsigned char *file_name)
 {
 	struct inotify_inode_mark *i_mark;
-	struct inode *to_tell;
-	struct inotify_event_private_data *event_priv;
-	struct fsnotify_event_private_data *fsn_event_priv;
+	struct inotify_event_info *event;
 	struct fsnotify_event *added_event;
-	int wd, ret = 0;
+	struct fsnotify_event *fsn_event;
+	int ret = 0;
+	int len = 0;
+	int alloc_len = sizeof(struct inotify_event_info);
 
 	BUG_ON(vfsmount_mark);
 
-	pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
-		 event, event->to_tell, event->mask);
+	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+	    (data_type == FSNOTIFY_EVENT_PATH)) {
+		struct path *path = data;
 
-	to_tell = event->to_tell;
+		if (d_unlinked(path->dentry))
+			return 0;
+	}
+	if (file_name) {
+		len = strlen(file_name);
+		alloc_len += len + 1;
+	}
+
+	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
+		 mask);
 
 	i_mark = container_of(inode_mark, struct inotify_inode_mark,
 			      fsn_mark);
-	wd = i_mark->wd;
 
-	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
-	if (unlikely(!event_priv))
+	event = kmalloc(alloc_len, GFP_KERNEL);
+	if (unlikely(!event))
 		return -ENOMEM;
 
-	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
-
-	fsnotify_get_group(group);
-	fsn_event_priv->group = group;
-	event_priv->wd = wd;
+	fsn_event = &event->fse;
+	fsnotify_init_event(fsn_event, inode, mask);
+	event->wd = i_mark->wd;
+	event->name_len = len;
+	if (len)
+		strcpy(event->name, file_name);
 
-	added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
+	added_event = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
 	if (added_event) {
-		inotify_free_event_priv(fsn_event_priv);
-		if (!IS_ERR(added_event))
-			fsnotify_put_event(added_event);
-		else
+		/* Our event wasn't used in the end. Free it. */
+		fsnotify_destroy_event(group, fsn_event);
+		if (IS_ERR(added_event))
 			ret = PTR_ERR(added_event);
 	}
 
@@ -142,22 +129,6 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify
 	inotify_ignored_and_remove_idr(fsn_mark, group);
 }
 
-static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
-				      struct fsnotify_mark *inode_mark,
-				      struct fsnotify_mark *vfsmount_mark,
-				      __u32 mask, void *data, int data_type)
-{
-	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
-	    (data_type == FSNOTIFY_EVENT_PATH)) {
-		struct path *path = data;
-
-		if (d_unlinked(path->dentry))
-			return false;
-	}
-
-	return true;
-}
-
 /*
  * This is NEVER supposed to be called.  Inotify marks should either have been
  * removed from the idr when the watch was removed or in the
@@ -202,22 +173,14 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
 	free_uid(group->inotify_data.user);
 }
 
-void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
+static void inotify_free_event(struct fsnotify_event *fsn_event)
 {
-	struct inotify_event_private_data *event_priv;
-
-
-	event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
-				  fsnotify_event_priv_data);
-
-	fsnotify_put_group(fsn_event_priv->group);
-	kmem_cache_free(event_priv_cachep, event_priv);
+	kfree(INOTIFY_E(fsn_event));
 }
 
 const struct fsnotify_ops inotify_fsnotify_ops = {
 	.handle_event = inotify_handle_event,
-	.should_send_event = inotify_should_send_event,
 	.free_group_priv = inotify_free_group_priv,
-	.free_event_priv = inotify_free_event_priv,
+	.free_event = inotify_free_event,
 	.freeing_mark = inotify_freeing_mark,
 };
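
With the separately allocated private data gone, inotify_handle_event() above performs a single variable-sized allocation: alloc_len starts at sizeof(struct inotify_event_info) and grows by the name length plus one so the file name can be stored inline behind the embedded fsnotify_event. A minimal user-space sketch of the same flexible-array idiom; the struct below only illustrates the pattern and is not the layout defined in fs/notify/inotify/inotify.h:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for a variable-sized event record. */
struct name_event {
	unsigned int mask;
	int wd;
	int name_len;
	char name[];	/* flexible array member; the name lives inline */
};

static struct name_event *alloc_event(unsigned int mask, int wd,
				      const char *file_name)
{
	size_t len = file_name ? strlen(file_name) : 0;
	/* one allocation: header plus name plus terminating '\0' */
	struct name_event *ev = malloc(sizeof(*ev) + (len ? len + 1 : 0));

	if (!ev)
		return NULL;
	ev->mask = mask;
	ev->wd = wd;
	ev->name_len = (int)len;
	if (len)
		strcpy(ev->name, file_name);
	return ev;
}

int main(void)
{
	struct name_event *ev = alloc_event(0x100 /* IN_CREATE */, 1, "foo.txt");

	if (!ev)
		return 1;
	printf("wd=%d len=%d name=%s\n", ev->wd, ev->name_len, ev->name);
	free(ev);
	return 0;
}

One allocation per event also makes teardown trivial, which is why inotify_free_event() above collapses to a single kfree(INOTIFY_E(fsn_event)).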

+ 38 - 81
fs/notify/inotify/inotify_user.c

@@ -50,7 +50,6 @@ static int inotify_max_queued_events __read_mostly;
 static int inotify_max_user_watches __read_mostly;
 
 static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
-struct kmem_cache *event_priv_cachep __read_mostly;
 
 #ifdef CONFIG_SYSCTL
 
@@ -124,6 +123,16 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
 	return ret;
 }
 
+static int round_event_name_len(struct fsnotify_event *fsn_event)
+{
+	struct inotify_event_info *event;
+
+	event = INOTIFY_E(fsn_event);
+	if (!event->name_len)
+		return 0;
+	return roundup(event->name_len + 1, sizeof(struct inotify_event));
+}
+
 /*
  * Get an inotify_kernel_event if one exists and is small
  * enough to fit in "count". Return an error pointer if
@@ -144,9 +153,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	if (event->name_len)
-		event_size += roundup(event->name_len + 1, event_size);
-
+	event_size += round_event_name_len(event);
 	if (event_size > count)
 		return ERR_PTR(-EINVAL);
 
@@ -164,40 +171,27 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
  * buffer we had in "get_one_event()" above.
  */
 static ssize_t copy_event_to_user(struct fsnotify_group *group,
-				  struct fsnotify_event *event,
+				  struct fsnotify_event *fsn_event,
 				  char __user *buf)
 {
 	struct inotify_event inotify_event;
-	struct fsnotify_event_private_data *fsn_priv;
-	struct inotify_event_private_data *priv;
+	struct inotify_event_info *event;
 	size_t event_size = sizeof(struct inotify_event);
-	size_t name_len = 0;
-
-	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+	size_t name_len;
+	size_t pad_name_len;
 
-	/* we get the inotify watch descriptor from the event private data */
-	spin_lock(&event->lock);
-	fsn_priv = fsnotify_remove_priv_from_event(group, event);
-	spin_unlock(&event->lock);
-
-	if (!fsn_priv)
-		inotify_event.wd = -1;
-	else {
-		priv = container_of(fsn_priv, struct inotify_event_private_data,
-				    fsnotify_event_priv_data);
-		inotify_event.wd = priv->wd;
-		inotify_free_event_priv(fsn_priv);
-	}
+	pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
 
+	event = INOTIFY_E(fsn_event);
+	name_len = event->name_len;
 	/*
-	 * round up event->name_len so it is a multiple of event_size
+	 * round up name length so it is a multiple of event_size
 	 * plus an extra byte for the terminating '\0'.
 	 */
-	if (event->name_len)
-		name_len = roundup(event->name_len + 1, event_size);
-	inotify_event.len = name_len;
-
-	inotify_event.mask = inotify_mask_to_arg(event->mask);
+	pad_name_len = round_event_name_len(fsn_event);
+	inotify_event.len = pad_name_len;
+	inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
+	inotify_event.wd = event->wd;
 	inotify_event.cookie = event->sync_cookie;
 
 	/* send the main event */
@@ -209,20 +203,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	/*
 	 * fsnotify only stores the pathname, so here we have to send the pathname
 	 * and then pad that pathname out to a multiple of sizeof(inotify_event)
-	 * with zeros.  I get my zeros from the nul_inotify_event.
+	 * with zeros.
 	 */
-	if (name_len) {
-		unsigned int len_to_zero = name_len - event->name_len;
+	if (pad_name_len) {
 		/* copy the path name */
-		if (copy_to_user(buf, event->file_name, event->name_len))
+		if (copy_to_user(buf, event->name, name_len))
 			return -EFAULT;
-		buf += event->name_len;
+		buf += name_len;
 
 		/* fill userspace with 0's */
-		if (clear_user(buf, len_to_zero))
+		if (clear_user(buf, pad_name_len - name_len))
 			return -EFAULT;
-		buf += len_to_zero;
-		event_size += name_len;
+		event_size += pad_name_len;
 	}
 
 	return event_size;
@@ -254,7 +246,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 			if (IS_ERR(kevent))
 				break;
 			ret = copy_event_to_user(group, kevent, buf);
-			fsnotify_put_event(kevent);
+			fsnotify_destroy_event(group, kevent);
 			if (ret < 0)
 				break;
 			buf += ret;
@@ -297,8 +289,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 			  unsigned long arg)
 {
 	struct fsnotify_group *group;
-	struct fsnotify_event_holder *holder;
-	struct fsnotify_event *event;
+	struct fsnotify_event *fsn_event;
 	void __user *p;
 	int ret = -ENOTTY;
 	size_t send_len = 0;
@@ -311,12 +302,10 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 	switch (cmd) {
 	case FIONREAD:
 		mutex_lock(&group->notification_mutex);
-		list_for_each_entry(holder, &group->notification_list, event_list) {
-			event = holder->event;
+		list_for_each_entry(fsn_event, &group->notification_list,
+				    list) {
 			send_len += sizeof(struct inotify_event);
-			if (event->name_len)
-				send_len += roundup(event->name_len + 1,
-						sizeof(struct inotify_event));
+			send_len += round_event_name_len(fsn_event);
 		}
 		mutex_unlock(&group->notification_mutex);
 		ret = put_user(send_len, (int __user *) p);
@@ -503,43 +492,12 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 				    struct fsnotify_group *group)
 {
 	struct inotify_inode_mark *i_mark;
-	struct fsnotify_event *ignored_event, *notify_event;
-	struct inotify_event_private_data *event_priv;
-	struct fsnotify_event_private_data *fsn_event_priv;
-	int ret;
-
-	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
-
-	ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
-					      FSNOTIFY_EVENT_NONE, NULL, 0,
-					      GFP_NOFS);
-	if (!ignored_event)
-		goto skip_send_ignore;
-
-	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
-	if (unlikely(!event_priv))
-		goto skip_send_ignore;
-
-	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
-
-	fsnotify_get_group(group);
-	fsn_event_priv->group = group;
-	event_priv->wd = i_mark->wd;
-
-	notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
-	if (notify_event) {
-		if (IS_ERR(notify_event))
-			ret = PTR_ERR(notify_event);
-		else
-			fsnotify_put_event(notify_event);
-		inotify_free_event_priv(fsn_event_priv);
-	}
 
-skip_send_ignore:
-	/* matches the reference taken when the event was created */
-	if (ignored_event)
-		fsnotify_put_event(ignored_event);
+	/* Queue ignore event for the watch */
+	inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
+			     NULL, FSNOTIFY_EVENT_NONE, NULL);
 
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 	/* remove this mark from the idr */
 	inotify_remove_from_idr(group, i_mark);
 
@@ -836,7 +794,6 @@ static int __init inotify_user_setup(void)
 	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
 
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
-	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
 
 	inotify_max_queued_events = 16384;
 	inotify_max_user_instances = 128;
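
Nothing changes for user space: read() on the inotify fd still returns a run of struct inotify_event records whose len field is the name length rounded up (with '\0' padding) exactly as round_event_name_len() computes above. A small example of walking such a buffer, shown only to illustrate the padding; this is standard inotify(7) usage, not code from this commit:

#include <stdio.h>
#include <unistd.h>
#include <sys/inotify.h>

int main(void)
{
	/* aligned so the cast to struct inotify_event * is safe */
	char buf[4096] __attribute__((aligned(__alignof__(struct inotify_event))));
	int fd = inotify_init1(0);
	ssize_t n;

	if (fd < 0 || inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE) < 0) {
		perror("inotify");
		return 1;
	}

	n = read(fd, buf, sizeof(buf));	/* blocks until at least one event */
	if (n <= 0) {
		perror("read");
		return 1;
	}

	for (char *p = buf; p < buf + n; ) {
		struct inotify_event *ev = (struct inotify_event *)p;

		printf("wd=%d mask=0x%x name=%s\n", ev->wd, ev->mask,
		       ev->len ? ev->name : "");
		/* ev->len already includes the '\0' padding added in
		 * copy_event_to_user(), so this lands on the next record */
		p += sizeof(struct inotify_event) + ev->len;
	}
	close(fd);
	return 0;
}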

+ 28 - 306
fs/notify/notification.c

@@ -48,15 +48,6 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
-static struct kmem_cache *fsnotify_event_cachep;
-static struct kmem_cache *fsnotify_event_holder_cachep;
-/*
- * This is a magic event we send when the q is too full.  Since it doesn't
- * hold real event information we just keep one system wide and use it any time
- * it is needed.  It's refcnt is set 1 at kernel init time and will never
- * get set to 0 so it will never get 'freed'
- */
-static struct fsnotify_event *q_overflow_event;
 static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
 
 /**
@@ -76,60 +67,14 @@ bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
 	return list_empty(&group->notification_list) ? true : false;
 }
 
-void fsnotify_get_event(struct fsnotify_event *event)
+void fsnotify_destroy_event(struct fsnotify_group *group,
+			    struct fsnotify_event *event)
 {
-	atomic_inc(&event->refcnt);
-}
-
-void fsnotify_put_event(struct fsnotify_event *event)
-{
-	if (!event)
+	/* Overflow events are per-group and we don't want to free them */
+	if (!event || event->mask == FS_Q_OVERFLOW)
 		return;
 
-	if (atomic_dec_and_test(&event->refcnt)) {
-		pr_debug("%s: event=%p\n", __func__, event);
-
-		if (event->data_type == FSNOTIFY_EVENT_PATH)
-			path_put(&event->path);
-
-		BUG_ON(!list_empty(&event->private_data_list));
-
-		kfree(event->file_name);
-		put_pid(event->tgid);
-		kmem_cache_free(fsnotify_event_cachep, event);
-	}
-}
-
-struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
-{
-	return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
-}
-
-void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
-{
-	if (holder)
-		kmem_cache_free(fsnotify_event_holder_cachep, holder);
-}
-
-/*
- * Find the private data that the group previously attached to this event when
- * the group added the event to the notification queue (fsnotify_add_notify_event)
- */
-struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
-{
-	struct fsnotify_event_private_data *lpriv;
-	struct fsnotify_event_private_data *priv = NULL;
-
-	assert_spin_locked(&event->lock);
-
-	list_for_each_entry(lpriv, &event->private_data_list, event_list) {
-		if (lpriv->group == group) {
-			priv = lpriv;
-			list_del(&priv->event_list);
-			break;
-		}
-	}
-	return priv;
+	group->ops->free_event(event);
 }
 
 /*
@@ -137,91 +82,35 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
  * event off the queue to deal with.  If the event is successfully added to the
  * group's notification queue, a reference is taken on event.
  */
-struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
-						 struct fsnotify_event_private_data *priv,
+struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
+						 struct fsnotify_event *event,
 						 struct fsnotify_event *(*merge)(struct list_head *,
 										 struct fsnotify_event *))
 {
 	struct fsnotify_event *return_event = NULL;
-	struct fsnotify_event_holder *holder = NULL;
 	struct list_head *list = &group->notification_list;
 
-	pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
-
-	/*
-	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
-	 * Check if we expect to be able to use that holder.  If not alloc a new
-	 * holder.
-	 * For the overflow event it's possible that something will use the in
-	 * event holder before we get the lock so we may need to jump back and
-	 * alloc a new holder, this can't happen for most events...
-	 */
-	if (!list_empty(&event->holder.event_list)) {
-alloc_holder:
-		holder = fsnotify_alloc_event_holder();
-		if (!holder)
-			return ERR_PTR(-ENOMEM);
-	}
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
 	mutex_lock(&group->notification_mutex);
 
 	if (group->q_len >= group->max_events) {
-		event = q_overflow_event;
-
-		/*
-		 * we need to return the overflow event
-		 * which means we need a ref
-		 */
-		fsnotify_get_event(event);
+		/* Queue overflow event only if it isn't already queued */
+		if (list_empty(&group->overflow_event.list))
+			event = &group->overflow_event;
 		return_event = event;
-
-		/* sorry, no private data on the overflow event */
-		priv = NULL;
 	}
 
 	if (!list_empty(list) && merge) {
-		struct fsnotify_event *tmp;
-
-		tmp = merge(list, event);
-		if (tmp) {
-			mutex_unlock(&group->notification_mutex);
-
-			if (return_event)
-				fsnotify_put_event(return_event);
-			if (holder != &event->holder)
-				fsnotify_destroy_event_holder(holder);
-			return tmp;
-		}
-	}
-
-	spin_lock(&event->lock);
-
-	if (list_empty(&event->holder.event_list)) {
-		if (unlikely(holder))
-			fsnotify_destroy_event_holder(holder);
-		holder = &event->holder;
-	} else if (unlikely(!holder)) {
-		/* between the time we checked above and got the lock the in
-		 * event holder was used, go back and get a new one */
-		spin_unlock(&event->lock);
-		mutex_unlock(&group->notification_mutex);
-
+		return_event = merge(list, event);
 		if (return_event) {
-			fsnotify_put_event(return_event);
-			return_event = NULL;
+			mutex_unlock(&group->notification_mutex);
+			return return_event;
 		}
-
-		goto alloc_holder;
 	}
 
 	group->q_len++;
-	holder->event = event;
-
-	fsnotify_get_event(event);
-	list_add_tail(&holder->event_list, list);
-	if (priv)
-		list_add_tail(&priv->event_list, &event->private_data_list);
-	spin_unlock(&event->lock);
+	list_add_tail(&event->list, list);
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
@@ -230,32 +119,20 @@ alloc_holder:
 }
 
 /*
- * Remove and return the first event from the notification list.  There is a
- * reference held on this event since it was on the list.  It is the responsibility
- * of the caller to drop this reference.
+ * Remove and return the first event from the notification list.  It is the
+ * responsibility of the caller to destroy the obtained event
  */
 struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
 {
 	struct fsnotify_event *event;
-	struct fsnotify_event_holder *holder;
 
 	BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
-	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
-
-	event = holder->event;
-
-	spin_lock(&event->lock);
-	holder->event = NULL;
-	list_del_init(&holder->event_list);
-	spin_unlock(&event->lock);
-
-	/* event == holder means we are referenced through the in event holder */
-	if (holder != &event->holder)
-		fsnotify_destroy_event_holder(holder);
-
+	event = list_first_entry(&group->notification_list,
+				 struct fsnotify_event, list);
+	list_del(&event->list);
 	group->q_len--;
 
 	return event;
@@ -266,15 +143,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
  */
 struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
 {
-	struct fsnotify_event *event;
-	struct fsnotify_event_holder *holder;
-
 	BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
-	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
-	event = holder->event;
-
-	return event;
+	return list_first_entry(&group->notification_list,
+				struct fsnotify_event, list);
 }
 
 /*
@@ -284,181 +156,31 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
 void fsnotify_flush_notify(struct fsnotify_group *group)
 {
 	struct fsnotify_event *event;
-	struct fsnotify_event_private_data *priv;
 
 	mutex_lock(&group->notification_mutex);
 	while (!fsnotify_notify_queue_is_empty(group)) {
 		event = fsnotify_remove_notify_event(group);
-		/* if they don't implement free_event_priv they better not have attached any */
-		if (group->ops->free_event_priv) {
-			spin_lock(&event->lock);
-			priv = fsnotify_remove_priv_from_event(group, event);
-			spin_unlock(&event->lock);
-			if (priv)
-				group->ops->free_event_priv(priv);
-		}
-		fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
+		fsnotify_destroy_event(group, event);
 	}
 	mutex_unlock(&group->notification_mutex);
 }
 
-static void initialize_event(struct fsnotify_event *event)
-{
-	INIT_LIST_HEAD(&event->holder.event_list);
-	atomic_set(&event->refcnt, 1);
-
-	spin_lock_init(&event->lock);
-
-	INIT_LIST_HEAD(&event->private_data_list);
-}
-
-/*
- * Caller damn well better be holding whatever mutex is protecting the
- * old_holder->event_list and the new_event must be a clean event which
- * cannot be found anywhere else in the kernel.
- */
-int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
-			   struct fsnotify_event *new_event)
-{
-	struct fsnotify_event *old_event = old_holder->event;
-	struct fsnotify_event_holder *new_holder = &new_event->holder;
-
-	enum event_spinlock_class {
-		SPINLOCK_OLD,
-		SPINLOCK_NEW,
-	};
-
-	pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
-
-	/*
-	 * if the new_event's embedded holder is in use someone
-	 * screwed up and didn't give us a clean new event.
-	 */
-	BUG_ON(!list_empty(&new_holder->event_list));
-
-	spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
-	spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
-
-	new_holder->event = new_event;
-	list_replace_init(&old_holder->event_list, &new_holder->event_list);
-
-	spin_unlock(&new_event->lock);
-	spin_unlock(&old_event->lock);
-
-	/* event == holder means we are referenced through the in event holder */
-	if (old_holder != &old_event->holder)
-		fsnotify_destroy_event_holder(old_holder);
-
-	fsnotify_get_event(new_event); /* on the list take reference */
-	fsnotify_put_event(old_event); /* off the list, drop reference */
-
-	return 0;
-}
-
-struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
-{
-	struct fsnotify_event *event;
-
-	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
-	if (!event)
-		return NULL;
-
-	pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
-
-	memcpy(event, old_event, sizeof(*event));
-	initialize_event(event);
-
-	if (event->name_len) {
-		event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
-		if (!event->file_name) {
-			kmem_cache_free(fsnotify_event_cachep, event);
-			return NULL;
-		}
-	}
-	event->tgid = get_pid(old_event->tgid);
-	if (event->data_type == FSNOTIFY_EVENT_PATH)
-		path_get(&event->path);
-
-	return event;
-}
-
 /*
  * fsnotify_create_event - Allocate a new event which will be sent to each
  * group's handle_event function if the group was interested in this
  * particular event.
  *
- * @to_tell the inode which is supposed to receive the event (sometimes a
+ * @inode the inode which is supposed to receive the event (sometimes a
  *	parent of the inode to which the event happened.
  * @mask what actually happened.
  * @data pointer to the object which was actually affected
  * @data_type flag indication if the data is a file, path, inode, nothing...
  * @name the filename, if available
  */
-struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
-					     int data_type, const unsigned char *name,
-					     u32 cookie, gfp_t gfp)
+void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode,
+			 u32 mask)
 {
-	struct fsnotify_event *event;
-
-	event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
-	if (!event)
-		return NULL;
-
-	pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
-		 __func__, event, to_tell, mask, data, data_type);
-
-	initialize_event(event);
-
-	if (name) {
-		event->file_name = kstrdup(name, gfp);
-		if (!event->file_name) {
-			kmem_cache_free(fsnotify_event_cachep, event);
-			return NULL;
-		}
-		event->name_len = strlen(event->file_name);
-	}
-
-	event->tgid = get_pid(task_tgid(current));
-	event->sync_cookie = cookie;
-	event->to_tell = to_tell;
-	event->data_type = data_type;
-
-	switch (data_type) {
-	case FSNOTIFY_EVENT_PATH: {
-		struct path *path = data;
-		event->path.dentry = path->dentry;
-		event->path.mnt = path->mnt;
-		path_get(&event->path);
-		break;
-	}
-	case FSNOTIFY_EVENT_INODE:
-		event->inode = data;
-		break;
-	case FSNOTIFY_EVENT_NONE:
-		event->inode = NULL;
-		event->path.dentry = NULL;
-		event->path.mnt = NULL;
-		break;
-	default:
-		BUG();
-	}
-
+	INIT_LIST_HEAD(&event->list);
+	event->inode = inode;
 	event->mask = mask;
-
-	return event;
-}
-
-static __init int fsnotify_notification_init(void)
-{
-	fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
-	fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
-
-	q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
-						 FSNOTIFY_EVENT_NONE, NULL, 0,
-						 GFP_KERNEL);
-	if (!q_overflow_event)
-		panic("unable to allocate fsnotify q_overflow_event\n");
-
-	return 0;
 }
-subsys_initcall(fsnotify_notification_init);
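
The net effect of this rewrite is a simpler ownership model: each backend embeds struct fsnotify_event in its own event structure, initialises it with fsnotify_init_event(), hands it to fsnotify_add_notify_event() together with an optional merge callback, and destroys it itself when the core reports it was merged away or once it has been read. A compilable toy model of that queue/merge contract, with illustrative names and no locking or overflow handling, is sketched below; it mirrors inotify_merge() in coalescing only with the tail of the list:

#include <stdio.h>
#include <stdlib.h>

/* Toy model: the caller owns the event memory; the "core" either links it
 * into the queue or tells the caller it was merged into an earlier event. */
struct event {
	unsigned int mask;
	int wd;
	struct event *next;
};

struct group {
	struct event *head, *tail;
	unsigned int q_len;
};

/* Like inotify_merge(): coalesce only if identical to the last queued event. */
static struct event *merge_with_tail(struct group *g, struct event *new)
{
	struct event *last = g->tail;

	if (last && last->mask == new->mask && last->wd == new->wd)
		return last;		/* caller must free 'new' itself */
	return NULL;
}

/* Like fsnotify_add_notify_event(): NULL means queued, otherwise the
 * returned pointer is the event the new one was merged into. */
static struct event *add_event(struct group *g, struct event *ev)
{
	struct event *merged = merge_with_tail(g, ev);

	if (merged)
		return merged;
	ev->next = NULL;
	if (g->tail)
		g->tail->next = ev;
	else
		g->head = ev;
	g->tail = ev;
	g->q_len++;
	return NULL;
}

int main(void)
{
	struct group g = { 0 };
	unsigned int masks[] = { 0x100, 0x100, 0x200 };	/* the second one merges */
	struct event *ev, *next;

	for (int i = 0; i < 3; i++) {
		ev = calloc(1, sizeof(*ev));
		if (!ev)
			return 1;
		ev->mask = masks[i];
		ev->wd = 1;
		if (add_event(&g, ev))
			free(ev);	/* "our event wasn't used in the end" */
	}
	printf("queued %u events\n", g.q_len);	/* prints 2 */

	for (ev = g.head; ev; ev = next) {
		next = ev->next;
		free(ev);
	}
	return 0;
}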

+ 0 - 1
fs/ocfs2/Makefile

@@ -38,7 +38,6 @@ ocfs2-objs := \
 	symlink.o 		\
 	sysfile.o 		\
 	uptodate.o		\
-	ver.o			\
 	quota_local.o		\
 	quota_global.o		\
 	xattr.o			\

+ 3 - 7
fs/ocfs2/alloc.c

@@ -7260,14 +7260,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	start = range->start >> osb->s_clustersize_bits;
 	len = range->len >> osb->s_clustersize_bits;
 	minlen = range->minlen >> osb->s_clustersize_bits;
-	trimmed = 0;
-
-	if (!len) {
-		range->len = 0;
-		return 0;
-	}
 
-	if (minlen >= osb->bitmap_cpg)
+	if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
 		return -EINVAL;
 
 	main_bm_inode = ocfs2_get_system_file_inode(osb,
@@ -7293,6 +7287,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
 		goto out_unlock;
 	}
 
+	len = range->len >> osb->s_clustersize_bits;
 	if (start + len > le32_to_cpu(main_bm->i_clusters))
 		len = le32_to_cpu(main_bm->i_clusters) - start;
 
@@ -7307,6 +7302,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
 	last_bit = osb->bitmap_cpg;
 
+	trimmed = 0;
 	for (group = first_group; group <= last_group;) {
 		if (first_bit + len >= osb->bitmap_cpg)
 			last_bit = osb->bitmap_cpg;

+ 1 - 1
fs/ocfs2/cluster/Makefile

@@ -1,4 +1,4 @@
 obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o
 
 ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
-	quorum.o tcp.o netdebug.o ver.o
+	quorum.o tcp.o netdebug.o

+ 1 - 3
fs/ocfs2/cluster/nodemanager.c

@@ -29,7 +29,6 @@
 #include "heartbeat.h"
 #include "masklog.h"
 #include "sys.h"
-#include "ver.h"
 
 /* for now we operate under the assertion that there can be only one
  * cluster active at a time.  Changing this will require trickling
@@ -945,8 +944,6 @@ static int __init init_o2nm(void)
 {
 	int ret = -1;
 
-	cluster_print_version();
-
 	ret = o2hb_init();
 	if (ret)
 		goto out;
@@ -984,6 +981,7 @@ out:
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 cluster management");
 
 module_init(init_o2nm)
 module_exit(exit_o2nm)

+ 0 - 42
fs/ocfs2/cluster/ver.c

@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "ver.h"
-
-#define CLUSTER_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION
-
-void cluster_print_version(void)
-{
-	printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(CLUSTER_BUILD_VERSION);

+ 0 - 31
fs/ocfs2/cluster/ver.h

@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef O2CLUSTER_VER_H
-#define O2CLUSTER_VER_H
-
-void cluster_print_version(void);
-
-#endif /* O2CLUSTER_VER_H */

+ 1 - 1
fs/ocfs2/dlm/Makefile

@@ -3,5 +3,5 @@ ccflags-y := -Ifs/ocfs2
 obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
 
 ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
-	dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
+	dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o
 

+ 1 - 4
fs/ocfs2/dlm/dlmdomain.c

@@ -43,8 +43,6 @@
 #include "dlmdomain.h"
 #include "dlmdebug.h"
 
-#include "dlmver.h"
-
 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
 #include "cluster/masklog.h"
 
@@ -2328,8 +2326,6 @@ static int __init dlm_init(void)
 {
 	int status;
 
-	dlm_print_version();
-
 	status = dlm_init_mle_cache();
 	if (status) {
 		mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n");
@@ -2379,6 +2375,7 @@ static void __exit dlm_exit (void)
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 Distributed Lock Management");
 
 module_init(dlm_init);
 module_exit(dlm_exit);

+ 0 - 42
fs/ocfs2/dlm/dlmver.c

@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "dlmver.h"
-
-#define DLM_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION
-
-void dlm_print_version(void)
-{
-	printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(DLM_BUILD_VERSION);

+ 0 - 31
fs/ocfs2/dlm/dlmver.h

@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmfsver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef DLM_VER_H
-#define DLM_VER_H
-
-void dlm_print_version(void);
-
-#endif /* DLM_VER_H */

+ 1 - 1
fs/ocfs2/dlmfs/Makefile

@@ -2,4 +2,4 @@ ccflags-y := -Ifs/ocfs2
 
 obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
 
-ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
+ocfs2_dlmfs-objs := userdlm.o dlmfs.o

+ 1 - 3
fs/ocfs2/dlmfs/dlmfs.c

@@ -49,7 +49,6 @@
 
 #include "stackglue.h"
 #include "userdlm.h"
-#include "dlmfsver.h"
 
 #define MLOG_MASK_PREFIX ML_DLMFS
 #include "cluster/masklog.h"
@@ -644,8 +643,6 @@ static int __init init_dlmfs_fs(void)
 	int status;
 	int cleanup_inode = 0, cleanup_worker = 0;
 
-	dlmfs_print_version();
-
 	status = bdi_init(&dlmfs_backing_dev_info);
 	if (status)
 		return status;
@@ -701,6 +698,7 @@ static void __exit exit_dlmfs_fs(void)
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 DLM-Filesystem");
 
 module_init(init_dlmfs_fs)
 module_exit(exit_dlmfs_fs)

+ 0 - 42
fs/ocfs2/dlmfs/dlmfsver.c

@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmfsver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "dlmfsver.h"
-
-#define DLM_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION
-
-void dlmfs_print_version(void)
-{
-	printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(DLM_BUILD_VERSION);

+ 0 - 31
fs/ocfs2/dlmfs/dlmfsver.h

@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef DLMFS_VER_H
-#define DLMFS_VER_H
-
-void dlmfs_print_version(void);
-
-#endif /* DLMFS_VER_H */

+ 3 - 1
fs/ocfs2/dlmglue.c

@@ -2996,6 +2996,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
 
 	/* for now, uuid == domain */
 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
+				       osb->osb_cluster_name,
+				       strlen(osb->osb_cluster_name),
 				       osb->uuid_str,
 				       strlen(osb->uuid_str),
 				       &lproto, ocfs2_do_node_down, osb,
@@ -3005,7 +3007,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
 		goto bail;
 	}
 
-	status = ocfs2_cluster_this_node(&osb->node_num);
+	status = ocfs2_cluster_this_node(conn, &osb->node_num);
 	if (status < 0) {
 		mlog_errno(status);
 		mlog(ML_ERROR,

+ 2 - 1
fs/ocfs2/file.c

@@ -1869,7 +1869,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 	}
 	size = sr->l_start + sr->l_len;
 
-	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
+	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 ||
+	    cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) {
 		if (sr->l_len <= 0) {
 			ret = -EINVAL;
 			goto out_inode_unlock;

+ 7 - 0
fs/ocfs2/ioctl.c

@@ -7,6 +7,7 @@
 
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/blkdev.h>
 #include <linux/compat.h>
 
 #include <cluster/masklog.h>
@@ -966,15 +967,21 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	case FITRIM:
 	{
 		struct super_block *sb = inode->i_sb;
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
 		struct fstrim_range range;
 		int ret = 0;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
+		if (!blk_queue_discard(q))
+			return -EOPNOTSUPP;
+
 		if (copy_from_user(&range, argp, sizeof(range)))
 			return -EFAULT;
 
+		range.minlen = max_t(u64, q->limits.discard_granularity,
+				     range.minlen);
 		ret = ocfs2_trim_fs(sb, &range);
 		if (ret < 0)
 			return ret;
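
With these checks, FITRIM on ocfs2 now fails early with -EOPNOTSUPP when the block device cannot discard, and range.minlen is raised to the device's discard granularity before ocfs2_trim_fs() runs. For context, this is how the ioctl is usually driven from user space (generic FITRIM usage as in fstrim(8), not ocfs2-specific code):

#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(int argc, char **argv)
{
	struct fstrim_range range;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <mountpoint>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&range, 0, sizeof(range));
	range.start = 0;
	range.len = ULLONG_MAX;		/* trim the whole filesystem */
	range.minlen = 0;		/* the kernel may raise this to the discard granularity */

	if (ioctl(fd, FITRIM, &range) < 0)
		perror("FITRIM");	/* EOPNOTSUPP if the device cannot discard */
	else
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);

	close(fd);
	return 0;
}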

+ 0 - 77
fs/ocfs2/move_extents.c

@@ -561,83 +561,6 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
 	mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
 }
 
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
-				       handle_t *handle,
-				       struct buffer_head *di_bh,
-				       u32 num_bits,
-				       u16 chain)
-{
-	int ret;
-	u32 tmp_used;
-	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
-	struct ocfs2_chain_list *cl =
-				(struct ocfs2_chain_list *) &di->id2.i_chain;
-
-	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret < 0) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
-	di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
-	le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
-	ocfs2_journal_dirty(handle, di_bh);
-
-out:
-	return ret;
-}
-
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
-					     struct inode *alloc_inode,
-					     struct ocfs2_group_desc *bg,
-					     struct buffer_head *group_bh,
-					     unsigned int bit_off,
-					     unsigned int num_bits)
-{
-	int status;
-	void *bitmap = bg->bg_bitmap;
-	int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
-
-	/* All callers get the descriptor via
-	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
-	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
-	BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
-
-	mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
-	     num_bits);
-
-	if (ocfs2_is_cluster_bitmap(alloc_inode))
-		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
-
-	status = ocfs2_journal_access_gd(handle,
-					 INODE_CACHE(alloc_inode),
-					 group_bh,
-					 journal_type);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail;
-	}
-
-	le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
-	if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
-		ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
-			    " count %u but claims %u are freed. num_bits %d",
-			    (unsigned long long)le64_to_cpu(bg->bg_blkno),
-			    le16_to_cpu(bg->bg_bits),
-			    le16_to_cpu(bg->bg_free_bits_count), num_bits);
-		return -EROFS;
-	}
-	while (num_bits--)
-		ocfs2_set_bit(bit_off++, bitmap);
-
-	ocfs2_journal_dirty(handle, group_bh);
-
-bail:
-	return status;
-}
-
 static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 			     u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
 			     u32 len, int ext_flags)

+ 1 - 0
fs/ocfs2/ocfs2.h

@@ -387,6 +387,7 @@ struct ocfs2_super
 	u8 osb_stackflags;
 
 	char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
+	char osb_cluster_name[OCFS2_CLUSTER_NAME_LEN + 1];
 	struct ocfs2_cluster_connection *cconn;
 	struct ocfs2_lock_res osb_super_lockres;
 	struct ocfs2_lock_res osb_rename_lockres;

+ 2 - 1
fs/ocfs2/stack_o2cb.c

@@ -398,7 +398,8 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn)
 	return 0;
 }
 
-static int o2cb_cluster_this_node(unsigned int *node)
+static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
+				  unsigned int *node)
 {
 	int node_num;
 

+ 268 - 40
fs/ocfs2/stack_user.c

@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/reboot.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>
 
 #include "stackglue.h"
@@ -102,6 +103,12 @@
 #define OCFS2_TEXT_UUID_LEN			32
 #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN	2
 #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN	8
+#define VERSION_LOCK				"version_lock"
+
+enum ocfs2_connection_type {
+	WITH_CONTROLD,
+	NO_CONTROLD
+};
 
 /*
  * ocfs2_live_connection is refcounted because the filesystem and
@@ -110,6 +117,13 @@
 struct ocfs2_live_connection {
 	struct list_head		oc_list;
 	struct ocfs2_cluster_connection	*oc_conn;
+	enum ocfs2_connection_type	oc_type;
+	atomic_t                        oc_this_node;
+	int                             oc_our_slot;
+	struct dlm_lksb                 oc_version_lksb;
+	char                            oc_lvb[DLM_LVB_LEN];
+	struct completion               oc_sync_wait;
+	wait_queue_head_t		oc_wait;
 };
 
 struct ocfs2_control_private {
@@ -198,20 +212,15 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
  * mount path.  Since the VFS prevents multiple calls to
  * fill_super(), we can't get dupes here.
  */
-static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
-				     struct ocfs2_live_connection **c_ret)
+static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn,
+				     struct ocfs2_live_connection *c)
 {
 	int rc = 0;
-	struct ocfs2_live_connection *c;
-
-	c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
-	if (!c)
-		return -ENOMEM;
 
 	mutex_lock(&ocfs2_control_lock);
 	c->oc_conn = conn;
 
-	if (atomic_read(&ocfs2_control_opened))
+	if ((c->oc_type == NO_CONTROLD) || atomic_read(&ocfs2_control_opened))
 		list_add(&c->oc_list, &ocfs2_live_connection_list);
 	else {
 		printk(KERN_ERR
@@ -220,12 +229,6 @@ static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
 	}
 
 	mutex_unlock(&ocfs2_control_lock);
-
-	if (!rc)
-		*c_ret = c;
-	else
-		kfree(c);
-
 	return rc;
 }
 
@@ -799,18 +802,251 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
 	return 0;
 }
 
+static void lvb_to_version(char *lvb, struct ocfs2_protocol_version *ver)
+{
+	struct ocfs2_protocol_version *pv =
+		(struct ocfs2_protocol_version *)lvb;
+	/*
+	 * ocfs2_protocol_version has two u8 variables, so we don't
+	 * need any endian conversion.
+	 */
+	ver->pv_major = pv->pv_major;
+	ver->pv_minor = pv->pv_minor;
+}
+
+static void version_to_lvb(struct ocfs2_protocol_version *ver, char *lvb)
+{
+	struct ocfs2_protocol_version *pv =
+		(struct ocfs2_protocol_version *)lvb;
+	/*
+	 * ocfs2_protocol_version has two u8 variables, so we don't
+	 * need any endian conversion.
+	 */
+	pv->pv_major = ver->pv_major;
+	pv->pv_minor = ver->pv_minor;
+}
+
+static void sync_wait_cb(void *arg)
+{
+	struct ocfs2_cluster_connection *conn = arg;
+	struct ocfs2_live_connection *lc = conn->cc_private;
+	complete(&lc->oc_sync_wait);
+}
+
+static int sync_unlock(struct ocfs2_cluster_connection *conn,
+		struct dlm_lksb *lksb, char *name)
+{
+	int error;
+	struct ocfs2_live_connection *lc = conn->cc_private;
+
+	error = dlm_unlock(conn->cc_lockspace, lksb->sb_lkid, 0, lksb, conn);
+	if (error) {
+		printk(KERN_ERR "%s lkid %x error %d\n",
+				name, lksb->sb_lkid, error);
+		return error;
+	}
+
+	wait_for_completion(&lc->oc_sync_wait);
+
+	if (lksb->sb_status != -DLM_EUNLOCK) {
+		printk(KERN_ERR "%s lkid %x status %d\n",
+				name, lksb->sb_lkid, lksb->sb_status);
+		return -1;
+	}
+	return 0;
+}
+
+static int sync_lock(struct ocfs2_cluster_connection *conn,
+		int mode, uint32_t flags,
+		struct dlm_lksb *lksb, char *name)
+{
+	int error, status;
+	struct ocfs2_live_connection *lc = conn->cc_private;
+
+	error = dlm_lock(conn->cc_lockspace, mode, lksb, flags,
+			name, strlen(name),
+			0, sync_wait_cb, conn, NULL);
+	if (error) {
+		printk(KERN_ERR "%s lkid %x flags %x mode %d error %d\n",
+				name, lksb->sb_lkid, flags, mode, error);
+		return error;
+	}
+
+	wait_for_completion(&lc->oc_sync_wait);
+
+	status = lksb->sb_status;
+
+	if (status && status != -EAGAIN) {
+		printk(KERN_ERR "%s lkid %x flags %x mode %d status %d\n",
+				name, lksb->sb_lkid, flags, mode, status);
+	}
+
+	return status;
+}
+
+
+static int version_lock(struct ocfs2_cluster_connection *conn, int mode,
+		int flags)
+{
+	struct ocfs2_live_connection *lc = conn->cc_private;
+	return sync_lock(conn, mode, flags,
+			&lc->oc_version_lksb, VERSION_LOCK);
+}
+
+static int version_unlock(struct ocfs2_cluster_connection *conn)
+{
+	struct ocfs2_live_connection *lc = conn->cc_private;
+	return sync_unlock(conn, &lc->oc_version_lksb, VERSION_LOCK);
+}
+
+/* get_protocol_version()
+ *
+ * To exchange ocfs2 versioning, we use the LVB of the version dlm lock.
+ * The algorithm is:
+ * 1. Attempt to take the lock in EX mode (non-blocking).
+ * 2. If successful (which means it is the first mount), write the
+ *    version number and downconvert to PR lock.
+ * 3. If unsuccessful (returns -EAGAIN), read the version from the LVB after
+ *    taking the PR lock.
+ */
+
+static int get_protocol_version(struct ocfs2_cluster_connection *conn)
+{
+	int ret;
+	struct ocfs2_live_connection *lc = conn->cc_private;
+	struct ocfs2_protocol_version pv;
+
+	running_proto.pv_major =
+		ocfs2_user_plugin.sp_max_proto.pv_major;
+	running_proto.pv_minor =
+		ocfs2_user_plugin.sp_max_proto.pv_minor;
+
+	lc->oc_version_lksb.sb_lvbptr = lc->oc_lvb;
+	ret = version_lock(conn, DLM_LOCK_EX,
+			DLM_LKF_VALBLK|DLM_LKF_NOQUEUE);
+	if (!ret) {
+		conn->cc_version.pv_major = running_proto.pv_major;
+		conn->cc_version.pv_minor = running_proto.pv_minor;
+		version_to_lvb(&running_proto, lc->oc_lvb);
+		version_lock(conn, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
+	} else if (ret == -EAGAIN) {
+		ret = version_lock(conn, DLM_LOCK_PR, DLM_LKF_VALBLK);
+		if (ret)
+			goto out;
+		lvb_to_version(lc->oc_lvb, &pv);
+
+		if ((pv.pv_major != running_proto.pv_major) ||
+				(pv.pv_minor > running_proto.pv_minor)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		conn->cc_version.pv_major = pv.pv_major;
+		conn->cc_version.pv_minor = pv.pv_minor;
+	}
+out:
+	return ret;
+}
+
+static void user_recover_prep(void *arg)
+{
+}
+
+static void user_recover_slot(void *arg, struct dlm_slot *slot)
+{
+	struct ocfs2_cluster_connection *conn = arg;
+	printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
+			slot->nodeid, slot->slot);
+	conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
+
+}
+
+static void user_recover_done(void *arg, struct dlm_slot *slots,
+		int num_slots, int our_slot,
+		uint32_t generation)
+{
+	struct ocfs2_cluster_connection *conn = arg;
+	struct ocfs2_live_connection *lc = conn->cc_private;
+	int i;
+
+	for (i = 0; i < num_slots; i++)
+		if (slots[i].slot == our_slot) {
+			atomic_set(&lc->oc_this_node, slots[i].nodeid);
+			break;
+		}
+
+	lc->oc_our_slot = our_slot;
+	wake_up(&lc->oc_wait);
+}
+
+static const struct dlm_lockspace_ops ocfs2_ls_ops = {
+	.recover_prep = user_recover_prep,
+	.recover_slot = user_recover_slot,
+	.recover_done = user_recover_done,
+};
+
+static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
+{
+	version_unlock(conn);
+	dlm_release_lockspace(conn->cc_lockspace, 2);
+	conn->cc_lockspace = NULL;
+	ocfs2_live_connection_drop(conn->cc_private);
+	conn->cc_private = NULL;
+	return 0;
+}
+
 static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 {
 	dlm_lockspace_t *fsdlm;
-	struct ocfs2_live_connection *uninitialized_var(control);
-	int rc = 0;
+	struct ocfs2_live_connection *lc;
+	int rc, ops_rv;
 
 	BUG_ON(conn == NULL);
 
-	rc = ocfs2_live_connection_new(conn, &control);
+	lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
+	if (!lc) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	init_waitqueue_head(&lc->oc_wait);
+	init_completion(&lc->oc_sync_wait);
+	atomic_set(&lc->oc_this_node, 0);
+	conn->cc_private = lc;
+	lc->oc_type = NO_CONTROLD;
+
+	rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name,
+			       DLM_LSFL_FS, DLM_LVB_LEN,
+			       &ocfs2_ls_ops, conn, &ops_rv, &fsdlm);
+	if (rc)
+		goto out;
+
+	if (ops_rv == -EOPNOTSUPP) {
+		lc->oc_type = WITH_CONTROLD;
+		printk(KERN_NOTICE "ocfs2: You seem to be using an older "
+				"version of dlm_controld and/or ocfs2-tools."
+				" Please consider upgrading.\n");
+	} else if (ops_rv) {
+		rc = ops_rv;
+		goto out;
+	}
+	conn->cc_lockspace = fsdlm;
+
+	rc = ocfs2_live_connection_attach(conn, lc);
 	if (rc)
 		goto out;
 
+	if (lc->oc_type == NO_CONTROLD) {
+		rc = get_protocol_version(conn);
+		if (rc) {
+			printk(KERN_ERR "ocfs2: Could not determine"
+					" locking version\n");
+			user_cluster_disconnect(conn);
+			goto out;
+		}
+		wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
+	}
+
 	/*
 	 * running_proto must have been set before we allowed any mounts
 	 * to proceed.
@@ -818,42 +1054,34 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 	if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
 		printk(KERN_ERR
 		       "Unable to mount with fs locking protocol version "
-		       "%u.%u because the userspace control daemon has "
-		       "negotiated %u.%u\n",
+		       "%u.%u because negotiated protocol is %u.%u\n",
 		       conn->cc_version.pv_major, conn->cc_version.pv_minor,
 		       running_proto.pv_major, running_proto.pv_minor);
 		rc = -EPROTO;
-		ocfs2_live_connection_drop(control);
-		goto out;
-	}
-
-	rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
-			       NULL, NULL, NULL, &fsdlm);
-	if (rc) {
-		ocfs2_live_connection_drop(control);
-		goto out;
+		ocfs2_live_connection_drop(lc);
+		lc = NULL;
 	}
 
-	conn->cc_private = control;
-	conn->cc_lockspace = fsdlm;
 out:
+	if (rc && lc)
+		kfree(lc);
 	return rc;
 }
 
-static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
-{
-	dlm_release_lockspace(conn->cc_lockspace, 2);
-	conn->cc_lockspace = NULL;
-	ocfs2_live_connection_drop(conn->cc_private);
-	conn->cc_private = NULL;
-	return 0;
-}
 
-static int user_cluster_this_node(unsigned int *this_node)
+static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
+				  unsigned int *this_node)
 {
 	int rc;
+	struct ocfs2_live_connection *lc = conn->cc_private;
+
+	if (lc->oc_type == WITH_CONTROLD)
+		rc = ocfs2_control_get_this_node();
+	else if (lc->oc_type == NO_CONTROLD)
+		rc = atomic_read(&lc->oc_this_node);
+	else
+		rc = -EINVAL;
 
-	rc = ocfs2_control_get_this_node();
 	if (rc < 0)
 		return rc;
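
The version negotiation added above reduces to a simple rule: the first node to mount takes the version lock exclusively and publishes its protocol version in the lock value block; every later mount takes it shared, reads the LVB, and may proceed only with the same major number and a minor number no older than what was published. A stand-alone restatement of that compatibility check (plain C with illustrative names, mirroring the -EINVAL test in get_protocol_version() above):

#include <stdio.h>
#include <stdbool.h>

struct proto_version {
	unsigned char major;
	unsigned char minor;
};

/* A later mount is compatible only if it speaks the same major version and
 * supports at least the minor version the first mount wrote into the LVB. */
static bool proto_compatible(struct proto_version published,
			     struct proto_version ours)
{
	return published.major == ours.major && published.minor <= ours.minor;
}

int main(void)
{
	struct proto_version published = { 1, 0 };
	struct proto_version ours = { 1, 2 };

	printf("compatible: %s\n",
	       proto_compatible(published, ours) ? "yes" : "no");
	return 0;
}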
 

+ 11 - 5
fs/ocfs2/stackglue.c

@@ -309,6 +309,8 @@ int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
 EXPORT_SYMBOL_GPL(ocfs2_plock);
 
 int ocfs2_cluster_connect(const char *stack_name,
+			  const char *cluster_name,
+			  int cluster_name_len,
 			  const char *group,
 			  int grouplen,
 			  struct ocfs2_locking_protocol *lproto,
@@ -342,8 +344,10 @@ int ocfs2_cluster_connect(const char *stack_name,
 		goto out;
 	}
 
-	memcpy(new_conn->cc_name, group, grouplen);
+	strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
 	new_conn->cc_namelen = grouplen;
+	strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1);
+	new_conn->cc_cluster_name_len = cluster_name_len;
 	new_conn->cc_recovery_handler = recovery_handler;
 	new_conn->cc_recovery_data = recovery_data;
 
@@ -386,8 +390,9 @@ int ocfs2_cluster_connect_agnostic(const char *group,
 
 	if (cluster_stack_name[0])
 		stack_name = cluster_stack_name;
-	return ocfs2_cluster_connect(stack_name, group, grouplen, lproto,
-				     recovery_handler, recovery_data, conn);
+	return ocfs2_cluster_connect(stack_name, NULL, 0, group, grouplen,
+				     lproto, recovery_handler, recovery_data,
+				     conn);
 }
 EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic);
 
@@ -460,9 +465,10 @@ void ocfs2_cluster_hangup(const char *group, int grouplen)
 }
 EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
 
-int ocfs2_cluster_this_node(unsigned int *node)
+int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
+			    unsigned int *node)
 {
-	return active_stack->sp_ops->this_node(node);
+	return active_stack->sp_ops->this_node(conn, node);
 }
 EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
 

+ 12 - 3
fs/ocfs2/stackglue.h

@@ -45,6 +45,9 @@ struct file_lock;
  */
 #define GROUP_NAME_MAX		64
 
+/* This shadows  OCFS2_CLUSTER_NAME_LEN */
+#define CLUSTER_NAME_MAX	16
+
 
 /*
  * ocfs2_protocol_version changes when ocfs2 does something different in
@@ -97,8 +100,10 @@ struct ocfs2_locking_protocol {
  * locking compatibility.
  */
 struct ocfs2_cluster_connection {
-	char cc_name[GROUP_NAME_MAX];
+	char cc_name[GROUP_NAME_MAX + 1];
 	int cc_namelen;
+	char cc_cluster_name[CLUSTER_NAME_MAX + 1];
+	int cc_cluster_name_len;
 	struct ocfs2_protocol_version cc_version;
 	struct ocfs2_locking_protocol *cc_proto;
 	void (*cc_recovery_handler)(int node_num, void *recovery_data);
@@ -152,7 +157,8 @@ struct ocfs2_stack_operations {
 	 * ->this_node() returns the cluster's unique identifier for the
 	 * local node.
 	 */
-	int (*this_node)(unsigned int *node);
+	int (*this_node)(struct ocfs2_cluster_connection *conn,
+			 unsigned int *node);
 
 	/*
 	 * Call the underlying dlm lock function.  The ->dlm_lock()
@@ -239,6 +245,8 @@ struct ocfs2_stack_plugin {
 
 /* Used by the filesystem */
 int ocfs2_cluster_connect(const char *stack_name,
+			  const char *cluster_name,
+			  int cluster_name_len,
 			  const char *group,
 			  int grouplen,
 			  struct ocfs2_locking_protocol *lproto,
@@ -260,7 +268,8 @@ int ocfs2_cluster_connect_agnostic(const char *group,
 int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
 			     int hangup_pending);
 void ocfs2_cluster_hangup(const char *group, int grouplen);
-int ocfs2_cluster_this_node(unsigned int *node);
+int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
+			    unsigned int *node);
 
 struct ocfs2_lock_res;
 int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,

+ 2 - 10
fs/ocfs2/suballoc.c

@@ -113,12 +113,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
 				     struct ocfs2_suballoc_result *res);
 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 					 int nr);
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
-					     struct inode *alloc_inode,
-					     struct ocfs2_group_desc *bg,
-					     struct buffer_head *group_bh,
-					     unsigned int bit_off,
-					     unsigned int num_bits);
 static int ocfs2_relink_block_group(handle_t *handle,
 				    struct inode *alloc_inode,
 				    struct buffer_head *fe_bh,
@@ -1343,7 +1337,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
 	return status;
 }
 
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
+int ocfs2_block_group_set_bits(handle_t *handle,
 					     struct inode *alloc_inode,
 					     struct ocfs2_group_desc *bg,
 					     struct buffer_head *group_bh,
@@ -1388,8 +1382,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
 	ocfs2_journal_dirty(handle, group_bh);
 
 bail:
-	if (status)
-		mlog_errno(status);
 	return status;
 }
 
@@ -1588,7 +1580,7 @@ static int ocfs2_block_group_search(struct inode *inode,
 	return ret;
 }
 
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
 				       handle_t *handle,
 				       struct buffer_head *di_bh,
 				       u32 num_bits,

+ 12 - 0
fs/ocfs2/suballoc.h

@@ -86,6 +86,18 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 			   u32 bits_wanted,
 			   struct ocfs2_alloc_context **ac);
 
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+			 handle_t *handle,
+			 struct buffer_head *di_bh,
+			 u32 num_bits,
+			 u16 chain);
+int ocfs2_block_group_set_bits(handle_t *handle,
+			 struct inode *alloc_inode,
+			 struct ocfs2_group_desc *bg,
+			 struct buffer_head *group_bh,
+			 unsigned int bit_off,
+			 unsigned int num_bits);
+
 int ocfs2_claim_metadata(handle_t *handle,
 			 struct ocfs2_alloc_context *ac,
 			 u32 bits_wanted,

+ 12 - 8
fs/ocfs2/super.c

@@ -68,7 +68,6 @@
 #include "super.h"
 #include "sysfile.h"
 #include "uptodate.h"
-#include "ver.h"
 #include "xattr.h"
 #include "quota.h"
 #include "refcounttree.h"
@@ -90,6 +89,7 @@ static struct dentry *ocfs2_debugfs_root = NULL;
 
 MODULE_AUTHOR("Oracle");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 cluster file system");
 
 struct mount_options
 {
@@ -1618,8 +1618,6 @@ static int __init ocfs2_init(void)
 {
 	int status, i;
 
-	ocfs2_print_version();
-
 	for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
 		init_waitqueue_head(&ocfs2__ioend_wq[i]);
 
@@ -1947,11 +1945,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
 	ocfs2_shutdown_local_alloc(osb);
 
-	ocfs2_truncate_log_shutdown(osb);
-
 	/* This will disable recovery and flush any recovery work. */
 	ocfs2_recovery_exit(osb);
 
+	/*
+	 * During dismount, when it recovers another node it will call
+	 * ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq.
+	 */
+	ocfs2_truncate_log_shutdown(osb);
+
 	ocfs2_journal_shutdown(osb);
 
 	ocfs2_sync_blockdev(sb);
@@ -2225,10 +2227,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	if (ocfs2_clusterinfo_valid(osb)) {
 		osb->osb_stackflags =
 			OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
-		memcpy(osb->osb_cluster_stack,
+		strlcpy(osb->osb_cluster_stack,
 		       OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
-		       OCFS2_STACK_LABEL_LEN);
-		osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+		       OCFS2_STACK_LABEL_LEN + 1);
 		if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
 			mlog(ML_ERROR,
 			     "couldn't mount because of an invalid "
@@ -2237,6 +2238,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 			status = -EINVAL;
 			goto bail;
 		}
+		strlcpy(osb->osb_cluster_name,
+			OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
+			OCFS2_CLUSTER_NAME_LEN + 1);
 	} else {
 		/* The empty string is identical with classic tools that
 		 * don't know about s_cluster_info. */

+ 0 - 43
fs/ocfs2/ver.c

@@ -1,43 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-#include "ver.h"
-
-#define OCFS2_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION
-
-void ocfs2_print_version(void)
-{
-	printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(OCFS2_BUILD_VERSION);

+ 0 - 31
fs/ocfs2/ver.h

@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_VER_H
-#define OCFS2_VER_H
-
-void ocfs2_print_version(void);
-
-#endif /* OCFS2_VER_H */

+ 79 - 5
fs/posix_acl.c

@@ -22,11 +22,80 @@
 
 #include <linux/errno.h>
 
-EXPORT_SYMBOL(posix_acl_init);
-EXPORT_SYMBOL(posix_acl_alloc);
-EXPORT_SYMBOL(posix_acl_valid);
-EXPORT_SYMBOL(posix_acl_equiv_mode);
-EXPORT_SYMBOL(posix_acl_from_mode);
+struct posix_acl **acl_by_type(struct inode *inode, int type)
+{
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		return &inode->i_acl;
+	case ACL_TYPE_DEFAULT:
+		return &inode->i_default_acl;
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(acl_by_type);
+
+struct posix_acl *get_cached_acl(struct inode *inode, int type)
+{
+	struct posix_acl **p = acl_by_type(inode, type);
+	struct posix_acl *acl = ACCESS_ONCE(*p);
+	if (acl) {
+		spin_lock(&inode->i_lock);
+		acl = *p;
+		if (acl != ACL_NOT_CACHED)
+			acl = posix_acl_dup(acl);
+		spin_unlock(&inode->i_lock);
+	}
+	return acl;
+}
+EXPORT_SYMBOL(get_cached_acl);
+
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
+{
+	return rcu_dereference(*acl_by_type(inode, type));
+}
+EXPORT_SYMBOL(get_cached_acl_rcu);
+
+void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct posix_acl **p = acl_by_type(inode, type);
+	struct posix_acl *old;
+	spin_lock(&inode->i_lock);
+	old = *p;
+	rcu_assign_pointer(*p, posix_acl_dup(acl));
+	spin_unlock(&inode->i_lock);
+	if (old != ACL_NOT_CACHED)
+		posix_acl_release(old);
+}
+EXPORT_SYMBOL(set_cached_acl);
+
+void forget_cached_acl(struct inode *inode, int type)
+{
+	struct posix_acl **p = acl_by_type(inode, type);
+	struct posix_acl *old;
+	spin_lock(&inode->i_lock);
+	old = *p;
+	*p = ACL_NOT_CACHED;
+	spin_unlock(&inode->i_lock);
+	if (old != ACL_NOT_CACHED)
+		posix_acl_release(old);
+}
+EXPORT_SYMBOL(forget_cached_acl);
+
+void forget_all_cached_acls(struct inode *inode)
+{
+	struct posix_acl *old_access, *old_default;
+	spin_lock(&inode->i_lock);
+	old_access = inode->i_acl;
+	old_default = inode->i_default_acl;
+	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+	spin_unlock(&inode->i_lock);
+	if (old_access != ACL_NOT_CACHED)
+		posix_acl_release(old_access);
+	if (old_default != ACL_NOT_CACHED)
+		posix_acl_release(old_default);
+}
+EXPORT_SYMBOL(forget_all_cached_acls);
 
 /*
  * Init a fresh posix_acl
@@ -37,6 +106,7 @@ posix_acl_init(struct posix_acl *acl, int count)
 	atomic_set(&acl->a_refcount, 1);
 	acl->a_count = count;
 }
+EXPORT_SYMBOL(posix_acl_init);
 
 /*
  * Allocate a new ACL with the specified number of entries.
@@ -51,6 +121,7 @@ posix_acl_alloc(int count, gfp_t flags)
 		posix_acl_init(acl, count);
 	return acl;
 }
+EXPORT_SYMBOL(posix_acl_alloc);
 
 /*
  * Clone an ACL.
@@ -146,6 +217,7 @@ posix_acl_valid(const struct posix_acl *acl)
 		return 0;
 	return -EINVAL;
 }
+EXPORT_SYMBOL(posix_acl_valid);
 
 /*
  * Returns 0 if the acl can be exactly represented in the traditional
@@ -186,6 +258,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
                 *mode_p = (*mode_p & ~S_IRWXUGO) | mode;
         return not_equiv;
 }
+EXPORT_SYMBOL(posix_acl_equiv_mode);
 
 /*
  * Create an ACL representing the file mode permission bits of an inode.
@@ -207,6 +280,7 @@ posix_acl_from_mode(umode_t mode, gfp_t flags)
 	acl->a_entries[2].e_perm = (mode & S_IRWXO);
 	return acl;
 }
+EXPORT_SYMBOL(posix_acl_from_mode);
 
 /*
  * Return 0 if current is granted want access to the inode

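Moving these helpers out of line does not change their logic: a cached pointer is re-checked under inode->i_lock, duplicated (reference count bumped) on the way out, and the previous object is released whenever the cache is replaced or cleared. Below is a small user-space model of that pattern, with a pthread mutex standing in for the inode lock and a plain integer refcount standing in for the ACL refcount; the names are illustrative only, and the lockless ACCESS_ONCE() fast path of get_cached_acl() is omitted for brevity.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct obj { int refcount; int id; };

#define NOT_CACHED ((struct obj *)-1L)	/* stand-in for ACL_NOT_CACHED */

static struct obj *cache = NOT_CACHED;
static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

static struct obj *obj_dup(struct obj *o)
{
	o->refcount++;
	return o;
}

static void obj_release(struct obj *o)
{
	if (o && o != NOT_CACHED && --o->refcount == 0) {
		printf("freeing obj %d\n", o->id);
		free(o);
	}
}

/* get_cached_acl() pattern: re-check under the lock, return a new reference */
static struct obj *get_cached(void)
{
	struct obj *o;

	pthread_mutex_lock(&cache_lock);
	o = cache;
	if (o != NOT_CACHED)
		o = obj_dup(o);
	pthread_mutex_unlock(&cache_lock);
	return o == NOT_CACHED ? NULL : o;
}

/* set_cached_acl() pattern: install a new reference, drop the old one */
static void set_cached(struct obj *o)
{
	struct obj *old;

	pthread_mutex_lock(&cache_lock);
	old = cache;
	cache = obj_dup(o);
	pthread_mutex_unlock(&cache_lock);
	obj_release(old);
}

/* forget_cached_acl() pattern: clear the cache, drop its reference */
static void forget_cached(void)
{
	struct obj *old;

	pthread_mutex_lock(&cache_lock);
	old = cache;
	cache = NOT_CACHED;
	pthread_mutex_unlock(&cache_lock);
	obj_release(old);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	o->refcount = 1;
	o->id = 1;
	set_cached(o);			/* cache now owns a reference */
	obj_release(o);			/* drop the creation reference */
	obj_release(get_cached());	/* take and drop a cached reference */
	forget_cached();		/* prints "freeing obj 1" */
	return 0;
}
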
+ 37 - 0
fs/proc/meminfo.c

@@ -26,7 +26,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	unsigned long committed;
 	struct vmalloc_info vmi;
 	long cached;
+	long available;
+	unsigned long pagecache;
+	unsigned long wmark_low = 0;
 	unsigned long pages[NR_LRU_LISTS];
+	struct zone *zone;
 	int lru;
 
 /*
@@ -47,12 +51,44 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
 		pages[lru] = global_page_state(NR_LRU_BASE + lru);
 
+	for_each_zone(zone)
+		wmark_low += zone->watermark[WMARK_LOW];
+
+	/*
+	 * Estimate the amount of memory available for userspace allocations,
+	 * without causing swapping.
+	 *
+	 * Free memory cannot be taken below the low watermark, before the
+	 * system starts swapping.
+	 */
+	available = i.freeram - wmark_low;
+
+	/*
+	 * Not all the page cache can be freed, otherwise the system will
+	 * start swapping. Assume at least half of the page cache, or the
+	 * low watermark worth of cache, needs to stay.
+	 */
+	pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
+	pagecache -= min(pagecache / 2, wmark_low);
+	available += pagecache;
+
+	/*
+	 * Part of the reclaimable swap consists of items that are in use,
+	 * and cannot be freed. Cap this estimate at the low watermark.
+	 */
+	available += global_page_state(NR_SLAB_RECLAIMABLE) -
+		     min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+
+	if (available < 0)
+		available = 0;
+
 	/*
 	 * Tagged format, for easy grepping and expansion.
 	 */
 	seq_printf(m,
 		"MemTotal:       %8lu kB\n"
 		"MemFree:        %8lu kB\n"
+		"MemAvailable:   %8lu kB\n"
 		"Buffers:        %8lu kB\n"
 		"Cached:         %8lu kB\n"
 		"SwapCached:     %8lu kB\n"
@@ -105,6 +141,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		,
 		K(i.totalram),
 		K(i.freeram),
+		K(available),
 		K(i.bufferram),
 		K(cached),
 		K(total_swapcache_pages()),

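The new MemAvailable field gives userspace a single estimate instead of forcing tools to reconstruct it from MemFree, the file LRU sizes and reclaimable slab. A minimal sketch of a consumer, assuming the usual /proc mount; the field name is taken from the hunk above, and kernels without this change simply will not have the line:

#include <stdio.h>

int main(void)
{
	char line[256];
	unsigned long kb;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f) {
		perror("/proc/meminfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "MemAvailable: %lu kB", &kb) == 1) {
			printf("estimated available memory: %lu kB\n", kb);
			fclose(f);
			return 0;
		}
	}
	fclose(f);
	fprintf(stderr, "MemAvailable not present (kernel too old?)\n");
	return 1;
}
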
+ 1 - 1
fs/ramfs/inode.c

@@ -275,4 +275,4 @@ int __init init_ramfs_fs(void)
 
 	return err;
 }
-module_init(init_ramfs_fs)
+fs_initcall(init_ramfs_fs);

+ 0 - 4
fs/read_write.c

@@ -901,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	io_fn_t fn;
 	iov_fn_t fnv;
 
-	ret = -EFAULT;
-	if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
-		goto out;
-
 	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
 					       UIO_FASTIOV, iovstack, &iov);
 	if (ret <= 0)

+ 2 - 1
fs/super.c

@@ -166,6 +166,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	if (!s)
 		return NULL;
 
+	INIT_LIST_HEAD(&s->s_mounts);
+
 	if (security_sb_alloc(s))
 		goto fail;
 
@@ -188,7 +190,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	if (list_lru_init(&s->s_inode_lru))
 		goto fail;
 
-	INIT_LIST_HEAD(&s->s_mounts);
 	init_rwsem(&s->s_umount);
 	lockdep_set_class(&s->s_umount, &type->s_umount_key);
 	/*

+ 152 - 1
include/linux/bootmem.h

@@ -5,6 +5,7 @@
 #define _LINUX_BOOTMEM_H
 
 #include <linux/mmzone.h>
+#include <linux/mm_types.h>
 #include <asm/dma.h>
 
 /*
@@ -52,7 +53,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 			      unsigned long size);
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
-extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
@@ -142,6 +142,157 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
+
+#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM)
+
+/* FIXME: use MEMBLOCK_ALLOC_* variants here */
+#define BOOTMEM_ALLOC_ACCESSIBLE	0
+#define BOOTMEM_ALLOC_ANYWHERE		(~(phys_addr_t)0)
+
+/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
+void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
+		phys_addr_t align, phys_addr_t min_addr,
+		phys_addr_t max_addr, int nid);
+void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align,
+		phys_addr_t min_addr, phys_addr_t max_addr, int nid);
+void __memblock_free_early(phys_addr_t base, phys_addr_t size);
+void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+
+static inline void * __init memblock_virt_alloc(
+					phys_addr_t size,  phys_addr_t align)
+{
+	return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
+					    BOOTMEM_ALLOC_ACCESSIBLE,
+					    NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+					phys_addr_t size, phys_addr_t align)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, align,
+						    BOOTMEM_LOW_LIMIT,
+						    BOOTMEM_ALLOC_ACCESSIBLE,
+						    NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr,
+						    BOOTMEM_ALLOC_ACCESSIBLE,
+						    NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+						phys_addr_t size, int nid)
+{
+	return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
+					    BOOTMEM_ALLOC_ACCESSIBLE, nid);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+						phys_addr_t size, int nid)
+{
+	return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
+						    BOOTMEM_ALLOC_ACCESSIBLE,
+						    nid);
+}
+
+static inline void __init memblock_free_early(
+					phys_addr_t base, phys_addr_t size)
+{
+	__memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+				phys_addr_t base, phys_addr_t size, int nid)
+{
+	__memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_late(
+					phys_addr_t base, phys_addr_t size)
+{
+	__memblock_free_late(base, size);
+}
+
+#else
+
+#define BOOTMEM_ALLOC_ACCESSIBLE	0
+
+
+/* Fall back to all the existing bootmem APIs */
+static inline void * __init memblock_virt_alloc(
+					phys_addr_t size,  phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_nopanic(
+					phys_addr_t size, phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_from_nopanic(
+		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+	return __alloc_bootmem_nopanic(size, align, min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_node(
+						phys_addr_t size, int nid)
+{
+	return __alloc_bootmem_node(NODE_DATA(nid), size, SMP_CACHE_BYTES,
+				     BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_node_nopanic(
+						phys_addr_t size, int nid)
+{
+	return __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+					     SMP_CACHE_BYTES,
+					     BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size,
+	phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+	return __alloc_bootmem_node_high(NODE_DATA(nid), size, align,
+					  min_addr);
+}
+
+static inline void * __init memblock_virt_alloc_try_nid_nopanic(
+			phys_addr_t size, phys_addr_t align,
+			phys_addr_t min_addr, phys_addr_t max_addr, int nid)
+{
+	return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align,
+				min_addr, max_addr);
+}
+
+static inline void __init memblock_free_early(
+					phys_addr_t base, phys_addr_t size)
+{
+	free_bootmem(base, size);
+}
+
+static inline void __init memblock_free_early_nid(
+				phys_addr_t base, phys_addr_t size, int nid)
+{
+	free_bootmem_node(NODE_DATA(nid), base, size);
+}
+
+static inline void __init memblock_free_late(
+					phys_addr_t base, phys_addr_t size)
+{
+	free_bootmem_late(base, size);
+}
+#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */
+
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
 extern void *alloc_remap(int nid, unsigned long size);
 #else

+ 16 - 0
include/linux/compaction.h

@@ -62,6 +62,22 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 	return zone->compact_considered < defer_limit;
 }
 
+/*
+ * Update defer tracking counters after successful compaction of given order,
+ * which means an allocation either succeeded (alloc_success == true) or is
+ * expected to succeed.
+ */
+static inline void compaction_defer_reset(struct zone *zone, int order,
+		bool alloc_success)
+{
+	if (alloc_success) {
+		zone->compact_considered = 0;
+		zone->compact_defer_shift = 0;
+	}
+	if (order >= zone->compact_order_failed)
+		zone->compact_order_failed = order + 1;
+}
+
 /* Returns true if restarting compaction after many failures */
 static inline bool compaction_restarting(struct zone *zone, int order)
 {

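compaction_defer_reset() is the counterpart of the existing deferral counters: on success it clears compact_considered and compact_defer_shift, and it always raises compact_order_failed above the order that just worked. A standalone model of just those three fields, transcribing the helper from the hunk above; the struct name and the starting values in main() are made-up example data, not kernel state:

#include <stdbool.h>
#include <stdio.h>

/* Minimal model of the three per-zone deferral fields touched above. */
struct zone_model {
	unsigned int compact_considered;
	unsigned int compact_defer_shift;
	int compact_order_failed;
};

/* Same body as compaction_defer_reset() in the hunk above. */
static void compaction_defer_reset(struct zone_model *zone, int order,
				   bool alloc_success)
{
	if (alloc_success) {
		zone->compact_considered = 0;
		zone->compact_defer_shift = 0;
	}
	if (order >= zone->compact_order_failed)
		zone->compact_order_failed = order + 1;
}

int main(void)
{
	struct zone_model z = {
		.compact_considered = 3,
		.compact_defer_shift = 2,
		.compact_order_failed = 4,
	};

	/* A successful order-5 allocation clears the deferral state and
	 * records that order 5 is no longer considered failing. */
	compaction_defer_reset(&z, 5, true);
	printf("considered=%u shift=%u order_failed=%d\n",
	       z.compact_considered, z.compact_defer_shift,
	       z.compact_order_failed);
	return 0;
}
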
+ 6 - 0
include/linux/dma-debug.h

@@ -85,6 +85,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
 
 extern void debug_dma_dump_mappings(struct device *dev);
 
+extern void debug_dma_assert_idle(struct page *page);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_add_bus(struct bus_type *bus)
@@ -183,6 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
 {
 }
 
+static inline void debug_dma_assert_idle(struct page *page)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */

+ 27 - 91
include/linux/fsnotify_backend.h

@@ -15,7 +15,6 @@
 #include <linux/path.h> /* struct path */
 #include <linux/spinlock.h>
 #include <linux/types.h>
-
 #include <linux/atomic.h>
 
 /*
@@ -79,6 +78,7 @@ struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark;
 struct fsnotify_event_private_data;
+struct fsnotify_fname;
 
 /*
  * Each group much define these ops.  The fsnotify infrastructure will call
@@ -94,17 +94,27 @@ struct fsnotify_event_private_data;
  * 		userspace messages that marks have been removed.
  */
 struct fsnotify_ops {
-	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
-				  struct fsnotify_mark *inode_mark,
-				  struct fsnotify_mark *vfsmount_mark,
-				  __u32 mask, void *data, int data_type);
 	int (*handle_event)(struct fsnotify_group *group,
+			    struct inode *inode,
 			    struct fsnotify_mark *inode_mark,
 			    struct fsnotify_mark *vfsmount_mark,
-			    struct fsnotify_event *event);
+			    u32 mask, void *data, int data_type,
+			    const unsigned char *file_name);
 	void (*free_group_priv)(struct fsnotify_group *group);
 	void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
-	void (*free_event_priv)(struct fsnotify_event_private_data *priv);
+	void (*free_event)(struct fsnotify_event *event);
+};
+
+/*
+ * all of the information about the original object we want to now send to
+ * a group.  If you want to carry more info from the accessing task to the
+ * listener this structure is where you need to be adding fields.
+ */
+struct fsnotify_event {
+	struct list_head list;
+	/* inode may ONLY be dereferenced during handle_event(). */
+	struct inode *inode;	/* either the inode the event happened to or its parent */
+	u32 mask;		/* the type of access, bitwise OR for FS_* event types */
 };
 
 /*
@@ -148,7 +158,11 @@ struct fsnotify_group {
 					 * a group */
 	struct list_head marks_list;	/* all inode marks for this group */
 
-	struct fasync_struct    *fsn_fa;    /* async notification */
+	struct fasync_struct *fsn_fa;    /* async notification */
+
+	struct fsnotify_event overflow_event;	/* Event we queue when the
+						 * notification list is too
+						 * full */
 
 	/* groups can define private fields here or use the void *private */
 	union {
@@ -177,76 +191,10 @@ struct fsnotify_group {
 	};
 };
 
-/*
- * A single event can be queued in multiple group->notification_lists.
- *
- * each group->notification_list will point to an event_holder which in turns points
- * to the actual event that needs to be sent to userspace.
- *
- * Seemed cheaper to create a refcnt'd event and a small holder for every group
- * than create a different event for every group
- *
- */
-struct fsnotify_event_holder {
-	struct fsnotify_event *event;
-	struct list_head event_list;
-};
-
-/*
- * Inotify needs to tack data onto an event.  This struct lets us later find the
- * correct private data of the correct group.
- */
-struct fsnotify_event_private_data {
-	struct fsnotify_group *group;
-	struct list_head event_list;
-};
-
-/*
- * all of the information about the original object we want to now send to
- * a group.  If you want to carry more info from the accessing task to the
- * listener this structure is where you need to be adding fields.
- */
-struct fsnotify_event {
-	/*
-	 * If we create an event we are also likely going to need a holder
-	 * to link to a group.  So embed one holder in the event.  Means only
-	 * one allocation for the common case where we only have one group
-	 */
-	struct fsnotify_event_holder holder;
-	spinlock_t lock;	/* protection for the associated event_holder and private_list */
-	/* to_tell may ONLY be dereferenced during handle_event(). */
-	struct inode *to_tell;	/* either the inode the event happened to or its parent */
-	/*
-	 * depending on the event type we should have either a path or inode
-	 * We hold a reference on path, but NOT on inode.  Since we have the ref on
-	 * the path, it may be dereferenced at any point during this object's
-	 * lifetime.  That reference is dropped when this object's refcnt hits
-	 * 0.  If this event contains an inode instead of a path, the inode may
-	 * ONLY be used during handle_event().
-	 */
-	union {
-		struct path path;
-		struct inode *inode;
-	};
 /* when calling fsnotify tell it if the data is a path or inode */
 #define FSNOTIFY_EVENT_NONE	0
 #define FSNOTIFY_EVENT_PATH	1
 #define FSNOTIFY_EVENT_INODE	2
-	int data_type;		/* which of the above union we have */
-	atomic_t refcnt;	/* how many groups still are using/need to send this event */
-	__u32 mask;		/* the type of access, bitwise OR for FS_* event types */
-
-	u32 sync_cookie;	/* used to corrolate events, namely inotify mv events */
-	const unsigned char *file_name;
-	size_t name_len;
-	struct pid *tgid;
-
-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
-	__u32 response;	/* userspace answer to question */
-#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
-
-	struct list_head private_data_list;	/* groups can store private data here */
-};
 
 /*
  * Inode specific fields in an fsnotify_mark
@@ -370,17 +318,12 @@ extern void fsnotify_put_group(struct fsnotify_group *group);
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* fasync handler function */
 extern int fsnotify_fasync(int fd, struct file *file, int on);
-/* take a reference to an event */
-extern void fsnotify_get_event(struct fsnotify_event *event);
-extern void fsnotify_put_event(struct fsnotify_event *event);
-/* find private data previously attached to an event and unlink it */
-extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
-									   struct fsnotify_event *event);
-
+/* Free event from memory */
+extern void fsnotify_destroy_event(struct fsnotify_group *group,
+				   struct fsnotify_event *event);
 /* attach the event to the group notification queue */
 extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
 							struct fsnotify_event *event,
-							struct fsnotify_event_private_data *priv,
 							struct fsnotify_event *(*merge)(struct list_head *,
 											struct fsnotify_event *));
 /* true if the group notification queue is empty */
@@ -430,15 +373,8 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
 extern void fsnotify_unmount_inodes(struct list_head *list);
 
 /* put here because inotify does some weird stuff when destroying watches */
-extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
-						    void *data, int data_is,
-						    const unsigned char *name,
-						    u32 cookie, gfp_t gfp);
-
-/* fanotify likes to change events after they are on lists... */
-extern struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event);
-extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
-				  struct fsnotify_event *new_event);
+extern void fsnotify_init_event(struct fsnotify_event *event,
+				struct inode *to_tell, u32 mask);
 
 #else
 

+ 23 - 0
include/linux/huge_mm.h

@@ -157,6 +157,26 @@ static inline int hpage_nr_pages(struct page *page)
 		return HPAGE_PMD_NR;
 	return 1;
 }
+/*
+ * compound_trans_head() should be used instead of compound_head(),
+ * whenever the "page" passed as parameter could be the tail of a
+ * transparent hugepage that could be undergoing a
+ * __split_huge_page_refcount(). The page structure layout often
+ * changes across releases and it makes extensive use of unions. So if
+ * the page structure layout will change in a way that
+ * page->first_page gets clobbered by __split_huge_page_refcount, the
+ * implementation making use of smp_rmb() will be required.
+ *
+ * Currently we define compound_trans_head as compound_head, because
+ * page->private is in the same union with page->first_page, and
+ * page->private isn't clobbered. However this also means we're
+ * currently leaving dirt into the page->private field of anonymous
+ * pages resulting from a THP split, instead of setting page->private
+ * to zero like for every other page that has PG_private not set. But
+ * anonymous pages don't use page->private so this is not a problem.
+ */
+#if 0
+/* This will be needed if page->private will be clobbered in split_huge_page */
 static inline struct page *compound_trans_head(struct page *page)
 {
 	if (PageTail(page)) {
@@ -174,6 +194,9 @@ static inline struct page *compound_trans_head(struct page *page)
 	}
 	return page;
 }
+#else
+#define compound_trans_head(page) compound_head(page)
+#endif
 
 extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp);

+ 1 - 6
include/linux/hugetlb.h

@@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
 int PageHuge(struct page *page);
-int PageHeadHuge(struct page *page_head);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -104,11 +103,6 @@ static inline int PageHuge(struct page *page)
 	return 0;
 }
 
-static inline int PageHeadHuge(struct page *page_head)
-{
-	return 0;
-}
-
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 }
@@ -360,6 +354,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 
 static inline struct hstate *page_hstate(struct page *page)
 {
+	VM_BUG_ON(!PageHuge(page));
 	return size_to_hstate(PAGE_SIZE << compound_order(page));
 }
 

+ 2 - 0
include/linux/init_task.h

@@ -41,6 +41,7 @@ extern struct fs_struct init_fs;
 
 #define INIT_SIGNALS(sig) {						\
 	.nr_threads	= 1,						\
+	.thread_head	= LIST_HEAD_INIT(init_task.thread_node),	\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
 	.shared_pending	= { 						\
 		.list = LIST_HEAD_INIT(sig.shared_pending.list),	\
@@ -222,6 +223,7 @@ extern struct task_group root_task_group;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.thread_group	= LIST_HEAD_INIT(tsk.thread_group),		\
+	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),	\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\

+ 3 - 12
include/linux/ksm.h

@@ -73,11 +73,7 @@ static inline void set_page_stable_node(struct page *page,
 struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address);
 
-int page_referenced_ksm(struct page *page,
-			struct mem_cgroup *memcg, unsigned long *vm_flags);
-int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
-int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
-		  struct vm_area_struct *, unsigned long, void *), void *arg);
+int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
 
 #else  /* !CONFIG_KSM */
@@ -115,13 +111,8 @@ static inline int page_referenced_ksm(struct page *page,
 	return 0;
 }
 
-static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
-{
-	return 0;
-}
-
-static inline int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page*,
-		struct vm_area_struct *, unsigned long, void *), void *arg)
+static inline int rmap_walk_ksm(struct page *page,
+			struct rmap_walk_control *rwc)
 {
 	return 0;
 }

+ 49 - 5
include/linux/memblock.h

@@ -19,9 +19,13 @@
 
 #define INIT_MEMBLOCK_REGIONS	128
 
+/* Definition of memblock flags. */
+#define MEMBLOCK_HOTPLUG	0x1	/* hotpluggable region */
+
 struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
+	unsigned long flags;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	int nid;
 #endif
@@ -43,12 +47,17 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
+#ifdef CONFIG_MOVABLE_NODE
+/* If movable_node boot option specified */
+extern bool movable_node_enabled;
+#endif /* CONFIG_MOVABLE_NODE */
 
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
-phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
-				phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
+					    phys_addr_t start, phys_addr_t end,
+					    int nid);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
@@ -59,6 +68,28 @@ int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_free(phys_addr_t base, phys_addr_t size);
 int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
+int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
+int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
+#ifdef CONFIG_MOVABLE_NODE
+static inline bool memblock_is_hotpluggable(struct memblock_region *m)
+{
+	return m->flags & MEMBLOCK_HOTPLUG;
+}
+
+static inline bool movable_node_is_enabled(void)
+{
+	return movable_node_enabled;
+}
+#else
+static inline bool memblock_is_hotpluggable(struct memblock_region *m)
+{
+	return false;
+}
+static inline bool movable_node_is_enabled(void)
+{
+	return false;
+}
+#endif
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
@@ -87,7 +118,7 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
 /**
  * for_each_free_mem_range - iterate through free memblock areas
  * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
@@ -107,7 +138,7 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
 /**
  * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
  * @i: u64 used as loop variable
- * @nid: node selector, %MAX_NUMNODES for all nodes
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
@@ -121,8 +152,21 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
 	     i != (u64)ULLONG_MAX;					\
 	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
 
+static inline void memblock_set_region_flags(struct memblock_region *r,
+					     unsigned long flags)
+{
+	r->flags |= flags;
+}
+
+static inline void memblock_clear_region_flags(struct memblock_region *r,
+					       unsigned long flags)
+{
+	r->flags &= ~flags;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
+int memblock_set_node(phys_addr_t base, phys_addr_t size,
+		      struct memblock_type *type, int nid);
 
 static inline void memblock_set_region_node(struct memblock_region *r, int nid)
 {

+ 0 - 32
include/linux/mempolicy.h

@@ -211,20 +211,8 @@ static inline void mpol_get(struct mempolicy *pol)
 {
 }
 
-static inline struct mempolicy *mpol_dup(struct mempolicy *old)
-{
-	return NULL;
-}
-
 struct shared_policy {};
 
-static inline int mpol_set_shared_policy(struct shared_policy *info,
-					struct vm_area_struct *vma,
-					struct mempolicy *new)
-{
-	return -EINVAL;
-}
-
 static inline void mpol_shared_policy_init(struct shared_policy *sp,
 						struct mempolicy *mpol)
 {
@@ -234,12 +222,6 @@ static inline void mpol_free_shared_policy(struct shared_policy *p)
 {
 }
 
-static inline struct mempolicy *
-mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
-{
-	return NULL;
-}
-
 #define vma_policy(vma) NULL
 
 static inline int
@@ -266,10 +248,6 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 }
 
-static inline void mpol_fix_fork_child_flag(struct task_struct *p)
-{
-}
-
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 				unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask)
@@ -284,12 +262,6 @@ static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
 	return false;
 }
 
-static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
-			const nodemask_t *mask)
-{
-	return false;
-}
-
 static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 				   const nodemask_t *to, int flags)
 {
@@ -307,10 +279,6 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 }
 #endif
 
-static inline void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
-{
-}
-
 static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
 				 unsigned long address)
 {

+ 0 - 6
include/linux/migrate.h

@@ -35,16 +35,12 @@ enum migrate_reason {
 
 #ifdef CONFIG_MIGRATION
 
-extern void putback_lru_pages(struct list_head *l);
 extern void putback_movable_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, enum migrate_mode mode, int reason);
 
-extern int fail_migrate_page(struct address_space *,
-			struct page *, struct page *);
-
 extern int migrate_prep(void);
 extern int migrate_prep_local(void);
 extern int migrate_vmas(struct mm_struct *mm,
@@ -59,7 +55,6 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
 		int extra_count);
 #else
 
-static inline void putback_lru_pages(struct list_head *l) {}
 static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, enum migrate_mode mode, int reason)
@@ -86,7 +81,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 /* Possible settings for the migrate_page() method in address_operations */
 #define migrate_page NULL
-#define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
 

+ 61 - 9
include/linux/mm.h

@@ -57,6 +57,15 @@ extern int sysctl_legacy_va_layout;
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
+extern int sysctl_overcommit_memory;
+extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
+
+extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
+				    size_t *, loff_t *);
+extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
+				    size_t *, loff_t *);
+
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
 /* to align the pointer to the (next) page boundary */
@@ -414,15 +423,44 @@ static inline int page_count(struct page *page)
 	return atomic_read(&compound_head(page)->_count);
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+extern int PageHeadHuge(struct page *page_head);
+#else /* CONFIG_HUGETLB_PAGE */
+static inline int PageHeadHuge(struct page *page_head)
+{
+	return 0;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static inline bool __compound_tail_refcounted(struct page *page)
+{
+	return !PageSlab(page) && !PageHeadHuge(page);
+}
+
+/*
+ * This takes a head page as parameter and tells if the
+ * tail page reference counting can be skipped.
+ *
+ * For this to be safe, PageSlab and PageHeadHuge must remain true on
+ * any given page where they return true here, until all tail pins
+ * have been released.
+ */
+static inline bool compound_tail_refcounted(struct page *page)
+{
+	VM_BUG_ON(!PageHead(page));
+	return __compound_tail_refcounted(page);
+}
+
 static inline void get_huge_page_tail(struct page *page)
 {
 	/*
-	 * __split_huge_page_refcount() cannot run
-	 * from under us.
+	 * __split_huge_page_refcount() cannot run from under us.
 	 */
+	VM_BUG_ON(!PageTail(page));
 	VM_BUG_ON(page_mapcount(page) < 0);
 	VM_BUG_ON(atomic_read(&page->_count) != 0);
-	atomic_inc(&page->_mapcount);
+	if (compound_tail_refcounted(page->first_page))
+		atomic_inc(&page->_mapcount);
 }
 
 extern bool __get_page_tail(struct page *page);
@@ -846,11 +884,14 @@ static __always_inline void *lowmem_page_address(const struct page *page)
 #endif
 
 #if defined(WANT_PAGE_VIRTUAL)
-#define page_address(page) ((page)->virtual)
-#define set_page_address(page, address)			\
-	do {						\
-		(page)->virtual = (address);		\
-	} while(0)
+static inline void *page_address(const struct page *page)
+{
+	return page->virtual;
+}
+static inline void set_page_address(struct page *page, void *address)
+{
+	page->virtual = address;
+}
 #define page_address_init()  do { } while(0)
 #endif
 
@@ -984,7 +1025,6 @@ extern void pagefault_out_of_memory(void);
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */
-#define SHOW_MEM_FILTER_PAGE_COUNT	(0x0002u)	/* page type count */
 
 extern void show_free_areas(unsigned int flags);
 extern bool skip_free_areas_node(unsigned int flags, int nid);
@@ -1318,6 +1358,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 
 #if USE_SPLIT_PTE_PTLOCKS
 #if ALLOC_SPLIT_PTLOCKS
+void __init ptlock_cache_init(void);
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 
@@ -1326,6 +1367,10 @@ static inline spinlock_t *ptlock_ptr(struct page *page)
 	return page->ptl;
 }
 #else /* ALLOC_SPLIT_PTLOCKS */
+static inline void ptlock_cache_init(void)
+{
+}
+
 static inline bool ptlock_alloc(struct page *page)
 {
 	return true;
@@ -1378,10 +1423,17 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
 	return &mm->page_table_lock;
 }
+static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
+static inline void pgtable_init(void)
+{
+	ptlock_cache_init();
+	pgtable_cache_init();
+}
+
 static inline bool pgtable_page_ctor(struct page *page)
 {
 	inc_zone_page_state(page, NR_PAGETABLE);

+ 1 - 0
include/linux/mman.h

@@ -9,6 +9,7 @@
 
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
 extern struct percpu_counter vm_committed_as;
 
 #ifdef CONFIG_SMP

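sysctl_overcommit_kbytes adds an absolute alternative to vm.overcommit_ratio for strict overcommit accounting. A hedged user-space sketch of reading it through procfs; the path /proc/sys/vm/overcommit_kbytes follows the usual vm.* sysctl naming and is an assumption here, as is the exact interaction with overcommit_ratio noted in the comment:

#include <stdio.h>

int main(void)
{
	unsigned long kbytes;
	FILE *f = fopen("/proc/sys/vm/overcommit_kbytes", "r");

	if (!f) {
		perror("overcommit_kbytes");	/* kernel without this knob */
		return 1;
	}
	if (fscanf(f, "%lu", &kbytes) == 1)
		printf("vm.overcommit_kbytes = %lu\n", kbytes);
	fclose(f);

	/*
	 * Writing works the same way with enough privilege, e.g.
	 *   echo 1048576 > /proc/sys/vm/overcommit_kbytes
	 * Per the documentation added alongside this series, setting
	 * overcommit_kbytes is expected to zero overcommit_ratio
	 * (and vice versa); only one of the two is in effect.
	 */
	return 0;
}
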
+ 7 - 4
include/linux/mmzone.h

@@ -489,6 +489,12 @@ struct zone {
 	unsigned long		present_pages;
 	unsigned long		managed_pages;
 
+	/*
+	 * Number of MIGRATE_RESEVE page block. To maintain for just
+	 * optimization. Protected by zone->lock.
+	 */
+	int			nr_migrate_reserve_block;
+
 	/*
 	 * rarely used fields:
 	 */
@@ -758,10 +764,7 @@ typedef struct pglist_data {
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
 #ifdef CONFIG_NUMA_BALANCING
-	/*
-	 * Lock serializing the per destination node AutoNUMA memory
-	 * migration rate limiting data.
-	 */
+	/* Lock serializing the migrate rate limiting window */
 	spinlock_t numabalancing_migrate_lock;
 
 	/* Rate limiting time interval */

+ 6 - 72
include/linux/posix_acl.h

@@ -94,78 +94,12 @@ extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
 extern struct posix_acl *get_posix_acl(struct inode *, int);
 extern int set_posix_acl(struct inode *, int, struct posix_acl *);
 
-#ifdef CONFIG_FS_POSIX_ACL
-static inline struct posix_acl **acl_by_type(struct inode *inode, int type)
-{
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		return &inode->i_acl;
-	case ACL_TYPE_DEFAULT:
-		return &inode->i_default_acl;
-	default:
-		BUG();
-	}
-}
-
-static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
-{
-	struct posix_acl **p = acl_by_type(inode, type);
-	struct posix_acl *acl = ACCESS_ONCE(*p);
-	if (acl) {
-		spin_lock(&inode->i_lock);
-		acl = *p;
-		if (acl != ACL_NOT_CACHED)
-			acl = posix_acl_dup(acl);
-		spin_unlock(&inode->i_lock);
-	}
-	return acl;
-}
-
-static inline struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
-{
-	return rcu_dereference(*acl_by_type(inode, type));
-}
-
-static inline void set_cached_acl(struct inode *inode,
-				  int type,
-				  struct posix_acl *acl)
-{
-	struct posix_acl **p = acl_by_type(inode, type);
-	struct posix_acl *old;
-	spin_lock(&inode->i_lock);
-	old = *p;
-	rcu_assign_pointer(*p, posix_acl_dup(acl));
-	spin_unlock(&inode->i_lock);
-	if (old != ACL_NOT_CACHED)
-		posix_acl_release(old);
-}
-
-static inline void forget_cached_acl(struct inode *inode, int type)
-{
-	struct posix_acl **p = acl_by_type(inode, type);
-	struct posix_acl *old;
-	spin_lock(&inode->i_lock);
-	old = *p;
-	*p = ACL_NOT_CACHED;
-	spin_unlock(&inode->i_lock);
-	if (old != ACL_NOT_CACHED)
-		posix_acl_release(old);
-}
-
-static inline void forget_all_cached_acls(struct inode *inode)
-{
-	struct posix_acl *old_access, *old_default;
-	spin_lock(&inode->i_lock);
-	old_access = inode->i_acl;
-	old_default = inode->i_default_acl;
-	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
-	spin_unlock(&inode->i_lock);
-	if (old_access != ACL_NOT_CACHED)
-		posix_acl_release(old_access);
-	if (old_default != ACL_NOT_CACHED)
-		posix_acl_release(old_default);
-}
-#endif
+struct posix_acl **acl_by_type(struct inode *inode, int type);
+struct posix_acl *get_cached_acl(struct inode *inode, int type);
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
+void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl);
+void forget_cached_acl(struct inode *inode, int type);
+void forget_all_cached_acls(struct inode *inode);
 
 static inline void cache_no_acl(struct inode *inode)
 {

+ 22 - 5
include/linux/rmap.h

@@ -184,13 +184,13 @@ static inline void page_dup_rmap(struct page *page)
 int page_referenced(struct page *, int is_locked,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int page_referenced_one(struct page *, struct vm_area_struct *,
-	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+	unsigned long address, void *arg);
 
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
 int try_to_unmap_one(struct page *, struct vm_area_struct *,
-			unsigned long address, enum ttu_flags flags);
+			unsigned long address, void *arg);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
@@ -236,10 +236,27 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
- * Called by migrate.c to remove migration ptes, but might be used more later.
+ * rmap_walk_control: To control rmap traversing for specific needs
+ *
+ * arg: passed to rmap_one() and invalid_vma()
+ * rmap_one: executed on each vma where page is mapped
+ * done: for checking traversing termination condition
+ * file_nonlinear: for handling file nonlinear mapping
+ * anon_lock: for getting anon_lock by optimized way rather than default
+ * invalid_vma: for skipping uninterested vma
  */
-int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
-		struct vm_area_struct *, unsigned long, void *), void *arg);
+struct rmap_walk_control {
+	void *arg;
+	int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
+					unsigned long addr, void *arg);
+	int (*done)(struct page *page);
+	int (*file_nonlinear)(struct page *, struct address_space *,
+					struct vm_area_struct *vma);
+	struct anon_vma *(*anon_lock)(struct page *page);
+	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
+};
+
+int rmap_walk(struct page *page, struct rmap_walk_control *rwc);
 
 #else	/* !CONFIG_MMU */
 

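The rmap_walk_control conversion replaces several function-pointer and flag parameters with one control structure whose callbacks steer the traversal, as described in the comment above. A generic user-space model of that callback-structure pattern follows; the items being walked are plain array elements and none of the names are kernel API, they only mirror the shape of the structure (arg, rmap_one, done, invalid_vma):

#include <stdbool.h>
#include <stdio.h>

struct item { int id; bool interesting; };

/* Control structure mirroring the shape of rmap_walk_control:
 * a per-walk argument plus callbacks for visiting, skipping and stopping. */
struct walk_control {
	void *arg;
	int (*visit_one)(struct item *it, void *arg);	/* like ->rmap_one */
	bool (*invalid)(struct item *it, void *arg);	/* like ->invalid_vma */
	int (*done)(void *arg);				/* like ->done */
};

static int walk(struct item *items, int n, struct walk_control *wc)
{
	for (int i = 0; i < n; i++) {
		if (wc->invalid && wc->invalid(&items[i], wc->arg))
			continue;
		int ret = wc->visit_one(&items[i], wc->arg);
		if (ret)
			return ret;
		if (wc->done && wc->done(wc->arg))
			break;
	}
	return 0;
}

static int count_visit(struct item *it, void *arg)
{
	int *count = arg;

	printf("visiting item %d\n", it->id);
	(*count)++;
	return 0;
}

static bool skip_boring(struct item *it, void *arg)
{
	(void)arg;
	return !it->interesting;
}

int main(void)
{
	struct item items[] = { {1, true}, {2, false}, {3, true} };
	int count = 0;
	struct walk_control wc = {
		.arg = &count,
		.visit_one = count_visit,
		.invalid = skip_boring,
	};

	walk(items, 3, &wc);
	printf("visited %d items\n", count);
	return 0;
}
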
+ 12 - 0
include/linux/sched.h

@@ -549,6 +549,7 @@ struct signal_struct {
 	atomic_t		sigcnt;
 	atomic_t		live;
 	int			nr_threads;
+	struct list_head	thread_head;
 
 	wait_queue_head_t	wait_chldexit;	/* for wait4() */
 
@@ -1271,6 +1272,7 @@ struct task_struct {
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
 	struct list_head thread_group;
+	struct list_head thread_node;
 
 	struct completion *vfork_done;		/* for vfork() */
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
@@ -2341,6 +2343,16 @@ extern bool current_is_single_threaded(void);
 #define while_each_thread(g, t) \
 	while ((t = next_thread(t)) != g)
 
+#define __for_each_thread(signal, t)	\
+	list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t)		\
+	__for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t)	\
+	for_each_process(p) for_each_thread(p, t)
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;

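The comment above for_each_process_thread() matters in practice: because the macro expands to two nested for loops, a break only leaves the inner (per-thread) loop and the outer (per-process) loop keeps going. A tiny standalone demonstration of that pitfall with placeholder loops; nothing here is kernel code:

#include <stdio.h>

/* Double-loop macro with the same shape as for_each_process_thread(). */
#define for_each_outer_inner(o, i) \
	for ((o) = 0; (o) < 3; (o)++) \
		for ((i) = 0; (i) < 3; (i)++)

int main(void)
{
	int o, i;

	for_each_outer_inner(o, i) {
		printf("o=%d i=%d\n", o, i);
		if (i == 1)
			break;	/* leaves only the inner loop... */
	}
	/* ...so pairs with o=1 and o=2 are still printed. Use a flag or
	 * a goto when the intent is to stop the whole double loop. */
	return 0;
}
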
+ 42 - 0
include/trace/events/compaction.h

@@ -67,6 +67,48 @@ TRACE_EVENT(mm_compaction_migratepages,
 		__entry->nr_failed)
 );
 
+TRACE_EVENT(mm_compaction_begin,
+	TP_PROTO(unsigned long zone_start, unsigned long migrate_start,
+		unsigned long free_start, unsigned long zone_end),
+
+	TP_ARGS(zone_start, migrate_start, free_start, zone_end),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, zone_start)
+		__field(unsigned long, migrate_start)
+		__field(unsigned long, free_start)
+		__field(unsigned long, zone_end)
+	),
+
+	TP_fast_assign(
+		__entry->zone_start = zone_start;
+		__entry->migrate_start = migrate_start;
+		__entry->free_start = free_start;
+		__entry->zone_end = zone_end;
+	),
+
+	TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu",
+		__entry->zone_start,
+		__entry->migrate_start,
+		__entry->free_start,
+		__entry->zone_end)
+);
+
+TRACE_EVENT(mm_compaction_end,
+	TP_PROTO(int status),
+
+	TP_ARGS(status),
+
+	TP_STRUCT__entry(
+		__field(int, status)
+	),
+
+	TP_fast_assign(
+		__entry->status = status;
+	),
+
+	TP_printk("status=%d", __entry->status)
+);
 
 #endif /* _TRACE_COMPACTION_H */
 

+ 26 - 0
include/trace/events/migrate.h

@@ -45,6 +45,32 @@ TRACE_EVENT(mm_migrate_pages,
 		__print_symbolic(__entry->reason, MIGRATE_REASON))
 );
 
+TRACE_EVENT(mm_numa_migrate_ratelimit,
+
+	TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages),
+
+	TP_ARGS(p, dst_nid, nr_pages),
+
+	TP_STRUCT__entry(
+		__array(	char,		comm,	TASK_COMM_LEN)
+		__field(	pid_t,		pid)
+		__field(	int,		dst_nid)
+		__field(	unsigned long,	nr_pages)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+		__entry->pid		= p->pid;
+		__entry->dst_nid	= dst_nid;
+		__entry->nr_pages	= nr_pages;
+	),
+
+	TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu",
+		__entry->comm,
+		__entry->pid,
+		__entry->dst_nid,
+		__entry->nr_pages)
+);
 #endif /* _TRACE_MIGRATE_H */
 
 /* This part must be outside protection */

+ 87 - 0
include/trace/events/sched.h

@@ -443,6 +443,93 @@ TRACE_EVENT(sched_process_hang,
 );
 #endif /* CONFIG_DETECT_HUNG_TASK */
 
+DECLARE_EVENT_CLASS(sched_move_task_template,
+
+	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+	TP_ARGS(tsk, src_cpu, dst_cpu),
+
+	TP_STRUCT__entry(
+		__field( pid_t,	pid			)
+		__field( pid_t,	tgid			)
+		__field( pid_t,	ngid			)
+		__field( int,	src_cpu			)
+		__field( int,	src_nid			)
+		__field( int,	dst_cpu			)
+		__field( int,	dst_nid			)
+	),
+
+	TP_fast_assign(
+		__entry->pid		= task_pid_nr(tsk);
+		__entry->tgid		= task_tgid_nr(tsk);
+		__entry->ngid		= task_numa_group_id(tsk);
+		__entry->src_cpu	= src_cpu;
+		__entry->src_nid	= cpu_to_node(src_cpu);
+		__entry->dst_cpu	= dst_cpu;
+		__entry->dst_nid	= cpu_to_node(dst_cpu);
+	),
+
+	TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
+			__entry->pid, __entry->tgid, __entry->ngid,
+			__entry->src_cpu, __entry->src_nid,
+			__entry->dst_cpu, __entry->dst_nid)
+);
+
+/*
+ * Tracks migration of tasks from one runqueue to another. Can be used to
+ * detect if automatic NUMA balancing is bouncing between nodes
+ */
+DEFINE_EVENT(sched_move_task_template, sched_move_numa,
+	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+	TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
+	TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+	TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+TRACE_EVENT(sched_swap_numa,
+
+	TP_PROTO(struct task_struct *src_tsk, int src_cpu,
+		 struct task_struct *dst_tsk, int dst_cpu),
+
+	TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
+
+	TP_STRUCT__entry(
+		__field( pid_t,	src_pid			)
+		__field( pid_t,	src_tgid		)
+		__field( pid_t,	src_ngid		)
+		__field( int,	src_cpu			)
+		__field( int,	src_nid			)
+		__field( pid_t,	dst_pid			)
+		__field( pid_t,	dst_tgid		)
+		__field( pid_t,	dst_ngid		)
+		__field( int,	dst_cpu			)
+		__field( int,	dst_nid			)
+	),
+
+	TP_fast_assign(
+		__entry->src_pid	= task_pid_nr(src_tsk);
+		__entry->src_tgid	= task_tgid_nr(src_tsk);
+		__entry->src_ngid	= task_numa_group_id(src_tsk);
+		__entry->src_cpu	= src_cpu;
+		__entry->src_nid	= cpu_to_node(src_cpu);
+		__entry->dst_pid	= task_pid_nr(dst_tsk);
+		__entry->dst_tgid	= task_tgid_nr(dst_tsk);
+		__entry->dst_ngid	= task_numa_group_id(dst_tsk);
+		__entry->dst_cpu	= dst_cpu;
+		__entry->dst_nid	= cpu_to_node(dst_cpu);
+	),
+
+	TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
+			__entry->src_pid, __entry->src_tgid, __entry->src_ngid,
+			__entry->src_cpu, __entry->src_nid,
+			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
+			__entry->dst_cpu, __entry->dst_nid)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */

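The new sched_move_numa, sched_stick_numa and sched_swap_numa events can be consumed like any other tracepoint. A hedged sketch that enables sched_move_numa through tracefs and tails the trace pipe; the /sys/kernel/debug/tracing mount point is an assumption (some systems use /sys/kernel/tracing instead), and the program needs the matching privileges:

#include <stdio.h>

#define TRACEFS "/sys/kernel/debug/tracing"

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[512];
	FILE *tp;

	if (write_str(TRACEFS "/events/sched/sched_move_numa/enable", "1")) {
		perror("enable sched_move_numa");
		return 1;
	}

	tp = fopen(TRACEFS "/trace_pipe", "r");
	if (!tp) {
		perror("trace_pipe");
		return 1;
	}
	/* Blocks until events arrive, then prints the first few; each line
	 * follows the TP_printk() format from the hunk above
	 * (pid=... tgid=... src_nid=... dst_nid=...). */
	for (int i = 0; i < 5 && fgets(line, sizeof(line), tp); i++)
		fputs(line, stdout);
	fclose(tp);
	return 0;
}
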
+ 6 - 4
init/main.c

@@ -355,9 +355,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
  */
 static void __init setup_command_line(char *command_line)
 {
-	saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
-	initcall_command_line = alloc_bootmem(strlen (boot_command_line)+1);
-	static_command_line = alloc_bootmem(strlen (command_line)+1);
+	saved_command_line =
+		memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+	initcall_command_line =
+		memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+	static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
 	strcpy (saved_command_line, boot_command_line);
 	strcpy (static_command_line, command_line);
 }
@@ -476,7 +478,7 @@ static void __init mm_init(void)
 	mem_init();
 	kmem_cache_init();
 	percpu_init_late();
-	pgtable_cache_init();
+	pgtable_init();
 	vmalloc_init();
 }
 

+ 5 - 15
kernel/audit_tree.c

@@ -912,12 +912,13 @@ static void evict_chunk(struct audit_chunk *chunk)
 }
 
 static int audit_tree_handle_event(struct fsnotify_group *group,
+				   struct inode *to_tell,
 				   struct fsnotify_mark *inode_mark,
-				   struct fsnotify_mark *vfsmonut_mark,
-				   struct fsnotify_event *event)
+				   struct fsnotify_mark *vfsmount_mark,
+				   u32 mask, void *data, int data_type,
+				   const unsigned char *file_name)
 {
-	BUG();
-	return -EOPNOTSUPP;
+	return 0;
 }
 
 static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
@@ -933,19 +934,8 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
 	BUG_ON(atomic_read(&entry->refcnt) < 1);
 }
 
-static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
-				  struct fsnotify_mark *inode_mark,
-				  struct fsnotify_mark *vfsmount_mark,
-				  __u32 mask, void *data, int data_type)
-{
-	return false;
-}
-
 static const struct fsnotify_ops audit_tree_ops = {
 	.handle_event = audit_tree_handle_event,
-	.should_send_event = audit_tree_send_event,
-	.free_group_priv = NULL,
-	.free_event_priv = NULL,
 	.freeing_mark = audit_tree_freeing_mark,
 };
 

Some files were not shown because too many files changed in this diff