
Merge branch 'akpm' (patches from Andrew)

Merge updates from Andrew Morton:

 - a few misc things

 - ocfs2 updates

 - most of MM

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (132 commits)
  hugetlbfs: dirty pages as they are added to pagecache
  mm: export add_swap_extent()
  mm: split SWP_FILE into SWP_ACTIVATED and SWP_FS
  tools/testing/selftests/vm/map_fixed_noreplace.c: add test for MAP_FIXED_NOREPLACE
  mm: thp: relocate flush_cache_range() in migrate_misplaced_transhuge_page()
  mm: thp: fix mmu_notifier in migrate_misplaced_transhuge_page()
  mm: thp: fix MADV_DONTNEED vs migrate_misplaced_transhuge_page race condition
  mm/kasan/quarantine.c: make quarantine_lock a raw_spinlock_t
  mm/gup: cache dev_pagemap while pinning pages
  Revert "x86/e820: put !E820_TYPE_RAM regions into memblock.reserved"
  mm: return zero_resv_unavail optimization
  mm: zero remaining unavailable struct pages
  tools/testing/selftests/vm/gup_benchmark.c: add MAP_HUGETLB option
  tools/testing/selftests/vm/gup_benchmark.c: add MAP_SHARED option
  tools/testing/selftests/vm/gup_benchmark.c: allow user specified file
  tools/testing/selftests/vm/gup_benchmark.c: fix 'write' flag usage
  mm/gup_benchmark.c: add additional pinning methods
  mm/gup_benchmark.c: time put_page()
  mm: don't raise MEMCG_OOM event due to failed high-order allocation
  mm/page-writeback.c: fix range_cyclic writeback vs writepages deadlock
  ...
Linus Torvalds 6 years ago
parent
commit
345671ea0f
100 files changed, 778 insertions and 990 deletions
  1. Documentation/accounting/psi.txt (+73, -0)
  2. Documentation/admin-guide/cgroup-v2.rst (+22, -0)
  3. Documentation/admin-guide/kernel-parameters.txt (+12, -0)
  4. Documentation/filesystems/proc.txt (+4, -0)
  5. Documentation/vm/slub.rst (+9, -3)
  6. Documentation/x86/pat.txt (+2, -2)
  7. arch/alpha/Kconfig (+2, -0)
  8. arch/alpha/kernel/core_irongate.c (+2, -2)
  9. arch/alpha/kernel/setup.c (+12, -86)
  10. arch/alpha/mm/numa.c (+13, -100)
  11. arch/arm/include/asm/hugetlb-3level.h (+1, -31)
  12. arch/arm/include/asm/hugetlb.h (+1, -32)
  13. arch/arm64/include/asm/hugetlb.h (+7, -32)
  14. arch/arm64/include/asm/string.h (+8, -6)
  15. arch/arm64/kernel/arm64ksyms.c (+5, -2)
  16. arch/arm64/lib/memchr.S (+1, -1)
  17. arch/arm64/lib/memcmp.S (+1, -1)
  18. arch/arm64/lib/strchr.S (+1, -1)
  19. arch/arm64/lib/strcmp.S (+1, -1)
  20. arch/arm64/lib/strlen.S (+1, -1)
  21. arch/arm64/lib/strncmp.S (+1, -1)
  22. arch/arm64/lib/strnlen.S (+1, -1)
  23. arch/arm64/lib/strrchr.S (+1, -1)
  24. arch/hexagon/Kconfig (+3, -0)
  25. arch/hexagon/mm/init.c (+8, -12)
  26. arch/ia64/include/asm/hugetlb.h (+5, -42)
  27. arch/ia64/include/asm/pgtable.h (+0, -1)
  28. arch/mips/include/asm/hugetlb.h (+7, -33)
  29. arch/nios2/Kconfig (+3, -0)
  30. arch/nios2/kernel/prom.c (+0, -17)
  31. arch/nios2/kernel/setup.c (+7, -32)
  32. arch/parisc/include/asm/hugetlb.h (+8, -25)
  33. arch/powerpc/include/asm/book3s/32/pgtable.h (+0, -6)
  34. arch/powerpc/include/asm/book3s/64/pgtable.h (+1, -0)
  35. arch/powerpc/include/asm/hugetlb.h (+6, -37)
  36. arch/powerpc/include/asm/nohash/32/pgtable.h (+0, -6)
  37. arch/powerpc/include/asm/nohash/64/pgtable.h (+1, -0)
  38. arch/powerpc/platforms/cell/cpufreq_spudemand.c (+1, -1)
  39. arch/powerpc/platforms/cell/spufs/sched.c (+3, -6)
  40. arch/s390/appldata/appldata_os.c (+0, -4)
  41. arch/sh/include/asm/hugetlb.h (+4, -50)
  42. arch/sparc/include/asm/hugetlb.h (+8, -32)
  43. arch/um/Kconfig (+2, -0)
  44. arch/um/kernel/physmem.c (+10, -12)
  45. arch/unicore32/Kconfig (+1, -0)
  46. arch/unicore32/mm/init.c (+1, -53)
  47. arch/x86/entry/vdso/vma.c (+9, -15)
  48. arch/x86/include/asm/hugetlb.h (+0, -69)
  49. arch/x86/kernel/e820.c (+3, -12)
  50. arch/xtensa/include/asm/Kbuild (+1, -0)
  51. arch/xtensa/include/asm/vga.h (+0, -19)
  52. block/blk-iolatency.c (+5, -3)
  53. drivers/base/node.c (+12, -7)
  54. drivers/cpuidle/governors/menu.c (+0, -4)
  55. drivers/infiniband/hw/hfi1/mmu_rb.c (+0, -1)
  56. drivers/iommu/amd_iommu_v2.c (+0, -1)
  57. drivers/iommu/intel-svm.c (+0, -1)
  58. drivers/misc/sgi-gru/grutlbpurge.c (+0, -1)
  59. drivers/of/fdt.c (+6, -5)
  60. drivers/staging/android/ion/ion_page_pool.c (+4, -4)
  61. fs/cramfs/inode.c (+4, -1)
  62. fs/dcache.c (+9, -29)
  63. fs/iomap.c (+1, -1)
  64. fs/kernfs/mount.c (+3, -0)
  65. fs/ocfs2/alloc.c (+0, -4)
  66. fs/ocfs2/aops.c (+1, -2)
  67. fs/ocfs2/dlm/dlmdebug.c (+1, -1)
  68. fs/ocfs2/dlm/dlmthread.c (+1, -1)
  69. fs/ocfs2/refcounttree.c (+0, -2)
  70. fs/proc/inode.c (+3, -0)
  71. fs/proc/loadavg.c (+0, -3)
  72. fs/proc/meminfo.c (+8, -8)
  73. fs/proc/task_mmu.c (+3, -1)
  74. fs/userfaultfd.c (+4, -4)
  75. include/asm-generic/hugetlb.h (+87, -1)
  76. include/asm-generic/pgtable.h (+2, -2)
  77. include/linux/cgroup-defs.h (+4, -0)
  78. include/linux/cgroup.h (+15, -0)
  79. include/linux/delayacct.h (+23, -0)
  80. include/linux/hmm.h (+1, -1)
  81. include/linux/huge_mm.h (+4, -4)
  82. include/linux/iomap.h (+3, -1)
  83. include/linux/linkage.h (+1, -0)
  84. include/linux/math64.h (+3, -0)
  85. include/linux/memblock.h (+0, -15)
  86. include/linux/memcontrol.h (+13, -2)
  87. include/linux/mm.h (+9, -39)
  88. include/linux/mmu_notifier.h (+3, -24)
  89. include/linux/mmzone.h (+3, -1)
  90. include/linux/page-flags.h (+13, -1)
  91. include/linux/pfn_t.h (+3, -1)
  92. include/linux/psi.h (+53, -0)
  93. include/linux/psi_types.h (+92, -0)
  94. include/linux/sched.h (+10, -3)
  95. include/linux/sched/loadavg.h (+20, -4)
  96. include/linux/slab.h (+45, -11)
  97. include/linux/swap.h (+8, -7)
  98. include/trace/events/mmflags.h (+1, -0)
  99. include/uapi/linux/taskstats.h (+5, -1)
  100. init/Kconfig (+19, -0)

+ 73 - 0
Documentation/accounting/psi.txt

@@ -0,0 +1,73 @@
+================================
+PSI - Pressure Stall Information
+================================
+
+:Date: April, 2018
+:Author: Johannes Weiner <hannes@cmpxchg.org>
+
+When CPU, memory or IO devices are contended, workloads experience
+latency spikes, throughput losses, and run the risk of OOM kills.
+
+Without an accurate measure of such contention, users are forced to
+either play it safe and under-utilize their hardware resources, or
+roll the dice and frequently suffer the disruptions resulting from
+excessive overcommit.
+
+The psi feature identifies and quantifies the disruptions caused by
+such resource crunches and the time impact it has on complex workloads
+or even entire systems.
+
+Having an accurate measure of productivity losses caused by resource
+scarcity aids users in sizing workloads to hardware--or provisioning
+hardware according to workload demand.
+
+As psi aggregates this information in realtime, systems can be managed
+dynamically using techniques such as load shedding, migrating jobs to
+other systems or data centers, or strategically pausing or killing low
+priority or restartable batch jobs.
+
+This allows maximizing hardware utilization without sacrificing
+workload health or risking major disruptions such as OOM kills.
+
+Pressure interface
+==================
+
+Pressure information for each resource is exported through the
+respective file in /proc/pressure/ -- cpu, memory, and io.
+
+The format for CPU is as such:
+
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+and for memory and IO:
+
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+The "some" line indicates the share of time in which at least some
+tasks are stalled on a given resource.
+
+The "full" line indicates the share of time in which all non-idle
+tasks are stalled on a given resource simultaneously. In this state
+actual CPU cycles are going to waste, and a workload that spends
+extended time in this state is considered to be thrashing. This has
+severe impact on performance, and it's useful to distinguish this
+situation from a state where some tasks are stalled but the CPU is
+still doing productive work. As such, time spent in this subset of the
+stall state is tracked separately and exported in the "full" averages.
+
+The ratios are tracked as recent trends over ten, sixty, and three
+hundred second windows, which gives insight into short term events as
+well as medium and long term trends. The total absolute stall time is
+tracked and exported as well, to allow detection of latency spikes
+which wouldn't necessarily make a dent in the time averages, or to
+average trends over custom time frames.
+
+Cgroup2 interface
+=================
+
+In a system with a CONFIG_CGROUP=y kernel and the cgroup2 filesystem
+mounted, pressure stall information is also tracked for tasks grouped
+into cgroups. Each subdirectory in the cgroupfs mountpoint contains
+cpu.pressure, memory.pressure, and io.pressure files; the format is
+the same as the /proc/pressure/ files.

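For readers new to the interface, here is a minimal user-space sketch (not part of this series) that reads the "some" line of /proc/pressure/memory in the format documented above. It assumes a kernel built with CONFIG_PSI=y; the field names follow the layout shown in the text.

/*
 * Hypothetical example, not from this patch set: parse the "some"
 * line of /proc/pressure/memory (requires CONFIG_PSI=y).
 */
#include <stdio.h>

int main(void)
{
	float avg10, avg60, avg300;
	unsigned long long total;
	FILE *f = fopen("/proc/pressure/memory", "r");

	if (!f) {
		perror("fopen /proc/pressure/memory");
		return 1;
	}
	/* Line format: some avg10=0.00 avg60=0.00 avg300=0.00 total=0 */
	if (fscanf(f, "some avg10=%f avg60=%f avg300=%f total=%llu",
		   &avg10, &avg60, &avg300, &total) == 4)
		printf("memory some: avg10=%.2f avg60=%.2f avg300=%.2f total=%llu\n",
		       avg10, avg60, avg300, total);
	fclose(f);
	return 0;
}

The cgroup2 files (cpu.pressure, memory.pressure, io.pressure) described below use the same line format, so the same parsing applies to a per-cgroup path.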
+ 22 - 0
Documentation/admin-guide/cgroup-v2.rst

@@ -966,6 +966,12 @@ All time durations are in microseconds.
 	$PERIOD duration.  "max" for $MAX indicates no limit.  If only
 	one number is written, $MAX is updated.
 
+  cpu.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for CPU. See
+	Documentation/accounting/psi.txt for details.
+
 
 Memory
 ------
@@ -1127,6 +1133,10 @@ PAGE_SIZE multiple when read back.
 		disk readahead.  For now OOM in memory cgroup kills
 		tasks iff shortage has happened inside page fault.
 
+		This event is not raised if the OOM killer is not
+		considered as an option, e.g. for failed high-order
+		allocations.
+
 	  oom_kill
 		The number of processes belonging to this cgroup
 		killed by any kind of OOM killer.
@@ -1271,6 +1281,12 @@ PAGE_SIZE multiple when read back.
 	higher than the limit for an extended period of time.  This
 	reduces the impact on the workload and memory management.
 
+  memory.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for memory. See
+	Documentation/accounting/psi.txt for details.
+
 
 Usage Guidelines
 ~~~~~~~~~~~~~~~~
@@ -1408,6 +1424,12 @@ IO Interface Files
 
 	  8:16 rbps=2097152 wbps=max riops=max wiops=max
 
+  io.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for IO. See
+	Documentation/accounting/psi.txt for details.
+
 
 Writeback
 ~~~~~~~~~

+ 12 - 0
Documentation/admin-guide/kernel-parameters.txt

@@ -4851,6 +4851,18 @@
 			This is actually a boot loader parameter; the value is
 			passed to the kernel using a special protocol.
 
+	vm_debug[=options]	[KNL] Available with CONFIG_DEBUG_VM=y.
+			May slow down system boot speed, especially when
+			enabled on systems with a large amount of memory.
+			All options are enabled by default, and this
+			interface is meant to allow for selectively
+			enabling or disabling specific virtual memory
+			debugging features.
+
+			Available options are:
+			  P	Enable page structure init time poisoning
+			  -	Disable all of the above options
+
 	vmalloc=nn[KMG]	[KNL,BOOT] Forces the vmalloc area to have an exact
 			size of <nn>. This can be used to increase the
 			minimum size (128MB on x86). It can also be used to

+ 4 - 0
Documentation/filesystems/proc.txt

@@ -858,6 +858,7 @@ Writeback:           0 kB
 AnonPages:      861800 kB
 Mapped:         280372 kB
 Shmem:             644 kB
+KReclaimable:   168048 kB
 Slab:           284364 kB
 SReclaimable:   159856 kB
 SUnreclaim:     124508 kB
@@ -925,6 +926,9 @@ AnonHugePages: Non-file backed huge pages mapped into userspace page tables
 ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated
               with huge pages
 ShmemPmdMapped: Shared memory mapped into userspace with huge pages
+KReclaimable: Kernel allocations that the kernel will attempt to reclaim
+              under memory pressure. Includes SReclaimable (below), and other
+              direct allocations with a shrinker.
         Slab: in-kernel data structures cache
 SReclaimable: Part of Slab, that might be reclaimed, such as caches
   SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure

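As a quick illustration of the new counter (a hypothetical sketch, not code from this series), user space can pick KReclaimable out of /proc/meminfo the same way as any other meminfo field:

/* Hypothetical sketch: print the new KReclaimable counter from /proc/meminfo. */
#include <stdio.h>

int main(void)
{
	char line[256];
	unsigned long kb;
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		/* Lines look like: "KReclaimable:   168048 kB" */
		if (sscanf(line, "KReclaimable: %lu kB", &kb) == 1) {
			printf("KReclaimable: %lu kB\n", kb);
			break;
		}
	}
	fclose(f);
	return 0;
}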
+ 9 - 3
Documentation/vm/slub.rst

@@ -36,9 +36,10 @@ debugging is enabled. Format:
 
 slub_debug=<Debug-Options>
 	Enable options for all slabs
-slub_debug=<Debug-Options>,<slab name>
-	Enable options only for select slabs
 
+slub_debug=<Debug-Options>,<slab name1>,<slab name2>,...
+	Enable options only for select slabs (no spaces
+	after a comma)
 
 Possible debug options are::
 
@@ -62,7 +63,12 @@ Trying to find an issue in the dentry cache? Try::
 
 	slub_debug=,dentry
 
-to only enable debugging on the dentry cache.
+to only enable debugging on the dentry cache.  You may use an asterisk at the
+end of the slab name, in order to cover all slabs with the same prefix.  For
+example, here's how you can poison the dentry cache as well as all kmalloc
+slabs:
+
+	slub_debug=P,kmalloc-*,dentry
 
 Red zoning and tracking may realign the slab.  We can just apply sanity checks
 to the dentry cache with::

+ 2 - 2
Documentation/x86/pat.txt

@@ -90,12 +90,12 @@ pci proc               |    --    |    --      |       WC         |
 Advanced APIs for drivers
 -------------------------
 A. Exporting pages to users with remap_pfn_range, io_remap_pfn_range,
-vm_insert_pfn
+vmf_insert_pfn
 
 Drivers wanting to export some pages to userspace do it by using mmap
 interface and a combination of
 1) pgprot_noncached()
-2) io_remap_pfn_range() or remap_pfn_range() or vm_insert_pfn()
+2) io_remap_pfn_range() or remap_pfn_range() or vmf_insert_pfn()
 
 With PAT support, a new API pgprot_writecombine is being added. So, drivers can
 continue to use the above sequence, with either pgprot_noncached() or

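To show the renamed API from a driver's point of view, here is a hedged sketch: my_dev_fault(), my_dev_base_pfn and my_dev_vm_ops are made-up names; only struct vm_fault and vmf_insert_pfn() are the kernel interfaces the pat.txt change refers to. The point is that the fault handler now returns the vm_fault_t from vmf_insert_pfn() directly instead of translating an errno from vm_insert_pfn().

#include <linux/mm.h>

static unsigned long my_dev_base_pfn;	/* hypothetical: filled in at probe time */

static vm_fault_t my_dev_fault(struct vm_fault *vmf)
{
	unsigned long pfn = my_dev_base_pfn + vmf->pgoff;

	/* vmf_insert_pfn() returns a vm_fault_t, e.g. VM_FAULT_NOPAGE on success. */
	return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
}

static const struct vm_operations_struct my_dev_vm_ops = {
	.fault = my_dev_fault,
};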
+ 2 - 0
arch/alpha/Kconfig

@@ -31,6 +31,8 @@ config ALPHA
 	select ODD_RT_SIGACTION
 	select OLD_SIGSUSPEND
 	select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
+	select HAVE_MEMBLOCK
+	select NO_BOOTMEM
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,

+ 2 - 2
arch/alpha/kernel/core_irongate.c

@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/ptrace.h>
 #include <asm/cacheflush.h>
@@ -241,8 +242,7 @@ albacore_init_arch(void)
 				       size / 1024);
 		}
 #endif
-		reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
-				pci_mem, BOOTMEM_DEFAULT);
+		memblock_reserve(pci_mem, memtop - pci_mem);
 		printk("irongate_init_arch: temporarily reserving "
 			"region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
 	}

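The bootmem-to-memblock conversions in this series (core_irongate.c above, the alpha setup.c/numa.c diffs below, and the hexagon and nios2 ones further down) all follow the same shape: register available RAM with memblock_add() and carve out what must not be handed to the page allocator with memblock_reserve(). A hedged sketch of that pattern, with hypothetical names (my_arch_setup_memory and its arguments are placeholders, not code from any of these files):

#include <linux/memblock.h>

static void __init my_arch_setup_memory(phys_addr_t ram_base, phys_addr_t ram_size,
					 phys_addr_t kernel_start, phys_addr_t kernel_size)
{
	/* Tell memblock which physical ranges exist... */
	memblock_add(ram_base, ram_size);

	/* ...and which parts must stay reserved (here, the kernel image). */
	memblock_reserve(kernel_start, kernel_size);
}

With NO_BOOTMEM selected, there is no bootmem bitmap to size, place and free any more, which is why the hand-rolled "find a contiguous place for the bootmap" loops disappear from setup.c and numa.c below.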
+ 12 - 86
arch/alpha/kernel/setup.c

@@ -30,6 +30,7 @@
 #include <linux/ioport.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
 #include <linux/platform_device.h>
 #include <linux/bootmem.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pci.h>
 #include <linux/pci.h>
 #include <linux/seq_file.h>
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
 #include <linux/root_dev.h>
@@ -312,9 +313,7 @@ setup_memory(void *kernel_end)
 {
 {
 	struct memclust_struct * cluster;
 	struct memclust_struct * cluster;
 	struct memdesc_struct * memdesc;
 	struct memdesc_struct * memdesc;
-	unsigned long start_kernel_pfn, end_kernel_pfn;
-	unsigned long bootmap_size, bootmap_pages, bootmap_start;
-	unsigned long start, end;
+	unsigned long kernel_size;
 	unsigned long i;
 	unsigned long i;
 
 
 	/* Find free clusters, and init and free the bootmem accordingly.  */
 	/* Find free clusters, and init and free the bootmem accordingly.  */
@@ -322,6 +321,8 @@ setup_memory(void *kernel_end)
 	  (hwrpb->mddt_offset + (unsigned long) hwrpb);
 	  (hwrpb->mddt_offset + (unsigned long) hwrpb);
 
 
 	for_each_mem_cluster(memdesc, cluster, i) {
 	for_each_mem_cluster(memdesc, cluster, i) {
+		unsigned long end;
+
 		printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
 		printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
 		       i, cluster->usage, cluster->start_pfn,
 		       i, cluster->usage, cluster->start_pfn,
 		       cluster->start_pfn + cluster->numpages);
 		       cluster->start_pfn + cluster->numpages);
@@ -335,6 +336,9 @@ setup_memory(void *kernel_end)
 		end = cluster->start_pfn + cluster->numpages;
 		end = cluster->start_pfn + cluster->numpages;
 		if (end > max_low_pfn)
 		if (end > max_low_pfn)
 			max_low_pfn = end;
 			max_low_pfn = end;
+
+		memblock_add(PFN_PHYS(cluster->start_pfn),
+			     cluster->numpages << PAGE_SHIFT);
 	}
 	}
 
 
 	/*
 	/*
@@ -363,87 +367,9 @@ setup_memory(void *kernel_end)
 		max_low_pfn = mem_size_limit;
 		max_low_pfn = mem_size_limit;
 	}
 	}
 
 
-	/* Find the bounds of kernel memory.  */
-	start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
-	end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
-	bootmap_start = -1;
-
- try_again:
-	if (max_low_pfn <= end_kernel_pfn)
-		panic("not enough memory to boot");
-
-	/* We need to know how many physically contiguous pages
-	   we'll need for the bootmap.  */
-	bootmap_pages = bootmem_bootmap_pages(max_low_pfn);
-
-	/* Now find a good region where to allocate the bootmap.  */
-	for_each_mem_cluster(memdesc, cluster, i) {
-		if (cluster->usage & 3)
-			continue;
-
-		start = cluster->start_pfn;
-		end = start + cluster->numpages;
-		if (start >= max_low_pfn)
-			continue;
-		if (end > max_low_pfn)
-			end = max_low_pfn;
-		if (start < start_kernel_pfn) {
-			if (end > end_kernel_pfn
-			    && end - end_kernel_pfn >= bootmap_pages) {
-				bootmap_start = end_kernel_pfn;
-				break;
-			} else if (end > start_kernel_pfn)
-				end = start_kernel_pfn;
-		} else if (start < end_kernel_pfn)
-			start = end_kernel_pfn;
-		if (end - start >= bootmap_pages) {
-			bootmap_start = start;
-			break;
-		}
-	}
-
-	if (bootmap_start == ~0UL) {
-		max_low_pfn >>= 1;
-		goto try_again;
-	}
-
-	/* Allocate the bootmap and mark the whole MM as reserved.  */
-	bootmap_size = init_bootmem(bootmap_start, max_low_pfn);
-
-	/* Mark the free regions.  */
-	for_each_mem_cluster(memdesc, cluster, i) {
-		if (cluster->usage & 3)
-			continue;
-
-		start = cluster->start_pfn;
-		end = cluster->start_pfn + cluster->numpages;
-		if (start >= max_low_pfn)
-			continue;
-		if (end > max_low_pfn)
-			end = max_low_pfn;
-		if (start < start_kernel_pfn) {
-			if (end > end_kernel_pfn) {
-				free_bootmem(PFN_PHYS(start),
-					     (PFN_PHYS(start_kernel_pfn)
-					      - PFN_PHYS(start)));
-				printk("freeing pages %ld:%ld\n",
-				       start, start_kernel_pfn);
-				start = end_kernel_pfn;
-			} else if (end > start_kernel_pfn)
-				end = start_kernel_pfn;
-		} else if (start < end_kernel_pfn)
-			start = end_kernel_pfn;
-		if (start >= end)
-			continue;
-
-		free_bootmem(PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start));
-		printk("freeing pages %ld:%ld\n", start, end);
-	}
-
-	/* Reserve the bootmap memory.  */
-	reserve_bootmem(PFN_PHYS(bootmap_start), bootmap_size,
-			BOOTMEM_DEFAULT);
-	printk("reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
+	/* Reserve the kernel memory. */
+	kernel_size = virt_to_phys(kernel_end) - KERNEL_START_PHYS;
+	memblock_reserve(KERNEL_START_PHYS, kernel_size);
 
 
 #ifdef CONFIG_BLK_DEV_INITRD
 #ifdef CONFIG_BLK_DEV_INITRD
 	initrd_start = INITRD_START;
 	initrd_start = INITRD_START;
@@ -459,8 +385,8 @@ setup_memory(void *kernel_end)
 				       initrd_end,
 				       initrd_end,
 				       phys_to_virt(PFN_PHYS(max_low_pfn)));
 				       phys_to_virt(PFN_PHYS(max_low_pfn)));
 		} else {
 		} else {
-			reserve_bootmem(virt_to_phys((void *)initrd_start),
-					INITRD_SIZE, BOOTMEM_DEFAULT);
+			memblock_reserve(virt_to_phys((void *)initrd_start),
+					INITRD_SIZE);
 		}
 		}
 	}
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 #endif /* CONFIG_BLK_DEV_INITRD */

+ 13 - 100
arch/alpha/mm/numa.c

@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/swap.h>
 #include <linux/swap.h>
 #include <linux/initrd.h>
 #include <linux/initrd.h>
 #include <linux/pfn.h>
 #include <linux/pfn.h>
@@ -59,12 +60,10 @@ setup_memory_node(int nid, void *kernel_end)
 	struct memclust_struct * cluster;
 	struct memclust_struct * cluster;
 	struct memdesc_struct * memdesc;
 	struct memdesc_struct * memdesc;
 	unsigned long start_kernel_pfn, end_kernel_pfn;
 	unsigned long start_kernel_pfn, end_kernel_pfn;
-	unsigned long bootmap_size, bootmap_pages, bootmap_start;
 	unsigned long start, end;
 	unsigned long start, end;
 	unsigned long node_pfn_start, node_pfn_end;
 	unsigned long node_pfn_start, node_pfn_end;
 	unsigned long node_min_pfn, node_max_pfn;
 	unsigned long node_min_pfn, node_max_pfn;
 	int i;
 	int i;
-	unsigned long node_datasz = PFN_UP(sizeof(pg_data_t));
 	int show_init = 0;
 	int show_init = 0;
 
 
 	/* Find the bounds of current node */
 	/* Find the bounds of current node */
@@ -134,24 +133,14 @@ setup_memory_node(int nid, void *kernel_end)
 	/* Cute trick to make sure our local node data is on local memory */
 	/* Cute trick to make sure our local node data is on local memory */
 	node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT));
 	node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT));
 #endif
 #endif
-	/* Quasi-mark the pg_data_t as in-use */
-	node_min_pfn += node_datasz;
-	if (node_min_pfn >= node_max_pfn) {
-		printk(" not enough mem to reserve NODE_DATA");
-		return;
-	}
-	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
-
 	printk(" Detected node memory:   start %8lu, end %8lu\n",
 	printk(" Detected node memory:   start %8lu, end %8lu\n",
 	       node_min_pfn, node_max_pfn);
 	       node_min_pfn, node_max_pfn);
 
 
 	DBGDCONT(" DISCONTIG: node_data[%d]   is at 0x%p\n", nid, NODE_DATA(nid));
 	DBGDCONT(" DISCONTIG: node_data[%d]   is at 0x%p\n", nid, NODE_DATA(nid));
-	DBGDCONT(" DISCONTIG: NODE_DATA(%d)->bdata is at 0x%p\n", nid, NODE_DATA(nid)->bdata);
 
 
 	/* Find the bounds of kernel memory.  */
 	/* Find the bounds of kernel memory.  */
 	start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
 	start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
 	end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
 	end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
-	bootmap_start = -1;
 
 
 	if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn))
 	if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn))
 		panic("kernel loaded out of ram");
 		panic("kernel loaded out of ram");
@@ -161,89 +150,11 @@ setup_memory_node(int nid, void *kernel_end)
 	   has much larger alignment than 8Mb, so it's safe. */
 	   has much larger alignment than 8Mb, so it's safe. */
 	node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
 	node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
 
 
-	/* We need to know how many physically contiguous pages
-	   we'll need for the bootmap.  */
-	bootmap_pages = bootmem_bootmap_pages(node_max_pfn-node_min_pfn);
-
-	/* Now find a good region where to allocate the bootmap.  */
-	for_each_mem_cluster(memdesc, cluster, i) {
-		if (cluster->usage & 3)
-			continue;
-
-		start = cluster->start_pfn;
-		end = start + cluster->numpages;
-
-		if (start >= node_max_pfn || end <= node_min_pfn)
-			continue;
-
-		if (end > node_max_pfn)
-			end = node_max_pfn;
-		if (start < node_min_pfn)
-			start = node_min_pfn;
-
-		if (start < start_kernel_pfn) {
-			if (end > end_kernel_pfn
-			    && end - end_kernel_pfn >= bootmap_pages) {
-				bootmap_start = end_kernel_pfn;
-				break;
-			} else if (end > start_kernel_pfn)
-				end = start_kernel_pfn;
-		} else if (start < end_kernel_pfn)
-			start = end_kernel_pfn;
-		if (end - start >= bootmap_pages) {
-			bootmap_start = start;
-			break;
-		}
-	}
-
-	if (bootmap_start == -1)
-		panic("couldn't find a contiguous place for the bootmap");
-
-	/* Allocate the bootmap and mark the whole MM as reserved.  */
-	bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start,
-					 node_min_pfn, node_max_pfn);
-	DBGDCONT(" bootmap_start %lu, bootmap_size %lu, bootmap_pages %lu\n",
-		 bootmap_start, bootmap_size, bootmap_pages);
+	memblock_add(PFN_PHYS(node_min_pfn),
+		     (node_max_pfn - node_min_pfn) << PAGE_SHIFT);
 
 
-	/* Mark the free regions.  */
-	for_each_mem_cluster(memdesc, cluster, i) {
-		if (cluster->usage & 3)
-			continue;
-
-		start = cluster->start_pfn;
-		end = cluster->start_pfn + cluster->numpages;
-
-		if (start >= node_max_pfn || end <= node_min_pfn)
-			continue;
-
-		if (end > node_max_pfn)
-			end = node_max_pfn;
-		if (start < node_min_pfn)
-			start = node_min_pfn;
-
-		if (start < start_kernel_pfn) {
-			if (end > end_kernel_pfn) {
-				free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start),
-					     (PFN_PHYS(start_kernel_pfn)
-					      - PFN_PHYS(start)));
-				printk(" freeing pages %ld:%ld\n",
-				       start, start_kernel_pfn);
-				start = end_kernel_pfn;
-			} else if (end > start_kernel_pfn)
-				end = start_kernel_pfn;
-		} else if (start < end_kernel_pfn)
-			start = end_kernel_pfn;
-		if (start >= end)
-			continue;
-
-		free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start));
-		printk(" freeing pages %ld:%ld\n", start, end);
-	}
-
-	/* Reserve the bootmap memory.  */
-	reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start),
-			bootmap_size, BOOTMEM_DEFAULT);
-	printk(" reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
+	NODE_DATA(nid)->node_start_pfn = node_min_pfn;
+	NODE_DATA(nid)->node_present_pages = node_max_pfn - node_min_pfn;
 
 
 	node_set_online(nid);
 	node_set_online(nid);
 }
 }
@@ -251,6 +162,7 @@ setup_memory_node(int nid, void *kernel_end)
 void __init
 void __init
 setup_memory(void *kernel_end)
 setup_memory(void *kernel_end)
 {
 {
+	unsigned long kernel_size;
 	int nid;
 	int nid;
 
 
 	show_mem_layout();
 	show_mem_layout();
@@ -262,6 +174,9 @@ setup_memory(void *kernel_end)
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		setup_memory_node(nid, kernel_end);
 		setup_memory_node(nid, kernel_end);
 
 
+	kernel_size = virt_to_phys(kernel_end) - KERNEL_START_PHYS;
+	memblock_reserve(KERNEL_START_PHYS, kernel_size);
+
 #ifdef CONFIG_BLK_DEV_INITRD
 #ifdef CONFIG_BLK_DEV_INITRD
 	initrd_start = INITRD_START;
 	initrd_start = INITRD_START;
 	if (initrd_start) {
 	if (initrd_start) {
@@ -279,9 +194,8 @@ setup_memory(void *kernel_end)
 				       phys_to_virt(PFN_PHYS(max_low_pfn)));
 				       phys_to_virt(PFN_PHYS(max_low_pfn)));
 		} else {
 		} else {
 			nid = kvaddr_to_nid(initrd_start);
 			nid = kvaddr_to_nid(initrd_start);
-			reserve_bootmem_node(NODE_DATA(nid),
-					     virt_to_phys((void *)initrd_start),
-					     INITRD_SIZE, BOOTMEM_DEFAULT);
+			memblock_reserve(virt_to_phys((void *)initrd_start),
+					 INITRD_SIZE);
 		}
 		}
 	}
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 #endif /* CONFIG_BLK_DEV_INITRD */
@@ -303,9 +217,8 @@ void __init paging_init(void)
 	dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 	dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
 
 	for_each_online_node(nid) {
 	for_each_online_node(nid) {
-		bootmem_data_t *bdata = &bootmem_node_data[nid];
-		unsigned long start_pfn = bdata->node_min_pfn;
-		unsigned long end_pfn = bdata->node_low_pfn;
+		unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
+		unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_present_pages;
 
 
 		if (dma_local_pfn >= end_pfn - start_pfn)
 		if (dma_local_pfn >= end_pfn - start_pfn)
 			zones_size[ZONE_DMA] = end_pfn - start_pfn;
 			zones_size[ZONE_DMA] = end_pfn - start_pfn;

+ 1 - 31
arch/arm/include/asm/hugetlb-3level.h

@@ -29,6 +29,7 @@
  * ptes.
  * ptes.
  * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes).
  * (The valid bit is automatically cleared by set_pte_at for PROT_NONE ptes).
  */
  */
+#define __HAVE_ARCH_HUGE_PTEP_GET
 static inline pte_t huge_ptep_get(pte_t *ptep)
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
 {
 	pte_t retval = *ptep;
 	pte_t retval = *ptep;
@@ -37,35 +38,4 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
 	return retval;
 	return retval;
 }
 }
 
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte_at(mm, addr, ptep, pte);
-}
-
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-					 unsigned long addr, pte_t *ptep)
-{
-	ptep_clear_flush(vma, addr, ptep);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-	return ptep_get_and_clear(mm, addr, ptep);
-}
-
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
 #endif /* _ASM_ARM_HUGETLB_3LEVEL_H */
 #endif /* _ASM_ARM_HUGETLB_3LEVEL_H */

+ 1 - 32
arch/arm/include/asm/hugetlb.h

@@ -23,18 +23,8 @@
 #define _ASM_ARM_HUGETLB_H
 #define _ASM_ARM_HUGETLB_H
 
 
 #include <asm/page.h>
 #include <asm/page.h>
-#include <asm-generic/hugetlb.h>
-
 #include <asm/hugetlb-3level.h>
 #include <asm/hugetlb-3level.h>
-
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-					  unsigned long addr, unsigned long end,
-					  unsigned long floor,
-					  unsigned long ceiling)
-{
-	free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
+#include <asm-generic/hugetlb.h>
 
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr, unsigned long len)
 					 unsigned long addr, unsigned long len)
@@ -42,27 +32,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 	return 0;
 	return 0;
 }
 }
 
 
-static inline int prepare_hugepage_range(struct file *file,
-					 unsigned long addr, unsigned long len)
-{
-	struct hstate *h = hstate_file(file);
-	if (len & ~huge_page_mask(h))
-		return -EINVAL;
-	if (addr & ~huge_page_mask(h))
-		return -EINVAL;
-	return 0;
-}
-
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 	clear_bit(PG_dcache_clean, &page->flags);
 	clear_bit(PG_dcache_clean, &page->flags);

+ 7 - 32
arch/arm64/include/asm/hugetlb.h

@@ -20,48 +20,18 @@
 
 
 #include <asm/page.h>
 #include <asm/page.h>
 
 
+#define __HAVE_ARCH_HUGE_PTEP_GET
 static inline pte_t huge_ptep_get(pte_t *ptep)
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
 {
 	return READ_ONCE(*ptep);
 	return READ_ONCE(*ptep);
 }
 }
 
 
-
-
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-					  unsigned long addr, unsigned long end,
-					  unsigned long floor,
-					  unsigned long ceiling)
-{
-	free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr, unsigned long len)
 					 unsigned long addr, unsigned long len)
 {
 {
 	return 0;
 	return 0;
 }
 }
 
 
-static inline int prepare_hugepage_range(struct file *file,
-					 unsigned long addr, unsigned long len)
-{
-	struct hstate *h = hstate_file(file);
-	if (len & ~huge_page_mask(h))
-		return -EINVAL;
-	if (addr & ~huge_page_mask(h))
-		return -EINVAL;
-	return 0;
-}
-
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 	clear_bit(PG_dcache_clean, &page->flags);
 	clear_bit(PG_dcache_clean, &page->flags);
@@ -70,20 +40,25 @@ static inline void arch_clear_hugepage_flags(struct page *page)
 extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 				struct page *page, int writable);
 				struct page *page, int writable);
 #define arch_make_huge_pte arch_make_huge_pte
 #define arch_make_huge_pte arch_make_huge_pte
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
 extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, pte_t pte);
 			    pte_t *ptep, pte_t pte);
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 				      unsigned long addr, pte_t *ptep,
 				      unsigned long addr, pte_t *ptep,
 				      pte_t pte, int dirty);
 				      pte_t pte, int dirty);
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 				     unsigned long addr, pte_t *ptep);
 				     unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
 extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
 				    unsigned long addr, pte_t *ptep);
 				    unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
 extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
 				  unsigned long addr, pte_t *ptep);
 				  unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
 extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, unsigned long sz);
 			   pte_t *ptep, unsigned long sz);
-#define huge_pte_clear huge_pte_clear
 extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep, pte_t pte, unsigned long sz);
 				 pte_t *ptep, pte_t pte, unsigned long sz);
 #define set_huge_swap_pte_at set_huge_swap_pte_at
 #define set_huge_swap_pte_at set_huge_swap_pte_at

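The hugetlb.h reductions above (arm, arm64) and the ia64, mips, parisc and other ones further down all rely on one pattern: an architecture defines __HAVE_ARCH_HUGE_* only for the hooks it actually overrides and then includes <asm-generic/hugetlb.h>, which supplies trivial fallbacks for the rest. The sketch below shows the assumed shape of those fallbacks; the real set lives in include/asm-generic/hugetlb.h in the full series (+87 lines, not shown in this excerpt).

/* Sketch of the asm-generic fallback pattern assumed by these hugetlb.h changes. */
#ifndef __HAVE_ARCH_HUGE_PTE_NONE
static inline int huge_pte_none(pte_t pte)
{
	return pte_none(pte);
}
#endif

#ifndef __HAVE_ARCH_HUGE_PTEP_GET
static inline pte_t huge_ptep_get(pte_t *ptep)
{
	return *ptep;
}
#endif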
+ 8 - 6
arch/arm64/include/asm/string.h

@@ -16,6 +16,7 @@
 #ifndef __ASM_STRING_H
 #ifndef __ASM_STRING_H
 #define __ASM_STRING_H
 #define __ASM_STRING_H
 
 
+#ifndef CONFIG_KASAN
 #define __HAVE_ARCH_STRRCHR
 #define __HAVE_ARCH_STRRCHR
 extern char *strrchr(const char *, int c);
 extern char *strrchr(const char *, int c);
 
 
@@ -34,6 +35,13 @@ extern __kernel_size_t strlen(const char *);
 #define __HAVE_ARCH_STRNLEN
 #define __HAVE_ARCH_STRNLEN
 extern __kernel_size_t strnlen(const char *, __kernel_size_t);
 extern __kernel_size_t strnlen(const char *, __kernel_size_t);
 
 
+#define __HAVE_ARCH_MEMCMP
+extern int memcmp(const void *, const void *, size_t);
+
+#define __HAVE_ARCH_MEMCHR
+extern void *memchr(const void *, int, __kernel_size_t);
+#endif
+
 #define __HAVE_ARCH_MEMCPY
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *, const void *, __kernel_size_t);
 extern void *memcpy(void *, const void *, __kernel_size_t);
 extern void *__memcpy(void *, const void *, __kernel_size_t);
 extern void *__memcpy(void *, const void *, __kernel_size_t);
@@ -42,16 +50,10 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
 extern void *memmove(void *, const void *, __kernel_size_t);
 extern void *memmove(void *, const void *, __kernel_size_t);
 extern void *__memmove(void *, const void *, __kernel_size_t);
 extern void *__memmove(void *, const void *, __kernel_size_t);
 
 
-#define __HAVE_ARCH_MEMCHR
-extern void *memchr(const void *, int, __kernel_size_t);
-
 #define __HAVE_ARCH_MEMSET
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *, int, __kernel_size_t);
 extern void *memset(void *, int, __kernel_size_t);
 extern void *__memset(void *, int, __kernel_size_t);
 extern void *__memset(void *, int, __kernel_size_t);
 
 
-#define __HAVE_ARCH_MEMCMP
-extern int memcmp(const void *, const void *, size_t);
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE
 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE
 void memcpy_flushcache(void *dst, const void *src, size_t cnt);
 void memcpy_flushcache(void *dst, const void *src, size_t cnt);

+ 5 - 2
arch/arm64/kernel/arm64ksyms.c

@@ -44,20 +44,23 @@ EXPORT_SYMBOL(__arch_copy_in_user);
 EXPORT_SYMBOL(memstart_addr);
 EXPORT_SYMBOL(memstart_addr);
 
 
 	/* string / mem functions */
 	/* string / mem functions */
+#ifndef CONFIG_KASAN
 EXPORT_SYMBOL(strchr);
 EXPORT_SYMBOL(strchr);
 EXPORT_SYMBOL(strrchr);
 EXPORT_SYMBOL(strrchr);
 EXPORT_SYMBOL(strcmp);
 EXPORT_SYMBOL(strcmp);
 EXPORT_SYMBOL(strncmp);
 EXPORT_SYMBOL(strncmp);
 EXPORT_SYMBOL(strlen);
 EXPORT_SYMBOL(strlen);
 EXPORT_SYMBOL(strnlen);
 EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(memchr);
+#endif
+
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(__memset);
 EXPORT_SYMBOL(__memset);
 EXPORT_SYMBOL(__memcpy);
 EXPORT_SYMBOL(__memcpy);
 EXPORT_SYMBOL(__memmove);
 EXPORT_SYMBOL(__memmove);
-EXPORT_SYMBOL(memchr);
-EXPORT_SYMBOL(memcmp);
 
 
 	/* atomic bitops */
 	/* atomic bitops */
 EXPORT_SYMBOL(set_bit);
 EXPORT_SYMBOL(set_bit);

+ 1 - 1
arch/arm64/lib/memchr.S

@@ -30,7 +30,7 @@
  * Returns:
  * Returns:
  *	x0 - address of first occurrence of 'c' or 0
  *	x0 - address of first occurrence of 'c' or 0
  */
  */
-ENTRY(memchr)
+WEAK(memchr)
 	and	w1, w1, #0xff
 	and	w1, w1, #0xff
 1:	subs	x2, x2, #1
 1:	subs	x2, x2, #1
 	b.mi	2f
 	b.mi	2f

+ 1 - 1
arch/arm64/lib/memcmp.S

@@ -58,7 +58,7 @@ pos		.req	x11
 limit_wd	.req	x12
 limit_wd	.req	x12
 mask		.req	x13
 mask		.req	x13
 
 
-ENTRY(memcmp)
+WEAK(memcmp)
 	cbz	limit, .Lret0
 	cbz	limit, .Lret0
 	eor	tmp1, src1, src2
 	eor	tmp1, src1, src2
 	tst	tmp1, #7
 	tst	tmp1, #7

+ 1 - 1
arch/arm64/lib/strchr.S

@@ -29,7 +29,7 @@
  * Returns:
  * Returns:
  *	x0 - address of first occurrence of 'c' or 0
  *	x0 - address of first occurrence of 'c' or 0
  */
  */
-ENTRY(strchr)
+WEAK(strchr)
 	and	w1, w1, #0xff
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
 1:	ldrb	w2, [x0], #1
 	cmp	w2, w1
 	cmp	w2, w1

+ 1 - 1
arch/arm64/lib/strcmp.S

@@ -60,7 +60,7 @@ tmp3		.req	x9
 zeroones	.req	x10
 zeroones	.req	x10
 pos		.req	x11
 pos		.req	x11
 
 
-ENTRY(strcmp)
+WEAK(strcmp)
 	eor	tmp1, src1, src2
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7
 	tst	tmp1, #7

+ 1 - 1
arch/arm64/lib/strlen.S

@@ -56,7 +56,7 @@ pos		.req	x12
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 #define REP8_80 0x8080808080808080
 
 
-ENTRY(strlen)
+WEAK(strlen)
 	mov	zeroones, #REP8_01
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
 	bic	src, srcin, #15
 	ands	tmp1, srcin, #15
 	ands	tmp1, srcin, #15

+ 1 - 1
arch/arm64/lib/strncmp.S

@@ -64,7 +64,7 @@ limit_wd	.req	x13
 mask		.req	x14
 mask		.req	x14
 endloop		.req	x15
 endloop		.req	x15
 
 
-ENTRY(strncmp)
+WEAK(strncmp)
 	cbz	limit, .Lret0
 	cbz	limit, .Lret0
 	eor	tmp1, src1, src2
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	mov	zeroones, #REP8_01

+ 1 - 1
arch/arm64/lib/strnlen.S

@@ -59,7 +59,7 @@ limit_wd	.req	x14
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 #define REP8_80 0x8080808080808080
 
 
-ENTRY(strnlen)
+WEAK(strnlen)
 	cbz	limit, .Lhit_limit
 	cbz	limit, .Lhit_limit
 	mov	zeroones, #REP8_01
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
 	bic	src, srcin, #15

+ 1 - 1
arch/arm64/lib/strrchr.S

@@ -29,7 +29,7 @@
  * Returns:
  * Returns:
  *	x0 - address of last occurrence of 'c' or 0
  *	x0 - address of last occurrence of 'c' or 0
  */
  */
-ENTRY(strrchr)
+WEAK(strrchr)
 	mov	x3, #0
 	mov	x3, #0
 	and	w1, w1, #0xff
 	and	w1, w1, #0xff
 1:	ldrb	w2, [x0], #1
 1:	ldrb	w2, [x0], #1

+ 3 - 0
arch/hexagon/Kconfig

@@ -21,6 +21,9 @@ config HEXAGON
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_MEMBLOCK
+	select ARCH_DISCARD_MEMBLOCK
+	select NO_BOOTMEM
 	select NEED_SG_DMA_LENGTH
 	select NEED_SG_DMA_LENGTH
 	select NO_IOPORT_MAP
 	select NO_IOPORT_MAP
 	select GENERIC_IOMAP
 	select GENERIC_IOMAP

+ 8 - 12
arch/hexagon/mm/init.c

@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/atomic.h>
 #include <asm/atomic.h>
 #include <linux/highmem.h>
 #include <linux/highmem.h>
 #include <asm/tlb.h>
 #include <asm/tlb.h>
@@ -176,7 +177,6 @@ size_t hexagon_coherent_pool_size = (size_t) (DMA_RESERVE << 22);
 
 
 void __init setup_arch_memory(void)
 void __init setup_arch_memory(void)
 {
 {
-	int bootmap_size;
 	/*  XXX Todo: this probably should be cleaned up  */
 	/*  XXX Todo: this probably should be cleaned up  */
 	u32 *segtable = (u32 *) &swapper_pg_dir[0];
 	u32 *segtable = (u32 *) &swapper_pg_dir[0];
 	u32 *segtable_end;
 	u32 *segtable_end;
@@ -195,18 +195,22 @@ void __init setup_arch_memory(void)
 	bootmem_lastpg = PFN_DOWN((bootmem_lastpg << PAGE_SHIFT) &
 	bootmem_lastpg = PFN_DOWN((bootmem_lastpg << PAGE_SHIFT) &
 		~((BIG_KERNEL_PAGE_SIZE) - 1));
 		~((BIG_KERNEL_PAGE_SIZE) - 1));
 
 
+	memblock_add(PHYS_OFFSET,
+		     (bootmem_lastpg - ARCH_PFN_OFFSET) << PAGE_SHIFT);
+
+	/* Reserve kernel text/data/bss */
+	memblock_reserve(PHYS_OFFSET,
+			 (bootmem_startpg - ARCH_PFN_OFFSET) << PAGE_SHIFT);
 	/*
 	/*
 	 * Reserve the top DMA_RESERVE bytes of RAM for DMA (uncached)
 	 * Reserve the top DMA_RESERVE bytes of RAM for DMA (uncached)
 	 * memory allocation
 	 * memory allocation
 	 */
 	 */
-
 	max_low_pfn = bootmem_lastpg - PFN_DOWN(DMA_RESERVED_BYTES);
 	max_low_pfn = bootmem_lastpg - PFN_DOWN(DMA_RESERVED_BYTES);
 	min_low_pfn = ARCH_PFN_OFFSET;
 	min_low_pfn = ARCH_PFN_OFFSET;
-	bootmap_size =  init_bootmem_node(NODE_DATA(0), bootmem_startpg, min_low_pfn, max_low_pfn);
+	memblock_reserve(PFN_PHYS(max_low_pfn), DMA_RESERVED_BYTES);
 
 
 	printk(KERN_INFO "bootmem_startpg:  0x%08lx\n", bootmem_startpg);
 	printk(KERN_INFO "bootmem_startpg:  0x%08lx\n", bootmem_startpg);
 	printk(KERN_INFO "bootmem_lastpg:  0x%08lx\n", bootmem_lastpg);
 	printk(KERN_INFO "bootmem_lastpg:  0x%08lx\n", bootmem_lastpg);
-	printk(KERN_INFO "bootmap_size:  %d\n", bootmap_size);
 	printk(KERN_INFO "min_low_pfn:  0x%08lx\n", min_low_pfn);
 	printk(KERN_INFO "min_low_pfn:  0x%08lx\n", min_low_pfn);
 	printk(KERN_INFO "max_low_pfn:  0x%08lx\n", max_low_pfn);
 	printk(KERN_INFO "max_low_pfn:  0x%08lx\n", max_low_pfn);
 
 
@@ -256,14 +260,6 @@ void __init setup_arch_memory(void)
 	printk(KERN_INFO "*segtable = 0x%08x\n", *segtable);
 	printk(KERN_INFO "*segtable = 0x%08x\n", *segtable);
 #endif
 #endif
 
 
-	/*
-	 * Free all the memory that wasn't taken up by the bootmap, the DMA
-	 * reserve, or kernel itself.
-	 */
-	free_bootmem(PFN_PHYS(bootmem_startpg) + bootmap_size,
-		     PFN_PHYS(bootmem_lastpg - bootmem_startpg) - bootmap_size -
-		     DMA_RESERVED_BYTES);
-
 	/*
 	/*
 	 *  The bootmem allocator seemingly just lives to feed memory
 	 *  The bootmem allocator seemingly just lives to feed memory
 	 *  to the paging system
 	 *  to the paging system

+ 5 - 42
arch/ia64/include/asm/hugetlb.h

@@ -3,13 +3,13 @@
 #define _ASM_IA64_HUGETLB_H
 #define _ASM_IA64_HUGETLB_H
 
 
 #include <asm/page.h>
 #include <asm/page.h>
-#include <asm-generic/hugetlb.h>
-
 
 
+#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 			    unsigned long ceiling);
 
 
+#define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE
 int prepare_hugepage_range(struct file *file,
 int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len);
 			unsigned long addr, unsigned long len);
 
 
@@ -21,53 +21,16 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 		REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE);
 		REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE);
 }
 }
 
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte_at(mm, addr, ptep, pte);
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-	return ptep_get_and_clear(mm, addr, ptep);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 					 unsigned long addr, pte_t *ptep)
 {
 {
 }
 }
 
 
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 }
 }
 
 
+#include <asm-generic/hugetlb.h>
+
 #endif /* _ASM_IA64_HUGETLB_H */
 #endif /* _ASM_IA64_HUGETLB_H */

+ 0 - 1
arch/ia64/include/asm/pgtable.h

@@ -544,7 +544,6 @@ extern struct page *zero_page_memmap_ptr;
 
 
 #  ifdef CONFIG_VIRTUAL_MEM_MAP
 #  ifdef CONFIG_VIRTUAL_MEM_MAP
   /* arch mem_map init routine is needed due to holes in a virtual mem_map */
   /* arch mem_map init routine is needed due to holes in a virtual mem_map */
-#   define __HAVE_ARCH_MEMMAP_INIT
     extern void memmap_init (unsigned long size, int nid, unsigned long zone,
     extern void memmap_init (unsigned long size, int nid, unsigned long zone,
 			     unsigned long start_pfn);
 			     unsigned long start_pfn);
 #  endif /* CONFIG_VIRTUAL_MEM_MAP */
 #  endif /* CONFIG_VIRTUAL_MEM_MAP */

+ 7 - 33
arch/mips/include/asm/hugetlb.h

@@ -10,8 +10,6 @@
 #define __ASM_HUGETLB_H
 #define __ASM_HUGETLB_H
 
 
 #include <asm/page.h>
 #include <asm/page.h>
-#include <asm-generic/hugetlb.h>
-
 
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
 					 unsigned long addr,
@@ -20,6 +18,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 	return 0;
 	return 0;
 }
 }
 
 
+#define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE
 static inline int prepare_hugepage_range(struct file *file,
 static inline int prepare_hugepage_range(struct file *file,
 					 unsigned long addr,
 					 unsigned long addr,
 					 unsigned long len)
 					 unsigned long len)
@@ -38,21 +37,7 @@ static inline int prepare_hugepage_range(struct file *file,
 	return 0;
 	return 0;
 }
 }
 
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-					  unsigned long addr,
-					  unsigned long end,
-					  unsigned long floor,
-					  unsigned long ceiling)
-{
-	free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte_at(mm, addr, ptep, pte);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep)
 					    unsigned long addr, pte_t *ptep)
 {
 {
@@ -64,29 +49,21 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 	return pte;
 	return pte;
 }
 }
 
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 					 unsigned long addr, pte_t *ptep)
 {
 {
 	flush_tlb_page(vma, addr & huge_page_mask(hstate_vma(vma)));
 	flush_tlb_page(vma, addr & huge_page_mask(hstate_vma(vma)));
 }
 }
 
 
+#define __HAVE_ARCH_HUGE_PTE_NONE
 static inline int huge_pte_none(pte_t pte)
 static inline int huge_pte_none(pte_t pte)
 {
 {
 	unsigned long val = pte_val(pte) & ~_PAGE_GLOBAL;
 	unsigned long val = pte_val(pte) & ~_PAGE_GLOBAL;
 	return !val || (val == (unsigned long)invalid_pte_table);
 	return !val || (val == (unsigned long)invalid_pte_table);
 }
 }
 
 
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr,
 					     unsigned long addr,
 					     pte_t *ptep, pte_t pte,
 					     pte_t *ptep, pte_t pte,
@@ -105,13 +82,10 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	return changed;
 	return changed;
 }
 }
 
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 }
 }
 
 
+#include <asm-generic/hugetlb.h>
+
 #endif /* __ASM_HUGETLB_H */
 #endif /* __ASM_HUGETLB_H */

+ 3 - 0
arch/nios2/Kconfig

@@ -23,6 +23,9 @@ config NIOS2
 	select SPARSE_IRQ
 	select SPARSE_IRQ
 	select USB_ARCH_HAS_HCD if USB_SUPPORT
 	select USB_ARCH_HAS_HCD if USB_SUPPORT
 	select CPU_NO_EFFICIENT_FFS
 	select CPU_NO_EFFICIENT_FFS
+	select HAVE_MEMBLOCK
+	select ARCH_DISCARD_MEMBLOCK
+	select NO_BOOTMEM
 
 
 config GENERIC_CSUM
 config GENERIC_CSUM
 	def_bool y
 	def_bool y

+ 0 - 17
arch/nios2/kernel/prom.c

@@ -32,23 +32,6 @@
 
 
 #include <asm/sections.h>
 #include <asm/sections.h>
 
 
-void __init early_init_dt_add_memory_arch(u64 base, u64 size)
-{
-	u64 kernel_start = (u64)virt_to_phys(_text);
-
-	if (!memory_size &&
-	    (kernel_start >= base) && (kernel_start < (base + size)))
-		memory_size = size;
-
-}
-
-int __init early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size,
-					     bool nomap)
-{
-	reserve_bootmem(base, size, BOOTMEM_DEFAULT);
-	return 0;
-}
-
 void __init early_init_devtree(void *params)
 void __init early_init_devtree(void *params)
 {
 {
 	__be32 *dtb = (u32 *)__dtb_start;
 	__be32 *dtb = (u32 *)__dtb_start;

+ 7 - 32
arch/nios2/kernel/setup.c

@@ -17,6 +17,7 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task.h>
 #include <linux/console.h>
 #include <linux/console.h>
 #include <linux/bootmem.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/initrd.h>
 #include <linux/initrd.h>
 #include <linux/of_fdt.h>
 #include <linux/of_fdt.h>
 #include <linux/screen_info.h>
 #include <linux/screen_info.h>
@@ -143,10 +144,12 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
 
 
 void __init setup_arch(char **cmdline_p)
 void __init setup_arch(char **cmdline_p)
 {
 {
-	int bootmap_size;
+	int dram_start;
 
 
 	console_verbose();
 	console_verbose();
 
 
+	dram_start = memblock_start_of_DRAM();
+	memory_size = memblock_phys_mem_size();
 	memory_start = PAGE_ALIGN((unsigned long)__pa(_end));
 	memory_start = PAGE_ALIGN((unsigned long)__pa(_end));
 	memory_end = (unsigned long) CONFIG_NIOS2_MEM_BASE + memory_size;
 	memory_end = (unsigned long) CONFIG_NIOS2_MEM_BASE + memory_size;
 
 
@@ -163,39 +166,11 @@ void __init setup_arch(char **cmdline_p)
 	max_low_pfn = PFN_DOWN(memory_end);
 	max_low_pfn = PFN_DOWN(memory_end);
 	max_mapnr = max_low_pfn;
 	max_mapnr = max_low_pfn;
 
 
-	/*
-	 * give all the memory to the bootmap allocator,  tell it to put the
-	 * boot mem_map at the start of memory
-	 */
-	pr_debug("init_bootmem_node(?,%#lx, %#x, %#lx)\n",
-		min_low_pfn, PFN_DOWN(PHYS_OFFSET), max_low_pfn);
-	bootmap_size = init_bootmem_node(NODE_DATA(0),
-					min_low_pfn, PFN_DOWN(PHYS_OFFSET),
-					max_low_pfn);
-
-	/*
-	 * free the usable memory,  we have to make sure we do not free
-	 * the bootmem bitmap so we then reserve it after freeing it :-)
-	 */
-	pr_debug("free_bootmem(%#lx, %#lx)\n",
-		memory_start, memory_end - memory_start);
-	free_bootmem(memory_start, memory_end - memory_start);
-
-	/*
-	 * Reserve the bootmem bitmap itself as well. We do this in two
-	 * steps (first step was init_bootmem()) because this catches
-	 * the (very unlikely) case of us accidentally initializing the
-	 * bootmem allocator with an invalid RAM area.
-	 *
-	 * Arguments are start, size
-	 */
-	pr_debug("reserve_bootmem(%#lx, %#x)\n", memory_start, bootmap_size);
-	reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
-
+	memblock_reserve(dram_start, memory_start - dram_start);
 #ifdef CONFIG_BLK_DEV_INITRD
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (initrd_start) {
 	if (initrd_start) {
-		reserve_bootmem(virt_to_phys((void *)initrd_start),
-				initrd_end - initrd_start, BOOTMEM_DEFAULT);
+		memblock_reserve(virt_to_phys((void *)initrd_start),
+				initrd_end - initrd_start);
 	}
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 

+ 8 - 25
arch/parisc/include/asm/hugetlb.h

@@ -3,12 +3,12 @@
 #define _ASM_PARISC64_HUGETLB_H
 #define _ASM_PARISC64_HUGETLB_H
 
 
 #include <asm/page.h>
 #include <asm/page.h>
-#include <asm-generic/hugetlb.h>
-
 
 
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte);
 		     pte_t *ptep, pte_t pte);
 
 
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep);
 			      pte_t *ptep);
 
 
@@ -22,6 +22,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  * size aligned regions are ok without further preparation.
  */
  */
+#define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE
 static inline int prepare_hugepage_range(struct file *file,
 static inline int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len)
 			unsigned long addr, unsigned long len)
 {
 {
@@ -32,43 +33,25 @@ static inline int prepare_hugepage_range(struct file *file,
 	return 0;
 	return 0;
 }
 }
 
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-					  unsigned long addr, unsigned long end,
-					  unsigned long floor,
-					  unsigned long ceiling)
-{
-	free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 					 unsigned long addr, pte_t *ptep)
 {
 {
 }
 }
 
 
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 void huge_ptep_set_wrprotect(struct mm_struct *mm,
 void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep);
 					   unsigned long addr, pte_t *ptep);
 
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty);
 					     pte_t pte, int dirty);
 
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 }
 }
 
 
+#include <asm-generic/hugetlb.h>
+
 #endif /* _ASM_PARISC64_HUGETLB_H */
 #endif /* _ASM_PARISC64_HUGETLB_H */

+ 0 - 6
arch/powerpc/include/asm/book3s/32/pgtable.h

@@ -311,12 +311,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 {
 {
 	pte_update(ptep, _PAGE_RW, 0);
 	pte_update(ptep, _PAGE_RW, 0);
 }
 }
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 
 
 static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
 					   pte_t *ptep, pte_t entry,

+ 1 - 0
arch/powerpc/include/asm/book3s/64/pgtable.h

@@ -426,6 +426,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 }
 }
 
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 					   unsigned long addr, pte_t *ptep)
 {
 {

+ 6 - 37
arch/powerpc/include/asm/hugetlb.h

@@ -4,7 +4,6 @@
 
 
 #ifdef CONFIG_HUGETLB_PAGE
 #ifdef CONFIG_HUGETLB_PAGE
 #include <asm/page.h>
 #include <asm/page.h>
-#include <asm-generic/hugetlb.h>
 
 
 extern struct kmem_cache *hugepte_cache;
 extern struct kmem_cache *hugepte_cache;
 
 
@@ -110,31 +109,12 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 #endif
 #endif
 
 
+#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 			    unsigned long ceiling);
 
 
-/*
- * If the arch doesn't supply something else, assume that hugepage
- * size aligned regions are ok without further preparation.
- */
-static inline int prepare_hugepage_range(struct file *file,
-			unsigned long addr, unsigned long len)
-{
-	struct hstate *h = hstate_file(file);
-	if (len & ~huge_page_mask(h))
-		return -EINVAL;
-	if (addr & ~huge_page_mask(h))
-		return -EINVAL;
-	return 0;
-}
-
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte_at(mm, addr, ptep, pte);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 					    unsigned long addr, pte_t *ptep)
 					    unsigned long addr, pte_t *ptep)
 {
 {
@@ -145,6 +125,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 #endif
 #endif
 }
 }
 
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 					 unsigned long addr, pte_t *ptep)
 {
 {
@@ -153,29 +134,17 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 	flush_hugetlb_page(vma, addr);
 	flush_hugetlb_page(vma, addr);
 }
 }
 
 
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 				      unsigned long addr, pte_t *ptep,
 				      unsigned long addr, pte_t *ptep,
 				      pte_t pte, int dirty);
 				      pte_t pte, int dirty);
 
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 }
 }
 
 
+#include <asm-generic/hugetlb.h>
+
 #else /* ! CONFIG_HUGETLB_PAGE */
 #else /* ! CONFIG_HUGETLB_PAGE */
 static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 				      unsigned long vmaddr)
 				      unsigned long vmaddr)

+ 0 - 6
arch/powerpc/include/asm/nohash/32/pgtable.h

@@ -300,12 +300,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 
 
 	pte_update(ptep, clr, set);
 	pte_update(ptep, clr, set);
 }
 }
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
 
 
 static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 					   pte_t *ptep, pte_t entry,
 					   pte_t *ptep, pte_t entry,

+ 1 - 0
arch/powerpc/include/asm/nohash/64/pgtable.h

@@ -275,6 +275,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
 	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
 }
 }
 
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 					   unsigned long addr, pte_t *ptep)
 {
 {

+ 1 - 1
arch/powerpc/platforms/cell/cpufreq_spudemand.c

@@ -49,7 +49,7 @@ static int calc_freq(struct spu_gov_info_struct *info)
 	cpu = info->policy->cpu;
 	cpu = info->policy->cpu;
 	busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
 	busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
 
 
-	CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1);
+	info->busy_spus = calc_load(info->busy_spus, EXP, busy_spus * FIXED_1);
 	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
 	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
 			cpu, busy_spus, info->busy_spus);
 			cpu, busy_spus, info->busy_spus);
 
 

+ 3 - 6
arch/powerpc/platforms/cell/spufs/sched.c

@@ -987,9 +987,9 @@ static void spu_calc_load(void)
 	unsigned long active_tasks; /* fixed-point */
 	unsigned long active_tasks; /* fixed-point */
 
 
 	active_tasks = count_active_contexts() * FIXED_1;
 	active_tasks = count_active_contexts() * FIXED_1;
-	CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks);
-	CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks);
-	CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
+	spu_avenrun[0] = calc_load(spu_avenrun[0], EXP_1, active_tasks);
+	spu_avenrun[1] = calc_load(spu_avenrun[1], EXP_5, active_tasks);
+	spu_avenrun[2] = calc_load(spu_avenrun[2], EXP_15, active_tasks);
 }
 }
 
 
 static void spusched_wake(struct timer_list *unused)
 static void spusched_wake(struct timer_list *unused)
@@ -1071,9 +1071,6 @@ void spuctx_switch_state(struct spu_context *ctx,
 	}
 	}
 }
 }
 
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static int show_spu_loadavg(struct seq_file *s, void *private)
 static int show_spu_loadavg(struct seq_file *s, void *private)
 {
 {
 	int a, b, c;
 	int a, b, c;
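
The three CALC_LOAD() call sites above become calls to the calc_load() helper, and the file's private LOAD_INT/LOAD_FRAC copies go away. Below is a small standalone sketch of the fixed-point averaging that helper performs; the FSHIFT/FIXED_1/EXP_1 constants mirror <linux/sched/loadavg.h>, while the loop and the printed label are invented purely for illustration.

/* Standalone sketch of the fixed-point averaging done by calc_load().
 * FSHIFT/FIXED_1/EXP_1 mirror <linux/sched/loadavg.h>; everything else
 * is a hypothetical demo, not kernel code. */
#include <stdio.h>

#define FSHIFT   11                  /* bits of fractional precision */
#define FIXED_1  (1 << FSHIFT)       /* 1.0 in fixed point */
#define EXP_1    1884                /* 1/exp(5s/1min) in fixed point */

static unsigned long calc_load(unsigned long load, unsigned long exp,
                               unsigned long active)
{
	/* new = old*decay + sample*(1 - decay), all in fixed point */
	unsigned long newload = load * exp + active * (FIXED_1 - exp);

	if (active >= load)
		newload += FIXED_1 - 1;  /* round up while the load is rising */
	return newload / FIXED_1;
}

int main(void)
{
	unsigned long avenrun = 0;
	int tick;

	/* feed a constant "2 runnable tasks" sample for a minute of 5s ticks */
	for (tick = 0; tick < 12; tick++)
		avenrun = calc_load(avenrun, EXP_1, 2 * FIXED_1);

	/* same integer/fraction split LOAD_INT()/LOAD_FRAC() perform */
	printf("1-min average after 60s: %lu.%02lu\n",
	       avenrun >> FSHIFT,
	       ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}
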

+ 0 - 4
arch/s390/appldata/appldata_os.c

@@ -25,10 +25,6 @@
 
 
 #include "appldata.h"
 #include "appldata.h"
 
 
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 /*
 /*
  * OS data
  * OS data
  *
  *

+ 4 - 50
arch/sh/include/asm/hugetlb.h

@@ -4,8 +4,6 @@
 
 
 #include <asm/cacheflush.h>
 #include <asm/cacheflush.h>
 #include <asm/page.h>
 #include <asm/page.h>
-#include <asm-generic/hugetlb.h>
-
 
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
 					 unsigned long addr,
@@ -17,6 +15,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
  * If the arch doesn't supply something else, assume that hugepage
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
  * size aligned regions are ok without further preparation.
  */
  */
+#define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE
 static inline int prepare_hugepage_range(struct file *file,
 static inline int prepare_hugepage_range(struct file *file,
 			unsigned long addr, unsigned long len)
 			unsigned long addr, unsigned long len)
 {
 {
@@ -27,62 +26,17 @@ static inline int prepare_hugepage_range(struct file *file,
 	return 0;
 	return 0;
 }
 }
 
 
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-					  unsigned long addr, unsigned long end,
-					  unsigned long floor,
-					  unsigned long ceiling)
-{
-	free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte_at(mm, addr, ptep, pte);
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-	return ptep_get_and_clear(mm, addr, ptep);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 					 unsigned long addr, pte_t *ptep)
 {
 {
 }
 }
 
 
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 	clear_bit(PG_dcache_clean, &page->flags);
 	clear_bit(PG_dcache_clean, &page->flags);
 }
 }
 
 
+#include <asm-generic/hugetlb.h>
+
 #endif /* _ASM_SH_HUGETLB_H */
 #endif /* _ASM_SH_HUGETLB_H */

+ 8 - 32
arch/sparc/include/asm/hugetlb.h

@@ -3,7 +3,6 @@
 #define _ASM_SPARC64_HUGETLB_H
 #define _ASM_SPARC64_HUGETLB_H
 
 
 #include <asm/page.h>
 #include <asm/page.h>
-#include <asm-generic/hugetlb.h>
 
 
 #ifdef CONFIG_HUGETLB_PAGE
 #ifdef CONFIG_HUGETLB_PAGE
 struct pud_huge_patch_entry {
 struct pud_huge_patch_entry {
@@ -13,9 +12,11 @@ struct pud_huge_patch_entry {
 extern struct pud_huge_patch_entry __pud_huge_patch, __pud_huge_patch_end;
 extern struct pud_huge_patch_entry __pud_huge_patch, __pud_huge_patch_end;
 #endif
 #endif
 
 
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte);
 		     pte_t *ptep, pte_t pte);
 
 
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep);
 			      pte_t *ptep);
 
 
@@ -25,37 +26,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 	return 0;
 	return 0;
 }
 }
 
 
-/*
- * If the arch doesn't supply something else, assume that hugepage
- * size aligned regions are ok without further preparation.
- */
-static inline int prepare_hugepage_range(struct file *file,
-			unsigned long addr, unsigned long len)
-{
-	struct hstate *h = hstate_file(file);
-
-	if (len & ~huge_page_mask(h))
-		return -EINVAL;
-	if (addr & ~huge_page_mask(h))
-		return -EINVAL;
-	return 0;
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 					 unsigned long addr, pte_t *ptep)
 {
 {
 }
 }
 
 
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 					   unsigned long addr, pte_t *ptep)
 {
 {
@@ -63,6 +40,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
 	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
 }
 }
 
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
 					     pte_t pte, int dirty)
@@ -75,17 +53,15 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	return changed;
 	return changed;
 }
 }
 
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 }
 }
 
 
+#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 			    unsigned long end, unsigned long floor,
 			    unsigned long end, unsigned long floor,
 			    unsigned long ceiling);
 			    unsigned long ceiling);
 
 
+#include <asm-generic/hugetlb.h>
+
 #endif /* _ASM_SPARC64_HUGETLB_H */
 #endif /* _ASM_SPARC64_HUGETLB_H */

+ 2 - 0
arch/um/Kconfig

@@ -12,6 +12,8 @@ config UML
 	select HAVE_UID16
 	select HAVE_UID16
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_MEMBLOCK
+	select NO_BOOTMEM
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS

+ 10 - 12
arch/um/kernel/physmem.c

@@ -5,6 +5,7 @@
 
 
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/bootmem.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/pfn.h>
 #include <linux/pfn.h>
 #include <asm/page.h>
 #include <asm/page.h>
@@ -80,28 +81,23 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
 			  unsigned long len, unsigned long long highmem)
 			  unsigned long len, unsigned long long highmem)
 {
 {
 	unsigned long reserve = reserve_end - start;
 	unsigned long reserve = reserve_end - start;
-	unsigned long pfn = PFN_UP(__pa(reserve_end));
-	unsigned long delta = (len - reserve) >> PAGE_SHIFT;
-	unsigned long offset, bootmap_size;
-	long map_size;
+	long map_size = len - reserve;
 	int err;
 	int err;
 
 
-	offset = uml_reserved - uml_physmem;
-	map_size = len - offset;
 	if(map_size <= 0) {
 	if(map_size <= 0) {
 		os_warn("Too few physical memory! Needed=%lu, given=%lu\n",
 		os_warn("Too few physical memory! Needed=%lu, given=%lu\n",
-			offset, len);
+			reserve, len);
 		exit(1);
 		exit(1);
 	}
 	}
 
 
 	physmem_fd = create_mem_file(len + highmem);
 	physmem_fd = create_mem_file(len + highmem);
 
 
-	err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
+	err = os_map_memory((void *) reserve_end, physmem_fd, reserve,
 			    map_size, 1, 1, 1);
 			    map_size, 1, 1, 1);
 	if (err < 0) {
 	if (err < 0) {
 		os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p "
 		os_warn("setup_physmem - mapping %ld bytes of memory at 0x%p "
 			"failed - errno = %d\n", map_size,
 			"failed - errno = %d\n", map_size,
-			(void *) uml_reserved, err);
+			(void *) reserve_end, err);
 		exit(1);
 		exit(1);
 	}
 	}
 
 
@@ -113,9 +109,11 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
 	os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE);
 	os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE);
 	os_fsync_file(physmem_fd);
 	os_fsync_file(physmem_fd);
 
 
-	bootmap_size = init_bootmem(pfn, pfn + delta);
-	free_bootmem(__pa(reserve_end) + bootmap_size,
-		     len - bootmap_size - reserve);
+	memblock_add(__pa(start), len + highmem);
+	memblock_reserve(__pa(start), reserve);
+
+	min_low_pfn = PFN_UP(__pa(reserve_end));
+	max_low_pfn = min_low_pfn + (map_size >> PAGE_SHIFT);
 }
 }
 
 
 int phys_mapping(unsigned long phys, unsigned long long *offset_out)
 int phys_mapping(unsigned long phys, unsigned long long *offset_out)

+ 1 - 0
arch/unicore32/Kconfig

@@ -6,6 +6,7 @@ config UNICORE32
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select DMA_DIRECT_OPS
 	select DMA_DIRECT_OPS
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK
+	select NO_BOOTMEM
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_BZIP2

+ 1 - 53
arch/unicore32/mm/init.c

@@ -84,58 +84,6 @@ static void __init find_limits(unsigned long *min, unsigned long *max_low,
 	}
 	}
 }
 }
 
 
-static void __init uc32_bootmem_init(unsigned long start_pfn,
-	unsigned long end_pfn)
-{
-	struct memblock_region *reg;
-	unsigned int boot_pages;
-	phys_addr_t bitmap;
-	pg_data_t *pgdat;
-
-	/*
-	 * Allocate the bootmem bitmap page.  This must be in a region
-	 * of memory which has already been mapped.
-	 */
-	boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES,
-				__pfn_to_phys(end_pfn));
-
-	/*
-	 * Initialise the bootmem allocator, handing the
-	 * memory banks over to bootmem.
-	 */
-	node_set_online(0);
-	pgdat = NODE_DATA(0);
-	init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn);
-
-	/* Free the lowmem regions from memblock into bootmem. */
-	for_each_memblock(memory, reg) {
-		unsigned long start = memblock_region_memory_base_pfn(reg);
-		unsigned long end = memblock_region_memory_end_pfn(reg);
-
-		if (end >= end_pfn)
-			end = end_pfn;
-		if (start >= end)
-			break;
-
-		free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT);
-	}
-
-	/* Reserve the lowmem memblock reserved regions in bootmem. */
-	for_each_memblock(reserved, reg) {
-		unsigned long start = memblock_region_reserved_base_pfn(reg);
-		unsigned long end = memblock_region_reserved_end_pfn(reg);
-
-		if (end >= end_pfn)
-			end = end_pfn;
-		if (start >= end)
-			break;
-
-		reserve_bootmem(__pfn_to_phys(start),
-			(end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT);
-	}
-}
-
 static void __init uc32_bootmem_free(unsigned long min, unsigned long max_low,
 static void __init uc32_bootmem_free(unsigned long min, unsigned long max_low,
 	unsigned long max_high)
 	unsigned long max_high)
 {
 {
@@ -232,7 +180,7 @@ void __init bootmem_init(void)
 
 
 	find_limits(&min, &max_low, &max_high);
 	find_limits(&min, &max_low, &max_high);
 
 
-	uc32_bootmem_init(min, max_low);
+	node_set_online(0);
 
 
 	/*
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(),
 	 * Sparsemem tries to allocate bootmem in memory_present(),

+ 9 - 15
arch/x86/entry/vdso/vma.c

@@ -39,7 +39,7 @@ void __init init_vdso_image(const struct vdso_image *image)
 
 
 struct linux_binprm;
 struct linux_binprm;
 
 
-static int vdso_fault(const struct vm_special_mapping *sm,
+static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 {
 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
@@ -84,12 +84,11 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 	return 0;
 	return 0;
 }
 }
 
 
-static int vvar_fault(const struct vm_special_mapping *sm,
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 {
 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
 	long sym_offset;
 	long sym_offset;
-	int ret = -EFAULT;
 
 
 	if (!image)
 	if (!image)
 		return VM_FAULT_SIGBUS;
 		return VM_FAULT_SIGBUS;
@@ -108,29 +107,24 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 		return VM_FAULT_SIGBUS;
 		return VM_FAULT_SIGBUS;
 
 
 	if (sym_offset == image->sym_vvar_page) {
 	if (sym_offset == image->sym_vvar_page) {
-		ret = vm_insert_pfn(vma, vmf->address,
-				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+		return vmf_insert_pfn(vma, vmf->address,
+				__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
 	} else if (sym_offset == image->sym_pvclock_page) {
 	} else if (sym_offset == image->sym_pvclock_page) {
 		struct pvclock_vsyscall_time_info *pvti =
 		struct pvclock_vsyscall_time_info *pvti =
 			pvclock_get_pvti_cpu0_va();
 			pvclock_get_pvti_cpu0_va();
 		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
 		if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
-			ret = vm_insert_pfn_prot(
-				vma,
-				vmf->address,
-				__pa(pvti) >> PAGE_SHIFT,
-				pgprot_decrypted(vma->vm_page_prot));
+			return vmf_insert_pfn_prot(vma, vmf->address,
+					__pa(pvti) >> PAGE_SHIFT,
+					pgprot_decrypted(vma->vm_page_prot));
 		}
 		}
 	} else if (sym_offset == image->sym_hvclock_page) {
 	} else if (sym_offset == image->sym_hvclock_page) {
 		struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
 		struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
 
 
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
-			ret = vm_insert_pfn(vma, vmf->address,
-					    vmalloc_to_pfn(tsc_pg));
+			return vmf_insert_pfn(vma, vmf->address,
+					vmalloc_to_pfn(tsc_pg));
 	}
 	}
 
 
-	if (ret == 0 || ret == -EBUSY)
-		return VM_FAULT_NOPAGE;
-
 	return VM_FAULT_SIGBUS;
 	return VM_FAULT_SIGBUS;
 }
 }
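
vvar_fault() now returns whatever vmf_insert_pfn()/vmf_insert_pfn_prot() report, so the old tail that translated 0 and -EBUSY into VM_FAULT_NOPAGE disappears. The standalone model below shows that translation as the old callers had to open-code it; the VM_FAULT_* values and the function names are stand-ins for the demo, not the kernel's definitions.

/* Userspace model of why vmf_insert_pfn() simplifies callers: the old
 * errno-style return had to be translated into a fault code by every
 * caller.  VM_FAULT_* values and both helpers are illustrative stand-ins. */
#include <errno.h>
#include <stdio.h>

typedef unsigned int vm_fault_t;
#define VM_FAULT_NOPAGE  0x0100
#define VM_FAULT_SIGBUS  0x0002

/* what callers of the old vm_insert_pfn() had to open-code */
static vm_fault_t legacy_errno_to_fault(int err)
{
	if (err == 0 || err == -EBUSY)
		return VM_FAULT_NOPAGE;
	return VM_FAULT_SIGBUS;
}

/* stand-in for vmf_insert_pfn(): does the translation once, internally */
static vm_fault_t vmf_insert_pfn_model(int simulated_errno)
{
	return legacy_errno_to_fault(simulated_errno);
}

int main(void)
{
	printf("0       -> %#x\n", legacy_errno_to_fault(0));
	printf("-EBUSY  -> %#x\n", legacy_errno_to_fault(-EBUSY));
	printf("-EFAULT -> %#x\n", vmf_insert_pfn_model(-EFAULT));
	return 0;
}
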
 
 

+ 0 - 69
arch/x86/include/asm/hugetlb.h

@@ -13,75 +13,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 	return 0;
 	return 0;
 }
 }
 
 
-/*
- * If the arch doesn't supply something else, assume that hugepage
- * size aligned regions are ok without further preparation.
- */
-static inline int prepare_hugepage_range(struct file *file,
-			unsigned long addr, unsigned long len)
-{
-	struct hstate *h = hstate_file(file);
-	if (len & ~huge_page_mask(h))
-		return -EINVAL;
-	if (addr & ~huge_page_mask(h))
-		return -EINVAL;
-	return 0;
-}
-
-static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
-					  unsigned long addr, unsigned long end,
-					  unsigned long floor,
-					  unsigned long ceiling)
-{
-	free_pgd_range(tlb, addr, end, floor, ceiling);
-}
-
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte_at(mm, addr, ptep, pte);
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-	return ptep_get_and_clear(mm, addr, ptep);
-}
-
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-					 unsigned long addr, pte_t *ptep)
-{
-	ptep_clear_flush(vma, addr, ptep);
-}
-
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
-
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
-	return *ptep;
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 {
 }
 }

+ 3 - 12
arch/x86/kernel/e820.c

@@ -1248,7 +1248,6 @@ void __init e820__memblock_setup(void)
 {
 {
 	int i;
 	int i;
 	u64 end;
 	u64 end;
-	u64 addr = 0;
 
 
 	/*
 	/*
 	 * The bootstrap memblock region count maximum is 128 entries
 	 * The bootstrap memblock region count maximum is 128 entries
@@ -1265,21 +1264,13 @@ void __init e820__memblock_setup(void)
 		struct e820_entry *entry = &e820_table->entries[i];
 		struct e820_entry *entry = &e820_table->entries[i];
 
 
 		end = entry->addr + entry->size;
 		end = entry->addr + entry->size;
-		if (addr < entry->addr)
-			memblock_reserve(addr, entry->addr - addr);
-		addr = end;
 		if (end != (resource_size_t)end)
 		if (end != (resource_size_t)end)
 			continue;
 			continue;
 
 
-		/*
-		 * all !E820_TYPE_RAM ranges (including gap ranges) are put
-		 * into memblock.reserved to make sure that struct pages in
-		 * such regions are not left uninitialized after bootup.
-		 */
 		if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
 		if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
-			memblock_reserve(entry->addr, entry->size);
-		else
-			memblock_add(entry->addr, entry->size);
+			continue;
+
+		memblock_add(entry->addr, entry->size);
 	}
 	}
 
 
 	/* Throw away partial pages: */
 	/* Throw away partial pages: */

+ 1 - 0
arch/xtensa/include/asm/Kbuild

@@ -26,5 +26,6 @@ generic-y += rwsem.h
 generic-y += sections.h
 generic-y += sections.h
 generic-y += topology.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += trace_clock.h
+generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
 generic-y += xor.h

+ 0 - 19
arch/xtensa/include/asm/vga.h

@@ -1,19 +0,0 @@
-/*
- * include/asm-xtensa/vga.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_VGA_H
-#define _XTENSA_VGA_H
-
-#define VGA_MAP_MEM(x,s) (unsigned long)phys_to_virt(x)
-
-#define vga_readb(x)	(*(x))
-#define vga_writeb(x,y)	(*(y) = (x))
-
-#endif

+ 5 - 3
block/blk-iolatency.c

@@ -153,7 +153,7 @@ struct iolatency_grp {
 #define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC
 #define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC
 /*
 /*
  * These are the constants used to fake the fixed-point moving average
  * These are the constants used to fake the fixed-point moving average
- * calculation just like load average.  The call to CALC_LOAD folds
+ * calculation just like load average.  The call to calc_load() folds
  * (FIXED_1 (2048) - exp_factor) * new_sample into lat_avg.  The sampling
  * (FIXED_1 (2048) - exp_factor) * new_sample into lat_avg.  The sampling
  * window size is bucketed to try to approximately calculate average
  * window size is bucketed to try to approximately calculate average
  * latency such that 1/exp (decay rate) is [1 min, 2.5 min) when windows
  * latency such that 1/exp (decay rate) is [1 min, 2.5 min) when windows
@@ -248,7 +248,7 @@ static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
 		return;
 		return;
 
 
 	/*
 	/*
-	 * CALC_LOAD takes in a number stored in fixed point representation.
+	 * calc_load() takes in a number stored in fixed point representation.
 	 * Because we are using this for IO time in ns, the values stored
 	 * Because we are using this for IO time in ns, the values stored
 	 * are significantly larger than the FIXED_1 denominator (2048).
 	 * are significantly larger than the FIXED_1 denominator (2048).
 	 * Therefore, rounding errors in the calculation are negligible and
 	 * Therefore, rounding errors in the calculation are negligible and
@@ -257,7 +257,9 @@ static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
 	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
 	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
 			div64_u64(iolat->cur_win_nsec,
 			div64_u64(iolat->cur_win_nsec,
 				  BLKIOLATENCY_EXP_BUCKET_SIZE));
 				  BLKIOLATENCY_EXP_BUCKET_SIZE));
-	CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat->rqs.mean);
+	iolat->lat_avg = calc_load(iolat->lat_avg,
+				   iolatency_exp_factors[exp_idx],
+				   stat->rqs.mean);
 }
 }
 
 
 static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
 static inline bool iolatency_may_queue(struct iolatency_grp *iolat,

+ 12 - 7
drivers/base/node.c

@@ -67,8 +67,11 @@ static ssize_t node_read_meminfo(struct device *dev,
 	int nid = dev->id;
 	int nid = dev->id;
 	struct pglist_data *pgdat = NODE_DATA(nid);
 	struct pglist_data *pgdat = NODE_DATA(nid);
 	struct sysinfo i;
 	struct sysinfo i;
+	unsigned long sreclaimable, sunreclaimable;
 
 
 	si_meminfo_node(&i, nid);
 	si_meminfo_node(&i, nid);
+	sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
+	sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
 	n = sprintf(buf,
 	n = sprintf(buf,
 		       "Node %d MemTotal:       %8lu kB\n"
 		       "Node %d MemTotal:       %8lu kB\n"
 		       "Node %d MemFree:        %8lu kB\n"
 		       "Node %d MemFree:        %8lu kB\n"
@@ -118,6 +121,7 @@ static ssize_t node_read_meminfo(struct device *dev,
 		       "Node %d NFS_Unstable:   %8lu kB\n"
 		       "Node %d NFS_Unstable:   %8lu kB\n"
 		       "Node %d Bounce:         %8lu kB\n"
 		       "Node %d Bounce:         %8lu kB\n"
 		       "Node %d WritebackTmp:   %8lu kB\n"
 		       "Node %d WritebackTmp:   %8lu kB\n"
+		       "Node %d KReclaimable:   %8lu kB\n"
 		       "Node %d Slab:           %8lu kB\n"
 		       "Node %d Slab:           %8lu kB\n"
 		       "Node %d SReclaimable:   %8lu kB\n"
 		       "Node %d SReclaimable:   %8lu kB\n"
 		       "Node %d SUnreclaim:     %8lu kB\n"
 		       "Node %d SUnreclaim:     %8lu kB\n"
@@ -138,20 +142,21 @@ static ssize_t node_read_meminfo(struct device *dev,
 		       nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
 		       nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
 		       nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
 		       nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
 		       nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
 		       nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
-		       nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) +
-			      node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
-		       nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)),
+		       nid, K(sreclaimable +
+			      node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
+		       nid, K(sreclaimable + sunreclaimable),
+		       nid, K(sreclaimable),
+		       nid, K(sunreclaimable)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		       nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
+		       ,
 		       nid, K(node_page_state(pgdat, NR_ANON_THPS) *
 		       nid, K(node_page_state(pgdat, NR_ANON_THPS) *
 				       HPAGE_PMD_NR),
 				       HPAGE_PMD_NR),
 		       nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
 		       nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
 				       HPAGE_PMD_NR),
 				       HPAGE_PMD_NR),
 		       nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
 		       nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
-				       HPAGE_PMD_NR));
-#else
-		       nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)));
+				       HPAGE_PMD_NR)
 #endif
 #endif
+		       );
 	n += hugetlb_report_node_meminfo(nid, buf + n);
 	n += hugetlb_report_node_meminfo(nid, buf + n);
 	return n;
 	return n;
 }
 }

+ 0 - 4
drivers/cpuidle/governors/menu.c

@@ -130,10 +130,6 @@ struct menu_device {
 	int		interval_ptr;
 	int		interval_ptr;
 };
 };
 
 
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static inline int get_loadavg(unsigned long load)
 static inline int get_loadavg(unsigned long load)
 {
 {
 	return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;
 	return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;

+ 0 - 1
drivers/infiniband/hw/hfi1/mmu_rb.c

@@ -77,7 +77,6 @@ static void do_remove(struct mmu_rb_handler *handler,
 static void handle_remove(struct work_struct *work);
 static void handle_remove(struct work_struct *work);
 
 
 static const struct mmu_notifier_ops mn_opts = {
 static const struct mmu_notifier_ops mn_opts = {
-	.flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.invalidate_range_start = mmu_notifier_range_start,
 	.invalidate_range_start = mmu_notifier_range_start,
 };
 };
 
 

+ 0 - 1
drivers/iommu/amd_iommu_v2.c

@@ -427,7 +427,6 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 }
 
 
 static const struct mmu_notifier_ops iommu_mn = {
 static const struct mmu_notifier_ops iommu_mn = {
-	.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.release		= mn_release,
 	.release		= mn_release,
 	.clear_flush_young      = mn_clear_flush_young,
 	.clear_flush_young      = mn_clear_flush_young,
 	.invalidate_range       = mn_invalidate_range,
 	.invalidate_range       = mn_invalidate_range,

+ 0 - 1
drivers/iommu/intel-svm.c

@@ -273,7 +273,6 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 }
 
 
 static const struct mmu_notifier_ops intel_mmuops = {
 static const struct mmu_notifier_ops intel_mmuops = {
-	.flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.release = intel_mm_release,
 	.release = intel_mm_release,
 	.change_pte = intel_change_pte,
 	.change_pte = intel_change_pte,
 	.invalidate_range = intel_invalidate_range,
 	.invalidate_range = intel_invalidate_range,

+ 0 - 1
drivers/misc/sgi-gru/grutlbpurge.c

@@ -261,7 +261,6 @@ static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
 
 
 
 
 static const struct mmu_notifier_ops gru_mmuops = {
 static const struct mmu_notifier_ops gru_mmuops = {
-	.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.invalidate_range_start	= gru_invalidate_range_start,
 	.invalidate_range_start	= gru_invalidate_range_start,
 	.invalidate_range_end	= gru_invalidate_range_end,
 	.invalidate_range_end	= gru_invalidate_range_end,
 	.release		= gru_release,
 	.release		= gru_release,

+ 6 - 5
drivers/of/fdt.c

@@ -1127,12 +1127,13 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 {
 {
 	const u64 phys_offset = MIN_MEMBLOCK_ADDR;
 	const u64 phys_offset = MIN_MEMBLOCK_ADDR;
 
 
+	if (size < PAGE_SIZE - (base & ~PAGE_MASK)) {
+		pr_warn("Ignoring memory block 0x%llx - 0x%llx\n",
+			base, base + size);
+		return;
+	}
+
 	if (!PAGE_ALIGNED(base)) {
 	if (!PAGE_ALIGNED(base)) {
-		if (size < PAGE_SIZE - (base & ~PAGE_MASK)) {
-			pr_warn("Ignoring memory block 0x%llx - 0x%llx\n",
-				base, base + size);
-			return;
-		}
 		size -= PAGE_SIZE - (base & ~PAGE_MASK);
 		size -= PAGE_SIZE - (base & ~PAGE_MASK);
 		base = PAGE_ALIGN(base);
 		base = PAGE_ALIGN(base);
 	}
 	}
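
The size check now runs before the alignment check, so a block too small to hold one aligned page is ignored whether or not its base happens to be page aligned. A standalone sketch of the resulting trimming logic follows; the 4 KiB page size and the addresses in main() are assumptions for the demo, not taken from any particular build.

/* Sketch of the base/size trimming in early_init_dt_add_memory_arch()
 * after this change.  PAGE_SIZE and the sample blocks are invented. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE        4096ULL
#define PAGE_MASK        (~(PAGE_SIZE - 1))
#define PAGE_ALIGNED(x)  (((x) & ~PAGE_MASK) == 0)
#define PAGE_ALIGN(x)    (((x) + PAGE_SIZE - 1) & PAGE_MASK)

static void add_memory(uint64_t base, uint64_t size)
{
	/* reject blocks too small to contain even one aligned page,
	 * regardless of whether the base itself is aligned */
	if (size < PAGE_SIZE - (base & ~PAGE_MASK)) {
		printf("ignoring 0x%llx - 0x%llx\n",
		       (unsigned long long)base,
		       (unsigned long long)(base + size));
		return;
	}
	if (!PAGE_ALIGNED(base)) {
		size -= PAGE_SIZE - (base & ~PAGE_MASK);
		base = PAGE_ALIGN(base);
	}
	size &= PAGE_MASK;  /* drop any partial trailing page */
	printf("adding   0x%llx - 0x%llx\n",
	       (unsigned long long)base,
	       (unsigned long long)(base + size));
}

int main(void)
{
	add_memory(0x1000, 0x800);   /* aligned but sub-page: now ignored */
	add_memory(0x1800, 0x2000);  /* unaligned: trimmed to 0x2000 - 0x3000 */
	return 0;
}
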

+ 4 - 4
drivers/staging/android/ion/ion_page_pool.c

@@ -33,8 +33,8 @@ static void ion_page_pool_add(struct ion_page_pool *pool, struct page *page)
 		pool->low_count++;
 		pool->low_count++;
 	}
 	}
 
 
-	mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
-			    (1 << (PAGE_SHIFT + pool->order)));
+	mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
+							1 << pool->order);
 	mutex_unlock(&pool->mutex);
 	mutex_unlock(&pool->mutex);
 }
 }
 
 
@@ -53,8 +53,8 @@ static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high)
 	}
 	}
 
 
 	list_del(&page->lru);
 	list_del(&page->lru);
-	mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
-			    -(1 << (PAGE_SHIFT + pool->order)));
+	mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
+							-(1 << pool->order));
 	return page;
 	return page;
 }
 }
 
 

+ 4 - 1
fs/cramfs/inode.c

@@ -418,9 +418,12 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
 		int i;
 		int i;
 		vma->vm_flags |= VM_MIXEDMAP;
 		vma->vm_flags |= VM_MIXEDMAP;
 		for (i = 0; i < pages && !ret; i++) {
 		for (i = 0; i < pages && !ret; i++) {
+			vm_fault_t vmf;
 			unsigned long off = i * PAGE_SIZE;
 			unsigned long off = i * PAGE_SIZE;
 			pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV);
 			pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV);
-			ret = vm_insert_mixed(vma, vma->vm_start + off, pfn);
+			vmf = vmf_insert_mixed(vma, vma->vm_start + off, pfn);
+			if (vmf & VM_FAULT_ERROR)
+				ret = vm_fault_to_errno(vmf, 0);
 		}
 		}
 	}
 	}
 
 

+ 9 - 29
fs/dcache.c

@@ -257,24 +257,10 @@ static void __d_free(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry); 
 	kmem_cache_free(dentry_cache, dentry); 
 }
 }
 
 
-static void __d_free_external_name(struct rcu_head *head)
-{
-	struct external_name *name = container_of(head, struct external_name,
-						  u.head);
-
-	mod_node_page_state(page_pgdat(virt_to_page(name)),
-			    NR_INDIRECTLY_RECLAIMABLE_BYTES,
-			    -ksize(name));
-
-	kfree(name);
-}
-
 static void __d_free_external(struct rcu_head *head)
 static void __d_free_external(struct rcu_head *head)
 {
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-
-	__d_free_external_name(&external_name(dentry)->u.head);
-
+	kfree(external_name(dentry));
 	kmem_cache_free(dentry_cache, dentry);
 	kmem_cache_free(dentry_cache, dentry);
 }
 }
 
 
@@ -306,7 +292,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
 		struct external_name *p;
 		struct external_name *p;
 		p = container_of(name->name, struct external_name, name[0]);
 		p = container_of(name->name, struct external_name, name[0]);
 		if (unlikely(atomic_dec_and_test(&p->u.count)))
 		if (unlikely(atomic_dec_and_test(&p->u.count)))
-			call_rcu(&p->u.head, __d_free_external_name);
+			kfree_rcu(p, u.head);
 	}
 	}
 }
 }
 EXPORT_SYMBOL(release_dentry_name_snapshot);
 EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -1606,7 +1592,6 @@ EXPORT_SYMBOL(d_invalidate);
  
  
 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 {
 {
-	struct external_name *ext = NULL;
 	struct dentry *dentry;
 	struct dentry *dentry;
 	char *dname;
 	char *dname;
 	int err;
 	int err;
@@ -1627,14 +1612,15 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 		dname = dentry->d_iname;
 		dname = dentry->d_iname;
 	} else if (name->len > DNAME_INLINE_LEN-1) {
 	} else if (name->len > DNAME_INLINE_LEN-1) {
 		size_t size = offsetof(struct external_name, name[1]);
 		size_t size = offsetof(struct external_name, name[1]);
-
-		ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
-		if (!ext) {
+		struct external_name *p = kmalloc(size + name->len,
+						  GFP_KERNEL_ACCOUNT |
+						  __GFP_RECLAIMABLE);
+		if (!p) {
 			kmem_cache_free(dentry_cache, dentry); 
 			kmem_cache_free(dentry_cache, dentry); 
 			return NULL;
 			return NULL;
 		}
 		}
-		atomic_set(&ext->u.count, 1);
-		dname = ext->name;
+		atomic_set(&p->u.count, 1);
+		dname = p->name;
 	} else  {
 	} else  {
 		dname = dentry->d_iname;
 		dname = dentry->d_iname;
 	}	
 	}	
@@ -1673,12 +1659,6 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 		}
 		}
 	}
 	}
 
 
-	if (unlikely(ext)) {
-		pg_data_t *pgdat = page_pgdat(virt_to_page(ext));
-		mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES,
-				    ksize(ext));
-	}
-
 	this_cpu_inc(nr_dentry);
 	this_cpu_inc(nr_dentry);
 
 
 	return dentry;
 	return dentry;
@@ -2707,7 +2687,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
 		dentry->d_name.hash_len = target->d_name.hash_len;
 		dentry->d_name.hash_len = target->d_name.hash_len;
 	}
 	}
 	if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
 	if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
-		call_rcu(&old_name->u.head, __d_free_external_name);
+		kfree_rcu(old_name, u.head);
 }
 }
 
 
 /*
 /*

+ 1 - 1
fs/iomap.c

@@ -1057,7 +1057,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	return length;
 	return length;
 }
 }
 
 
-int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
+vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 {
 {
 	struct page *page = vmf->page;
 	struct page *page = vmf->page;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
 	struct inode *inode = file_inode(vmf->vma->vm_file);

+ 3 - 0
fs/kernfs/mount.c

@@ -236,6 +236,9 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 		sb->s_export_op = &kernfs_export_ops;
 		sb->s_export_op = &kernfs_export_ops;
 	sb->s_time_gran = 1;
 	sb->s_time_gran = 1;
 
 
+	/* sysfs dentries and inodes don't require IO to create */
+	sb->s_shrink.seeks = 0;
+
 	/* get root inode, initialize and unlock it */
 	/* get root inode, initialize and unlock it */
 	mutex_lock(&kernfs_mutex);
 	mutex_lock(&kernfs_mutex);
 	inode = kernfs_get_inode(sb, info->root->kn);
 	inode = kernfs_get_inode(sb, info->root->kn);

+ 0 - 4
fs/ocfs2/alloc.c

@@ -5106,8 +5106,6 @@ int ocfs2_split_extent(handle_t *handle,
 	 * rightmost extent list.
 	 * rightmost extent list.
 	 */
 	 */
 	if (path->p_tree_depth) {
 	if (path->p_tree_depth) {
-		struct ocfs2_extent_block *eb;
-
 		ret = ocfs2_read_extent_block(et->et_ci,
 		ret = ocfs2_read_extent_block(et->et_ci,
 					      ocfs2_et_get_last_eb_blk(et),
 					      ocfs2_et_get_last_eb_blk(et),
 					      &last_eb_bh);
 					      &last_eb_bh);
@@ -5115,8 +5113,6 @@ int ocfs2_split_extent(handle_t *handle,
 			mlog_errno(ret);
 			mlog_errno(ret);
 			goto out;
 			goto out;
 		}
 		}
-
-		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
 	}
 	}
 
 
 	if (rec->e_cpos == split_rec->e_cpos &&
 	if (rec->e_cpos == split_rec->e_cpos &&

+ 1 - 2
fs/ocfs2/aops.c

@@ -1392,8 +1392,7 @@ retry:
 unlock:
 unlock:
 	spin_unlock(&oi->ip_lock);
 	spin_unlock(&oi->ip_lock);
 out:
 out:
-	if (new)
-		kfree(new);
+	kfree(new);
 	return ret;
 	return ret;
 }
 }
 
 

+ 1 - 1
fs/ocfs2/dlm/dlmdebug.c

@@ -329,7 +329,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle)
 {
 {
 	char *buf;
 	char *buf;
 
 
-	buf = (char *) get_zeroed_page(GFP_NOFS);
+	buf = (char *) get_zeroed_page(GFP_ATOMIC);
 	if (buf) {
 	if (buf) {
 		dump_mle(mle, buf, PAGE_SIZE - 1);
 		dump_mle(mle, buf, PAGE_SIZE - 1);
 		free_page((unsigned long)buf);
 		free_page((unsigned long)buf);

+ 1 - 1
fs/ocfs2/dlm/dlmthread.c

@@ -531,7 +531,7 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 	assert_spin_locked(&res->spinlock);
 	assert_spin_locked(&res->spinlock);
 
 
 	/* don't shuffle secondary queues */
 	/* don't shuffle secondary queues */
-	if ((res->owner == dlm->node_num)) {
+	if (res->owner == dlm->node_num) {
 		if (res->state & (DLM_LOCK_RES_MIGRATING |
 		if (res->state & (DLM_LOCK_RES_MIGRATING |
 				  DLM_LOCK_RES_BLOCK_DIRTY))
 				  DLM_LOCK_RES_BLOCK_DIRTY))
 		    return;
 		    return;

+ 0 - 2
fs/ocfs2/refcounttree.c

@@ -4135,7 +4135,6 @@ static int ocfs2_create_reflink_node(struct inode *s_inode,
 	struct buffer_head *ref_root_bh = NULL;
 	struct buffer_head *ref_root_bh = NULL;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
 	struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
-	struct ocfs2_refcount_block *rb;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
 	struct ocfs2_refcount_tree *ref_tree;
 	struct ocfs2_refcount_tree *ref_tree;
 
 
@@ -4162,7 +4161,6 @@ static int ocfs2_create_reflink_node(struct inode *s_inode,
 		mlog_errno(ret);
 		mlog_errno(ret);
 		goto out;
 		goto out;
 	}
 	}
-	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
 
 
 	ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
 	ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
 					  &ref_tree->rf_ci, ref_root_bh,
 					  &ref_tree->rf_ci, ref_root_bh,

+ 3 - 0
fs/proc/inode.c

@@ -516,6 +516,9 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
 	 */
 	 */
 	s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
 	s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
 	
 	
+	/* procfs dentries and inodes don't require IO to create */
+	s->s_shrink.seeks = 0;
+
 	pde_get(&proc_root);
 	pde_get(&proc_root);
 	root_inode = proc_get_inode(s, &proc_root);
 	root_inode = proc_get_inode(s, &proc_root);
 	if (!root_inode) {
 	if (!root_inode) {

+ 0 - 3
fs/proc/loadavg.c

@@ -10,9 +10,6 @@
 #include <linux/seqlock.h>
 #include <linux/seqlock.h>
 #include <linux/time.h>
 #include <linux/time.h>
 
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static int loadavg_proc_show(struct seq_file *m, void *v)
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
 {
 	unsigned long avnrun[3];
 	unsigned long avnrun[3];

+ 8 - 8
fs/proc/meminfo.c

@@ -38,6 +38,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	long cached;
 	long cached;
 	long available;
 	long available;
 	unsigned long pages[NR_LRU_LISTS];
 	unsigned long pages[NR_LRU_LISTS];
+	unsigned long sreclaimable, sunreclaim;
 	int lru;
 	int lru;
 
 
 	si_meminfo(&i);
 	si_meminfo(&i);
@@ -53,6 +54,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
 
 	available = si_mem_available();
 	available = si_mem_available();
+	sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE);
+	sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE);
 
 
 	show_val_kb(m, "MemTotal:       ", i.totalram);
 	show_val_kb(m, "MemTotal:       ", i.totalram);
 	show_val_kb(m, "MemFree:        ", i.freeram);
 	show_val_kb(m, "MemFree:        ", i.freeram);
@@ -94,14 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 	show_val_kb(m, "Mapped:         ",
 	show_val_kb(m, "Mapped:         ",
 		    global_node_page_state(NR_FILE_MAPPED));
 		    global_node_page_state(NR_FILE_MAPPED));
 	show_val_kb(m, "Shmem:          ", i.sharedram);
 	show_val_kb(m, "Shmem:          ", i.sharedram);
-	show_val_kb(m, "Slab:           ",
-		    global_node_page_state(NR_SLAB_RECLAIMABLE) +
-		    global_node_page_state(NR_SLAB_UNRECLAIMABLE));
-
-	show_val_kb(m, "SReclaimable:   ",
-		    global_node_page_state(NR_SLAB_RECLAIMABLE));
-	show_val_kb(m, "SUnreclaim:     ",
-		    global_node_page_state(NR_SLAB_UNRECLAIMABLE));
+	show_val_kb(m, "KReclaimable:   ", sreclaimable +
+		    global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
+	show_val_kb(m, "Slab:           ", sreclaimable + sunreclaim);
+	show_val_kb(m, "SReclaimable:   ", sreclaimable);
+	show_val_kb(m, "SUnreclaim:     ", sunreclaim);
 	seq_printf(m, "KernelStack:    %8lu kB\n",
 	seq_printf(m, "KernelStack:    %8lu kB\n",
 		   global_zone_page_state(NR_KERNEL_STACK_KB));
 		   global_zone_page_state(NR_KERNEL_STACK_KB));
 	show_val_kb(m, "PageTables:     ",
 	show_val_kb(m, "PageTables:     ",

+ 3 - 1
fs/proc/task_mmu.c

@@ -713,6 +713,8 @@ static void smap_gather_stats(struct vm_area_struct *vma,
 	smaps_walk.private = mss;
 	smaps_walk.private = mss;
 
 
 #ifdef CONFIG_SHMEM
 #ifdef CONFIG_SHMEM
+	/* In case of smaps_rollup, reset the value from previous vma */
+	mss->check_shmem_swap = false;
 	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
 	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
 		/*
 		/*
 		 * For shared or readonly shmem mappings we know that all
 		 * For shared or readonly shmem mappings we know that all
@@ -728,7 +730,7 @@ static void smap_gather_stats(struct vm_area_struct *vma,
 
 
 		if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
 		if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
 					!(vma->vm_flags & VM_WRITE)) {
 					!(vma->vm_flags & VM_WRITE)) {
-			mss->swap = shmem_swapped;
+			mss->swap += shmem_swapped;
 		} else {
 		} else {
 			mss->check_shmem_swap = true;
 			mss->check_shmem_swap = true;
 			smaps_walk.pte_hole = smaps_pte_hole;
 			smaps_walk.pte_hole = smaps_pte_hole;

+ 4 - 4
fs/userfaultfd.c

@@ -1026,7 +1026,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 	struct userfaultfd_ctx *fork_nctx = NULL;
 	struct userfaultfd_ctx *fork_nctx = NULL;
 
 
 	/* always take the fd_wqh lock before the fault_pending_wqh lock */
 	/* always take the fd_wqh lock before the fault_pending_wqh lock */
-	spin_lock(&ctx->fd_wqh.lock);
+	spin_lock_irq(&ctx->fd_wqh.lock);
 	__add_wait_queue(&ctx->fd_wqh, &wait);
 	__add_wait_queue(&ctx->fd_wqh, &wait);
 	for (;;) {
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1112,13 +1112,13 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			ret = -EAGAIN;
 			ret = -EAGAIN;
 			break;
 			break;
 		}
 		}
-		spin_unlock(&ctx->fd_wqh.lock);
+		spin_unlock_irq(&ctx->fd_wqh.lock);
 		schedule();
 		schedule();
-		spin_lock(&ctx->fd_wqh.lock);
+		spin_lock_irq(&ctx->fd_wqh.lock);
 	}
 	}
 	__remove_wait_queue(&ctx->fd_wqh, &wait);
 	__remove_wait_queue(&ctx->fd_wqh, &wait);
 	__set_current_state(TASK_RUNNING);
 	__set_current_state(TASK_RUNNING);
-	spin_unlock(&ctx->fd_wqh.lock);
+	spin_unlock_irq(&ctx->fd_wqh.lock);
 
 
 	if (!ret && msg->event == UFFD_EVENT_FORK) {
 	if (!ret && msg->event == UFFD_EVENT_FORK) {
 		ret = resolve_userfault_fork(ctx, fork_nctx, msg);
 		ret = resolve_userfault_fork(ctx, fork_nctx, msg);

+ 87 - 1
include/asm-generic/hugetlb.h

@@ -32,7 +32,7 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
 	return pte_modify(pte, newprot);
 	return pte_modify(pte, newprot);
 }
 }
 
 
-#ifndef huge_pte_clear
+#ifndef __HAVE_ARCH_HUGE_PTE_CLEAR
 static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, unsigned long sz)
 		    pte_t *ptep, unsigned long sz)
 {
 {
@@ -40,4 +40,90 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 }
 }
 #endif
 #endif
 
 
+#ifndef __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+		unsigned long addr, unsigned long end,
+		unsigned long floor, unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+#endif
+
+#ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte)
+{
+	set_pte_at(mm, addr, ptep, pte);
+}
+#endif
+
+#ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+#endif
+
+#ifndef __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
+{
+	ptep_clear_flush(vma, addr, ptep);
+}
+#endif
+
+#ifndef __HAVE_ARCH_HUGE_PTE_NONE
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+#endif
+
+#ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+#endif
+
+#ifndef __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE
+static inline int prepare_hugepage_range(struct file *file,
+		unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
+#ifndef __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+#endif
+
+#ifndef __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep,
+		pte_t pte, int dirty)
+{
+	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+#endif
+
+#ifndef __HAVE_ARCH_HUGE_PTEP_GET
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+#endif
+
 #endif /* _ASM_GENERIC_HUGETLB_H */
 #endif /* _ASM_GENERIC_HUGETLB_H */
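
Every default added above is wrapped in an __HAVE_ARCH_* guard, and the per-architecture headers in this series define the guard for whatever they override before including asm-generic/hugetlb.h. Below is a minimal standalone illustration of that guard pattern; the names (demo_flush, demo_sync, __HAVE_ARCH_DEMO_*) are invented for the demo.

/* Minimal illustration of the __HAVE_ARCH_* override pattern: the
 * "arch" part defines the guard plus its own helper, then the "generic"
 * part supplies defaults only for helpers that were not overridden. */
#include <stdio.h>

/* --- "arch" header: overrides one helper ---------------------------- */
#define __HAVE_ARCH_DEMO_FLUSH
static void demo_flush(void)
{
	printf("arch-specific flush\n");
}

/* --- "asm-generic" header: defaults for everything not overridden --- */
#ifndef __HAVE_ARCH_DEMO_FLUSH
static void demo_flush(void)
{
	printf("generic flush\n");
}
#endif

#ifndef __HAVE_ARCH_DEMO_SYNC
static void demo_sync(void)
{
	printf("generic sync\n");
}
#endif

int main(void)
{
	demo_flush();   /* the arch version wins */
	demo_sync();    /* falls back to the generic default */
	return 0;
}
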

+ 2 - 2
include/asm-generic/pgtable.h

@@ -757,7 +757,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
 /*
  * Interfaces that can be used by architecture code to keep track of
  * memory type of pfn mappings specified by the remap_pfn_range,
- * vm_insert_pfn.
+ * vmf_insert_pfn.
  */
 
 /*
@@ -773,7 +773,7 @@ static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 
 /*
  * track_pfn_insert is called when a _new_ single pfn is established
- * by vm_insert_pfn().
+ * by vmf_insert_pfn().
  */
 static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 				    pfn_t pfn)

+ 4 - 0
include/linux/cgroup-defs.h

@@ -20,6 +20,7 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/workqueue.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/psi_types.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -436,6 +437,9 @@ struct cgroup {
 	/* used to schedule release agent */
 	struct work_struct release_agent_work;
 
+	/* used to track pressure stalls */
+	struct psi_group psi;
+
 	/* used to store eBPF programs */
 	struct cgroup_bpf bpf;
 

+ 15 - 0
include/linux/cgroup.h

@@ -650,6 +650,11 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
+static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
+{
+	return &cgrp->psi;
+}
+
 static inline void cgroup_init_kthreadd(void)
 {
 	/*
@@ -703,6 +708,16 @@ static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp)
 	return NULL;
 }
 
+static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+	return NULL;
+}
+
+static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
+{
+	return NULL;
+}
+
 static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 					       struct cgroup *ancestor)
 {

+ 23 - 0
include/linux/delayacct.h

@@ -57,7 +57,12 @@ struct task_delay_info {
 
 	u64 freepages_start;
 	u64 freepages_delay;	/* wait for memory reclaim */
+
+	u64 thrashing_start;
+	u64 thrashing_delay;	/* wait for thrashing page */
+
 	u32 freepages_count;	/* total count of memory reclaim */
+	u32 thrashing_count;	/* total count of thrash waits */
 };
 #endif
 
@@ -76,6 +81,8 @@ extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 extern void __delayacct_freepages_start(void);
 extern void __delayacct_freepages_end(void);
+extern void __delayacct_thrashing_start(void);
+extern void __delayacct_thrashing_end(void);
 
 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
 {
@@ -156,6 +163,18 @@ static inline void delayacct_freepages_end(void)
 		__delayacct_freepages_end();
 }
 
+static inline void delayacct_thrashing_start(void)
+{
+	if (current->delays)
+		__delayacct_thrashing_start();
+}
+
+static inline void delayacct_thrashing_end(void)
+{
+	if (current->delays)
+		__delayacct_thrashing_end();
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -182,6 +201,10 @@ static inline void delayacct_freepages_start(void)
 {}
 static inline void delayacct_freepages_end(void)
 {}
+static inline void delayacct_thrashing_start(void)
+{}
+static inline void delayacct_thrashing_end(void)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
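The thrashing pair mirrors the existing freepages accounting: __delayacct_thrashing_start() stamps the start time and __delayacct_thrashing_end() folds the elapsed time into thrashing_delay and bumps thrashing_count, while the inline wrappers stay no-ops for tasks without delay-accounting state. A minimal sketch of the call pattern (the wait below is only a stand-in for the real blocking site):

/* Illustration only: bracket a wait on a page that is being thrashed in. */
static void wait_for_thrashing_page_example(struct page *page)
{
	delayacct_thrashing_start();	/* no-op unless current->delays is set */
	wait_on_page_locked(page);	/* stand-in for the actual wait */
	delayacct_thrashing_end();	/* accumulates the delay, bumps the count */
}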
 

+ 1 - 1
include/linux/hmm.h

@@ -107,7 +107,7 @@ enum hmm_pfn_flag_e {
  * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
  * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
  * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
- *      result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
+ *      result of vmf_insert_pfn() or vm_insert_page(). Therefore, it should not
  *      be mirrored by a device, because the entry will never have HMM_PFN_VALID
  *      set and the pfn value is undefined.
  *

+ 4 - 4
include/linux/huge_mm.h

@@ -213,9 +213,9 @@ static inline int hpage_nr_pages(struct page *page)
 }
 
 struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, int flags);
+		pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
-		pud_t *pud, int flags);
+		pud_t *pud, int flags, struct dev_pagemap **pgmap);
 
 extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
@@ -344,13 +344,13 @@ static inline void mm_put_huge_zero_page(struct mm_struct *mm)
 }
 
 static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
-		unsigned long addr, pmd_t *pmd, int flags)
+	unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
 {
 	return NULL;
 }
 
 static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
-		unsigned long addr, pud_t *pud, int flags)
+	unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap)
 {
 	return NULL;
 }

+ 3 - 1
include/linux/iomap.h

@@ -6,6 +6,7 @@
 #include <linux/bitmap.h>
 #include <linux/mm.h>
 #include <linux/types.h>
+#include <linux/mm_types.h>
 
 struct address_space;
 struct fiemap_extent_info;
@@ -141,7 +142,8 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, const struct iomap_ops *ops);
 int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops);
-int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
+vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
+			const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, const struct iomap_ops *ops);
 loff_t iomap_seek_hole(struct inode *inode, loff_t offset,

+ 1 - 0
include/linux/linkage.h

@@ -90,6 +90,7 @@
 #ifndef WEAK
 #define WEAK(name)	   \
 	.weak name ASM_NL   \
+	ALIGN ASM_NL \
 	name:
 #endif
 

+ 3 - 0
include/linux/math64.h

@@ -281,4 +281,7 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
 }
 #endif /* mul_u64_u32_div */
 
+#define DIV64_U64_ROUND_UP(ll, d)	\
+	({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); })
+
 #endif /* _LINUX_MATH64_H */
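DIV64_U64_ROUND_UP() is plain round-up division for 64-bit operands, built on div64_u64() so it also works on 32-bit architectures; the divisor is evaluated only once via the _tmp temporary. For example:

	u64 periods = DIV64_U64_ROUND_UP(10ULL, 3ULL);		/* (10 + 2) / 3 == 4 */
	u64 pages   = DIV64_U64_ROUND_UP(4096ULL, 1024ULL);	/* exact multiple stays 4 */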

+ 0 - 15
include/linux/memblock.h

@@ -265,21 +265,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
 			       nid, flags, p_start, p_end, p_nid)
 
-/**
- * for_each_resv_unavail_range - iterate through reserved and unavailable memory
- * @i: u64 used as loop variable
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- *
- * Walks over unavailable but reserved (reserved && !memory) areas of memblock.
- * Available as soon as memblock is initialized.
- * Note: because this memory does not belong to any physical node, flags and
- * nid arguments do not make sense and thus not exported as arguments.
- */
-#define for_each_resv_unavail_range(i, p_start, p_end)			\
-	for_each_mem_range(i, &memblock.reserved, &memblock.memory,	\
-			   NUMA_NO_NODE, MEMBLOCK_NONE, p_start, p_end, NULL)
-
 static inline void memblock_set_region_flags(struct memblock_region *r,
 					     enum memblock_flags flags)
 {

+ 13 - 2
include/linux/memcontrol.h

@@ -78,7 +78,7 @@ struct mem_cgroup_reclaim_cookie {
 
 struct mem_cgroup_id {
 	int id;
-	atomic_t ref;
+	refcount_t ref;
 };
 
 /*
@@ -1268,10 +1268,11 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
 void memcg_kmem_put_cache(struct kmem_cache *cachep);
 int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 			    struct mem_cgroup *memcg);
+
+#ifdef CONFIG_MEMCG_KMEM
 int memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
 void memcg_kmem_uncharge(struct page *page, int order);
 
-#ifdef CONFIG_MEMCG_KMEM
 extern struct static_key_false memcg_kmem_enabled_key;
 extern struct workqueue_struct *memcg_kmem_cache_wq;
 
@@ -1307,6 +1308,16 @@ extern int memcg_expand_shrinker_maps(int new_id);
 extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
 				   int nid, int shrinker_id);
 #else
+
+static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
+{
+	return 0;
+}
+
+static inline void memcg_kmem_uncharge(struct page *page, int order)
+{
+}
+
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
 

+ 9 - 39
include/linux/mm.h

@@ -848,6 +848,8 @@ static inline bool is_zone_device_page(const struct page *page)
 {
 	return page_zonenum(page) == ZONE_DEVICE;
 }
+extern void memmap_init_zone_device(struct zone *, unsigned long,
+				    unsigned long, struct dev_pagemap *);
 #else
 static inline bool is_zone_device_page(const struct page *page)
 {
@@ -2304,6 +2306,8 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
 	struct list_head *uf);
+extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
+		       struct list_head *uf, bool downgrade);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
 
@@ -2502,11 +2506,11 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
-int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
-int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot);
-int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn);
 vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
 		unsigned long addr, pfn_t pfn);
@@ -2525,32 +2529,6 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
 	return VM_FAULT_NOPAGE;
 }
 
-static inline vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma,
-				unsigned long addr, pfn_t pfn)
-{
-	int err = vm_insert_mixed(vma, addr, pfn);
-
-	if (err == -ENOMEM)
-		return VM_FAULT_OOM;
-	if (err < 0 && err != -EBUSY)
-		return VM_FAULT_SIGBUS;
-
-	return VM_FAULT_NOPAGE;
-}
-
-static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
-			unsigned long addr, unsigned long pfn)
-{
-	int err = vm_insert_pfn(vma, addr, pfn);
-
-	if (err == -ENOMEM)
-		return VM_FAULT_OOM;
-	if (err < 0 && err != -EBUSY)
-		return VM_FAULT_SIGBUS;
-
-	return VM_FAULT_NOPAGE;
-}
-
 static inline vm_fault_t vmf_error(int err)
 {
 	if (err == -ENOMEM)
@@ -2558,16 +2536,8 @@ static inline vm_fault_t vmf_error(int err)
 	return VM_FAULT_SIGBUS;
 }
 
-struct page *follow_page_mask(struct vm_area_struct *vma,
-			      unsigned long address, unsigned int foll_flags,
-			      unsigned int *page_mask);
-
-static inline struct page *follow_page(struct vm_area_struct *vma,
-		unsigned long address, unsigned int foll_flags)
-{
-	unsigned int unused_page_mask;
-	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
-}
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+			 unsigned int foll_flags);
 
 #define FOLL_WRITE	0x01	/* check pte is writable */
 #define FOLL_TOUCH	0x02	/* mark page accessed */
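With the old vm_insert_pfn()/vm_insert_mixed() removed, the vmf_* variants return a vm_fault_t themselves, so callers no longer translate -ENOMEM/-EBUSY style errors the way the deleted inline wrappers did. A hedged sketch of what a driver ->fault handler looks like after the conversion (example_drv_pfn() is a made-up lookup helper):

/* Illustration only: a ->fault handler returning the vmf_* result directly. */
static vm_fault_t example_drv_fault(struct vm_fault *vmf)
{
	unsigned long pfn = example_drv_pfn(vmf);	/* hypothetical pfn lookup */

	/* Yields VM_FAULT_NOPAGE / VM_FAULT_OOM / VM_FAULT_SIGBUS directly. */
	return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
}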

+ 3 - 24
include/linux/mmu_notifier.h

@@ -2,7 +2,6 @@
 #ifndef _LINUX_MMU_NOTIFIER_H
 #define _LINUX_MMU_NOTIFIER_H
 
-#include <linux/types.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/mm_types.h>
@@ -11,9 +10,6 @@
 struct mmu_notifier;
 struct mmu_notifier_ops;
 
-/* mmu_notifier_ops flags */
-#define MMU_INVALIDATE_DOES_NOT_BLOCK	(0x01)
-
 #ifdef CONFIG_MMU_NOTIFIER
 
 /*
@@ -30,15 +26,6 @@ struct mmu_notifier_mm {
 };
 
 struct mmu_notifier_ops {
-	/*
-	 * Flags to specify behavior of callbacks for this MMU notifier.
-	 * Used to determine which context an operation may be called.
-	 *
-	 * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_range_* callbacks do not
-	 *	block
-	 */
-	int flags;
-
 	/*
 	 * Called either by mmu_notifier_unregister or when the mm is
 	 * being destroyed by exit_mmap, always before all pages are
@@ -153,7 +140,9 @@ struct mmu_notifier_ops {
 	 *
 	 * If blockable argument is set to false then the callback cannot
 	 * sleep and has to return with -EAGAIN. 0 should be returned
-	 * otherwise.
+	 * otherwise. Please note that if invalidate_range_start approves
+	 * a non-blocking behavior then the same applies to
+	 * invalidate_range_end.
 	 *
 	 */
 	int (*invalidate_range_start)(struct mmu_notifier *mn,
@@ -181,10 +170,6 @@ struct mmu_notifier_ops {
 	 * Note that this function might be called with just a sub-range
 	 * of what was passed to invalidate_range_start()/end(), if
 	 * called between those functions.
-	 *
-	 * If this callback cannot block, and invalidate_range_{start,end}
-	 * cannot block, mmu_notifier_ops.flags should have
-	 * MMU_INVALIDATE_DOES_NOT_BLOCK set.
 	 */
 	void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
 				 unsigned long start, unsigned long end);
@@ -239,7 +224,6 @@ extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  bool only_end);
 extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
 				  unsigned long start, unsigned long end);
-extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm);
 
 static inline void mmu_notifier_release(struct mm_struct *mm)
 {
@@ -493,11 +477,6 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
 {
 }
 
-static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
-{
-	return false;
-}
-
 static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 {
 }

+ 3 - 1
include/linux/mmzone.h

@@ -161,8 +161,10 @@ enum node_stat_item {
 	NR_SLAB_UNRECLAIMABLE,
 	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
 	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
+	WORKINGSET_NODES,
 	WORKINGSET_REFAULT,
 	WORKINGSET_ACTIVATE,
+	WORKINGSET_RESTORE,
 	WORKINGSET_NODERECLAIM,
 	NR_ANON_MAPPED,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
@@ -180,7 +182,7 @@ enum node_stat_item {
 	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
-	NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
+	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
 	NR_VM_NODE_STAT_ITEMS
 };
 

+ 13 - 1
include/linux/page-flags.h

@@ -69,13 +69,14 @@
  */
 enum pageflags {
 	PG_locked,		/* Page is locked. Don't touch. */
-	PG_error,
 	PG_referenced,
 	PG_uptodate,
 	PG_dirty,
 	PG_lru,
 	PG_active,
+	PG_workingset,
 	PG_waiters,		/* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
+	PG_error,
 	PG_slab,
 	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
 	PG_arch_1,
@@ -162,6 +163,14 @@ static inline int PagePoisoned(const struct page *page)
 	return page->flags == PAGE_POISON_PATTERN;
 }
 
+#ifdef CONFIG_DEBUG_VM
+void page_init_poison(struct page *page, size_t size);
+#else
+static inline void page_init_poison(struct page *page, size_t size)
+{
+}
+#endif
+
 /*
  * Page flags policies wrt compound pages
  *
@@ -280,6 +289,8 @@ PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
 PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
 PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
 	TESTCLEARFLAG(Active, active, PF_HEAD)
+PAGEFLAG(Workingset, workingset, PF_HEAD)
+	TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
 __PAGEFLAG(Slab, slab, PF_NO_TAIL)
 __PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL)
 PAGEFLAG(Checked, checked, PF_NO_COMPOUND)	   /* Used by some filesystems */
@@ -292,6 +303,7 @@ PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND);
 
 PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
 	__CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+	__SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
 PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
 	__CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
 	__SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)

+ 3 - 1
include/linux/pfn_t.h

@@ -9,8 +9,10 @@
  * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
  * PFN_DEV - pfn is not covered by system memmap by default
  * PFN_MAP - pfn has a dynamic page mapping established by a device driver
+ * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not
+ *		 get_user_pages
  */
-#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
+#define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
 #define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
 #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
 #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))

+ 53 - 0
include/linux/psi.h

@@ -0,0 +1,53 @@
+#ifndef _LINUX_PSI_H
+#define _LINUX_PSI_H
+
+#include <linux/psi_types.h>
+#include <linux/sched.h>
+
+struct seq_file;
+struct css_set;
+
+#ifdef CONFIG_PSI
+
+extern bool psi_disabled;
+
+void psi_init(void);
+
+void psi_task_change(struct task_struct *task, int clear, int set);
+
+void psi_memstall_tick(struct task_struct *task, int cpu);
+void psi_memstall_enter(unsigned long *flags);
+void psi_memstall_leave(unsigned long *flags);
+
+int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
+
+#ifdef CONFIG_CGROUPS
+int psi_cgroup_alloc(struct cgroup *cgrp);
+void psi_cgroup_free(struct cgroup *cgrp);
+void cgroup_move_task(struct task_struct *p, struct css_set *to);
+#endif
+
+#else /* CONFIG_PSI */
+
+static inline void psi_init(void) {}
+
+static inline void psi_memstall_enter(unsigned long *flags) {}
+static inline void psi_memstall_leave(unsigned long *flags) {}
+
+#ifdef CONFIG_CGROUPS
+static inline int psi_cgroup_alloc(struct cgroup *cgrp)
+{
+	return 0;
+}
+static inline void psi_cgroup_free(struct cgroup *cgrp)
+{
+}
+static inline void cgroup_move_task(struct task_struct *p, struct css_set *to)
+{
+	rcu_assign_pointer(p->cgroups, to);
+}
+#endif
+
+#endif /* CONFIG_PSI */
+
+#endif /* _LINUX_PSI_H */
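psi_memstall_enter()/psi_memstall_leave() are the hooks the memory-management code uses to mark the current task as stalled on memory for the pressure accounting; the flags cookie lets nested sections compose. A minimal sketch of the calling convention (the work in the middle is just a placeholder):

/* Illustration only: account the time a task spends stalled on memory. */
static void memstall_section_example(void)
{
	unsigned long pflags;

	psi_memstall_enter(&pflags);	/* current counts as memstalled from here */
	/* ... direct reclaim, compaction, thrashing wait, etc. ... */
	psi_memstall_leave(&pflags);	/* close the stall interval */
}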

+ 92 - 0
include/linux/psi_types.h

@@ -0,0 +1,92 @@
+#ifndef _LINUX_PSI_TYPES_H
+#define _LINUX_PSI_TYPES_H
+
+#include <linux/seqlock.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_PSI
+
+/* Tracked task states */
+enum psi_task_count {
+	NR_IOWAIT,
+	NR_MEMSTALL,
+	NR_RUNNING,
+	NR_PSI_TASK_COUNTS,
+};
+
+/* Task state bitmasks */
+#define TSK_IOWAIT	(1 << NR_IOWAIT)
+#define TSK_MEMSTALL	(1 << NR_MEMSTALL)
+#define TSK_RUNNING	(1 << NR_RUNNING)
+
+/* Resources that workloads could be stalled on */
+enum psi_res {
+	PSI_IO,
+	PSI_MEM,
+	PSI_CPU,
+	NR_PSI_RESOURCES,
+};
+
+/*
+ * Pressure states for each resource:
+ *
+ * SOME: Stalled tasks & working tasks
+ * FULL: Stalled tasks & no working tasks
+ */
+enum psi_states {
+	PSI_IO_SOME,
+	PSI_IO_FULL,
+	PSI_MEM_SOME,
+	PSI_MEM_FULL,
+	PSI_CPU_SOME,
+	/* Only per-CPU, to weigh the CPU in the global average: */
+	PSI_NONIDLE,
+	NR_PSI_STATES,
+};
+
+struct psi_group_cpu {
+	/* 1st cacheline updated by the scheduler */
+
+	/* Aggregator needs to know of concurrent changes */
+	seqcount_t seq ____cacheline_aligned_in_smp;
+
+	/* States of the tasks belonging to this group */
+	unsigned int tasks[NR_PSI_TASK_COUNTS];
+
+	/* Period time sampling buckets for each state of interest (ns) */
+	u32 times[NR_PSI_STATES];
+
+	/* Time of last task change in this group (rq_clock) */
+	u64 state_start;
+
+	/* 2nd cacheline updated by the aggregator */
+
+	/* Delta detection against the sampling buckets */
+	u32 times_prev[NR_PSI_STATES] ____cacheline_aligned_in_smp;
+};
+
+struct psi_group {
+	/* Protects data updated during an aggregation */
+	struct mutex stat_lock;
+
+	/* Per-cpu task state & time tracking */
+	struct psi_group_cpu __percpu *pcpu;
+
+	/* Periodic aggregation state */
+	u64 total_prev[NR_PSI_STATES - 1];
+	u64 last_update;
+	u64 next_update;
+	struct delayed_work clock_work;
+
+	/* Total stall times and sampled pressure averages */
+	u64 total[NR_PSI_STATES - 1];
+	unsigned long avg[NR_PSI_STATES - 1][3];
+};
+
+#else /* CONFIG_PSI */
+
+struct psi_group { };
+
+#endif /* CONFIG_PSI */
+
+#endif /* _LINUX_PSI_TYPES_H */

+ 10 - 3
include/linux/sched.h

@@ -25,6 +25,7 @@
 #include <linux/latencytop.h>
 #include <linux/sched/prio.h>
 #include <linux/signal_types.h>
+#include <linux/psi_types.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
 #include <linux/rseq.h>
@@ -706,6 +707,10 @@ struct task_struct {
 	unsigned			sched_contributes_to_load:1;
 	unsigned			sched_migrated:1;
 	unsigned			sched_remote_wakeup:1;
+#ifdef CONFIG_PSI
+	unsigned			sched_psi_wake_requeue:1;
+#endif
+
 	/* Force alignment to the next boundary: */
 	unsigned			:0;
 
@@ -719,9 +724,6 @@ struct task_struct {
 #endif
 #ifdef CONFIG_MEMCG
 	unsigned			in_user_fault:1;
-#ifdef CONFIG_MEMCG_KMEM
-	unsigned			memcg_kmem_skip_account:1;
-#endif
 #endif
 #ifdef CONFIG_COMPAT_BRK
 	unsigned			brk_randomized:1;
@@ -965,6 +967,10 @@ struct task_struct {
 	kernel_siginfo_t		*last_siginfo;
 
 	struct task_io_accounting	ioac;
+#ifdef CONFIG_PSI
+	/* Pressure stall state */
+	unsigned int			psi_flags;
+#endif
 #ifdef CONFIG_TASK_XACCT
 	/* Accumulated RSS usage: */
 	u64				acct_rss_mem1;
@@ -1391,6 +1397,7 @@ extern struct pid *cad_pid;
 #define PF_KTHREAD		0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
 #define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
+#define PF_MEMSTALL		0x01000000	/* Stalled due to lack of memory */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
 #define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */

+ 20 - 4
include/linux/sched/loadavg.h

@@ -22,10 +22,26 @@ extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
 #define EXP_5		2014		/* 1/exp(5sec/5min) */
 #define EXP_15		2037		/* 1/exp(5sec/15min) */
 
-#define CALC_LOAD(load,exp,n) \
-	load *= exp; \
-	load += n*(FIXED_1-exp); \
-	load >>= FSHIFT;
+/*
+ * a1 = a0 * e + a * (1 - e)
+ */
+static inline unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	unsigned long newload;
+
+	newload = load * exp + active * (FIXED_1 - exp);
+	if (active >= load)
+		newload += FIXED_1-1;
+
+	return newload / FIXED_1;
+}
+
+extern unsigned long calc_load_n(unsigned long load, unsigned long exp,
+				 unsigned long active, unsigned int n);
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
 extern void calc_global_load(unsigned long ticks);
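calc_load() is the old CALC_LOAD statement macro turned into a real function: it computes the exponentially weighted average a1 = a0*e + a*(1 - e) in FIXED_1 fixed-point, rounding up whenever the instantaneous value is at or above the current average so the average can actually converge to it. LOAD_INT()/LOAD_FRAC() split such a value for printing. A small worked example using this header's constants (FIXED_1 = 2048, EXP_1 = 1884):

	/* Illustration only: one 5-second sample folded into the 1-minute average. */
	unsigned long avg = 1024;		/* 0.50 in fixed-point */
	unsigned long active = 3 * FIXED_1;	/* 3 runnable tasks this tick */

	avg = calc_load(avg, EXP_1, active);	/* (1024*1884 + 6144*164 + 2047) / 2048 = 1434 */
	printk("load: %lu.%02lu\n", LOAD_INT(avg), LOAD_FRAC(avg));	/* prints "load: 0.70" */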
 

+ 45 - 11
include/linux/slab.h

@@ -295,12 +295,43 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
 #define SLAB_OBJ_MIN_SIZE      (KMALLOC_MIN_SIZE < 16 ? \
                                (KMALLOC_MIN_SIZE) : 16)
 
+/*
+ * Whenever changing this, take care of that kmalloc_type() and
+ * create_kmalloc_caches() still work as intended.
+ */
+enum kmalloc_cache_type {
+	KMALLOC_NORMAL = 0,
+	KMALLOC_RECLAIM,
+#ifdef CONFIG_ZONE_DMA
+	KMALLOC_DMA,
+#endif
+	NR_KMALLOC_TYPES
+};
+
 #ifndef CONFIG_SLOB
-extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
+extern struct kmem_cache *
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
+
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
+{
+	int is_dma = 0;
+	int type_dma = 0;
+	int is_reclaimable;
+
 #ifdef CONFIG_ZONE_DMA
-extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
+	is_dma = !!(flags & __GFP_DMA);
+	type_dma = is_dma * KMALLOC_DMA;
 #endif
 
+	is_reclaimable = !!(flags & __GFP_RECLAIMABLE);
+
+	/*
+	 * If an allocation is both __GFP_DMA and __GFP_RECLAIMABLE, return
+	 * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE
+	 */
+	return type_dma + (is_reclaimable & !is_dma) * KMALLOC_RECLAIM;
+}
+
 /*
  * Figure out which kmalloc slab an allocation of a certain size
  * belongs to.
@@ -501,18 +532,20 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
+#ifndef CONFIG_SLOB
+		unsigned int index;
+#endif
 		if (size > KMALLOC_MAX_CACHE_SIZE)
 			return kmalloc_large(size, flags);
 #ifndef CONFIG_SLOB
-		if (!(flags & GFP_DMA)) {
-			unsigned int index = kmalloc_index(size);
+		index = kmalloc_index(size);
 
-			if (!index)
-				return ZERO_SIZE_PTR;
+		if (!index)
+			return ZERO_SIZE_PTR;
 
-			return kmem_cache_alloc_trace(kmalloc_caches[index],
-					flags, size);
-		}
+		return kmem_cache_alloc_trace(
+				kmalloc_caches[kmalloc_type(flags)][index],
+				flags, size);
 #endif
 	}
 	return __kmalloc(size, flags);
@@ -542,13 +575,14 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 #ifndef CONFIG_SLOB
 	if (__builtin_constant_p(size) &&
-		size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
+		size <= KMALLOC_MAX_CACHE_SIZE) {
 		unsigned int i = kmalloc_index(size);
 
 		if (!i)
 			return ZERO_SIZE_PTR;
 
-		return kmem_cache_alloc_node_trace(kmalloc_caches[i],
+		return kmem_cache_alloc_node_trace(
+				kmalloc_caches[kmalloc_type(flags)][i],
 						flags, node, size);
 	}
 #endif
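kmalloc_type() maps an allocation's GFP flags to one of the kmalloc_caches[] rows, which is what lets the constant-size kmalloc()/kmalloc_node() fast paths above handle __GFP_DMA and __GFP_RECLAIMABLE instead of punting to __kmalloc(). Roughly, on a CONFIG_ZONE_DMA kernel:

	/* Illustration only: which cache row the flags select. */
	enum kmalloc_cache_type a = kmalloc_type(GFP_KERNEL);				/* KMALLOC_NORMAL */
	enum kmalloc_cache_type b = kmalloc_type(GFP_KERNEL | __GFP_RECLAIMABLE);	/* KMALLOC_RECLAIM */
	enum kmalloc_cache_type c = kmalloc_type(GFP_KERNEL | __GFP_DMA);		/* KMALLOC_DMA */
	enum kmalloc_cache_type d = kmalloc_type(__GFP_DMA | __GFP_RECLAIMABLE);	/* KMALLOC_DMA wins */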

+ 8 - 7
include/linux/swap.h

@@ -167,13 +167,14 @@ enum {
 	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
 	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
 	SWP_BLKDEV	= (1 << 6),	/* its a block device */
-	SWP_FILE	= (1 << 7),	/* set after swap_activate success */
-	SWP_AREA_DISCARD = (1 << 8),	/* single-time swap area discards */
-	SWP_PAGE_DISCARD = (1 << 9),	/* freed swap page-cluster discards */
-	SWP_STABLE_WRITES = (1 << 10),	/* no overwrite PG_writeback pages */
-	SWP_SYNCHRONOUS_IO = (1 << 11),	/* synchronous IO is efficient */
+	SWP_ACTIVATED	= (1 << 7),	/* set after swap_activate success */
+	SWP_FS		= (1 << 8),	/* swap file goes through fs */
+	SWP_AREA_DISCARD = (1 << 9),	/* single-time swap area discards */
+	SWP_PAGE_DISCARD = (1 << 10),	/* freed swap page-cluster discards */
+	SWP_STABLE_WRITES = (1 << 11),	/* no overwrite PG_writeback pages */
+	SWP_SYNCHRONOUS_IO = (1 << 12),	/* synchronous IO is efficient */
 					/* add others here before... */
-	SWP_SCANNING	= (1 << 12),	/* refcount in scan_swap_map */
+	SWP_SCANNING	= (1 << 13),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
@@ -296,7 +297,7 @@ struct vma_swap_readahead {
 
 /* linux/mm/workingset.c */
 void *workingset_eviction(struct address_space *mapping, struct page *page);
-bool workingset_refault(void *shadow);
+void workingset_refault(struct page *page, void *shadow);
 void workingset_activation(struct page *page);
 
 /* Do not use directly, use workingset_lookup_update */

+ 1 - 0
include/trace/events/mmflags.h

@@ -88,6 +88,7 @@
 	{1UL << PG_dirty,		"dirty"		},		\
 	{1UL << PG_dirty,		"dirty"		},		\
 	{1UL << PG_lru,			"lru"		},		\
 	{1UL << PG_lru,			"lru"		},		\
 	{1UL << PG_active,		"active"	},		\
 	{1UL << PG_active,		"active"	},		\
+	{1UL << PG_workingset,		"workingset"	},		\
 	{1UL << PG_slab,		"slab"		},		\
 	{1UL << PG_slab,		"slab"		},		\
 	{1UL << PG_owner_priv_1,	"owner_priv_1"	},		\
 	{1UL << PG_owner_priv_1,	"owner_priv_1"	},		\
 	{1UL << PG_arch_1,		"arch_1"	},		\
 	{1UL << PG_arch_1,		"arch_1"	},		\

+ 5 - 1
include/uapi/linux/taskstats.h

@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION	8
+#define TASKSTATS_VERSION	9
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -164,6 +164,10 @@ struct taskstats {
 	/* Delay waiting for memory reclaim */
 	__u64	freepages_count;
 	__u64	freepages_delay_total;
+
+	/* Delay waiting for thrashing page */
+	__u64	thrashing_count;
+	__u64	thrashing_delay_total;
 };
 
 

+ 19 - 0
init/Kconfig

@@ -490,6 +490,25 @@ config TASK_IO_ACCOUNTING
 
 	  Say N if unsure.
 
+config PSI
+	bool "Pressure stall information tracking"
+	help
+	  Collect metrics that indicate how overcommitted the CPU, memory,
+	  and IO capacity are in the system.
+
+	  If you say Y here, the kernel will create /proc/pressure/ with the
+	  pressure statistics files cpu, memory, and io. These will indicate
+	  the share of walltime in which some or all tasks in the system are
+	  delayed due to contention of the respective resource.
+
+	  In kernels with cgroup support, cgroups (cgroup2 only) will
+	  have cpu.pressure, memory.pressure, and io.pressure files,
+	  which aggregate pressure stalls for the grouped tasks only.
+
+	  For more details see Documentation/accounting/psi.txt.
+
+	  Say N if unsure.
+
 endmenu # "CPU/Task time and stats accounting"
 endmenu # "CPU/Task time and stats accounting"
 
 
 config CPU_ISOLATION
 config CPU_ISOLATION
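The help text above is the user-visible summary: each /proc/pressure/ file reports "some" (and, for memory and io, "full") stall time as 10s/60s/300s averages plus a cumulative total in microseconds. A small userspace sketch for reading one of them; the sample output in the comment only illustrates the documented format:

/* Illustration only: dump /proc/pressure/memory. */
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/pressure/memory", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	/*
	 * Typical shape of the output:
	 *   some avg10=0.00 avg60=0.12 avg300=0.33 total=123456
	 *   full avg10=0.00 avg60=0.05 avg300=0.27 total=78901
	 */
	return 0;
}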

Some files were not shown because too many files have changed in this diff.