
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86

* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (32 commits)
  x86: cpa, strict range check in try_preserve_large_page()
  x86: cpa, enable CONFIG_DEBUG_PAGEALLOC on 64-bit
  x86: cpa, use page pool
  x86: introduce page pool in cpa
  x86: DEBUG_PAGEALLOC: enable after mem_init()
  brk: help text typo fix
  lguest: accept guest _PAGE_PWT page table entries
  x86 PM: update stale comments
  x86 PM: consolidate suspend and hibernation code
  x86 PM: rename 32-bit files in arch/x86/power
  x86 PM: move 64-bit hibernation files to arch/x86/power
  x86: trivial printk optimizations
  x86: fix early_ioremap pagetable ops
  x86: construct 32-bit boot time page tables in native format.
  x86, core: remove CONFIG_FORCED_INLINING
  x86: avoid unused variable warning in mm/init_64.c
  x86: fixup more paravirt fallout
  brk: document randomize_va_space and CONFIG_COMPAT_BRK (was Re:
  x86: fix sparse warnings in acpi/bus.c
  x86: fix sparse warning in topology.c
  ...
Linus Torvalds, 17 years ago
commit 0b6ca82af8

+ 0 - 9
Documentation/feature-removal-schedule.txt

@@ -111,15 +111,6 @@ Who:	Christoph Hellwig <hch@lst.de>
 
 ---------------------------
 
-What:	CONFIG_FORCED_INLINING
-When:	June 2006
-Why:	Config option is there to see if gcc is good enough. (in january
-        2006). If it is, the behavior should just be the default. If it's not,
-	the option should just go away entirely.
-Who:    Arjan van de Ven
-
----------------------------
-
 What:   eepro100 network driver
 When:   January 2007
 Why:    replaced by the e100 driver

+ 29 - 0
Documentation/sysctl/kernel.txt

@@ -41,6 +41,7 @@ show up in /proc/sys/kernel:
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
+- randomize_va_space
 - real-root-dev               ==> Documentation/initrd.txt
 - reboot-cmd                  [ SPARC only ]
 - rtsig-max
@@ -280,6 +281,34 @@ send before ratelimiting kicks in.
 
 ==============================================================
 
+randomize-va-space:
+
+This option can be used to select the type of process address
+space randomization that is used in the system, for architectures
+that support this feature.
+
+0 - Turn the process address space randomization off by default.
+
+1 - Make the addresses of mmap base, stack and VDSO page randomized.
+    This, among other things, implies that shared libraries will be
+    loaded to random addresses. Also for PIE-linked binaries, the location
+    of code start is randomized.
+
+    With heap randomization, the situation is a little bit more
+    complicated.
+    There a few legacy applications out there (such as some ancient
+    versions of libc.so.5 from 1996) that assume that brk area starts
+    just after the end of the code+bss. These applications break when
+    start of the brk area is randomized. There are however no known
+    non-legacy applications that would be broken this way, so for most
+    systems it is safe to choose full randomization. However there is
+    a CONFIG_COMPAT_BRK option for systems with ancient and/or broken
+    binaries, that makes heap non-randomized, but keeps all other
+    parts of process address space randomized if randomize_va_space
+    sysctl is turned on.
+
+==============================================================
+
 reboot-cmd: (Sparc only)
 
 ??? This seems to be a way to give an argument to the Sparc

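A quick way to exercise the sysctl documented above is to read (or write) /proc/sys/kernel/randomize_va_space. A minimal user-space sketch follows; the proc path comes from the documentation text above, everything else is illustrative:

#include <stdio.h>

int main(void)
{
	/* Read the current randomization mode; write "0" or "1" to the
	 * same file (as root) to change it. */
	FILE *f = fopen("/proc/sys/kernel/randomize_va_space", "r");
	int val;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("randomize_va_space = %d\n", val);
	fclose(f);
	return 0;
}
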
+ 1 - 5
arch/x86/Kconfig.debug

@@ -34,13 +34,9 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
-comment "Page alloc debug is incompatible with Software Suspend on i386"
-	depends on DEBUG_KERNEL && HIBERNATION
-	depends on X86_32
-
 config DEBUG_PAGEALLOC
 	bool "Debug page memory allocations"
-	depends on DEBUG_KERNEL && X86_32
+	depends on DEBUG_KERNEL
 	help
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types

+ 3 - 1
arch/x86/Makefile

@@ -191,8 +191,10 @@ drivers-$(CONFIG_PCI)            += arch/x86/pci/
 # must be linked after kernel/
 drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
 
-ifeq ($(CONFIG_X86_32),y)
+# suspend and hibernation support
 drivers-$(CONFIG_PM) += arch/x86/power/
+
+ifeq ($(CONFIG_X86_32),y)
 drivers-$(CONFIG_FB) += arch/x86/video/
 endif
 

+ 14 - 10
arch/x86/boot/printf.c

@@ -33,8 +33,8 @@ static int skip_atoi(const char **s)
 #define PLUS	4		/* show plus */
 #define SPACE	8		/* space if plus */
 #define LEFT	16		/* left justified */
-#define SPECIAL	32		/* 0x */
-#define LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
+#define SMALL	32		/* Must be 32 == 0x20 */
+#define SPECIAL	64		/* 0x */
 
 #define do_div(n,base) ({ \
 int __res; \
@@ -45,12 +45,16 @@ __res; })
 static char *number(char *str, long num, int base, int size, int precision,
 		    int type)
 {
-	char c, sign, tmp[66];
-	const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+	/* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
+	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+	char tmp[66];
+	char c, sign, locase;
 	int i;
 
-	if (type & LARGE)
-		digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
+	 * produces same digits or (maybe lowercased) letters */
+	locase = (type & SMALL);
 	if (type & LEFT)
 		type &= ~ZEROPAD;
 	if (base < 2 || base > 36)
@@ -81,7 +85,7 @@ static char *number(char *str, long num, int base, int size, int precision,
 		tmp[i++] = '0';
 	else
 		while (num != 0)
-			tmp[i++] = digits[do_div(num, base)];
+			tmp[i++] = (digits[do_div(num, base)] | locase);
 	if (i > precision)
 		precision = i;
 	size -= precision;
@@ -95,7 +99,7 @@ static char *number(char *str, long num, int base, int size, int precision,
 			*str++ = '0';
 		else if (base == 16) {
 			*str++ = '0';
-			*str++ = digits[33];
+			*str++ = ('X' | locase);
 		}
 	}
 	if (!(type & LEFT))
@@ -244,9 +248,9 @@ int vsprintf(char *buf, const char *fmt, va_list args)
 			base = 8;
 			break;
 
-		case 'X':
-			flags |= LARGE;
 		case 'x':
+			flags |= SMALL;
+		case 'X':
 			base = 16;
 			break;
 

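The SMALL/locase rework above leans on ASCII layout: an uppercase letter differs from its lowercase form only in bit 0x20, and the digits '0'-'9' already have that bit set, so OR-ing it in is a no-op for them. A standalone sketch (not kernel code) demonstrating the trick:

#include <stdio.h>

int main(void)
{
	const char digits[] = "0123456789ABCDEF";
	int locase = 0x20;	/* what (type & SMALL) evaluates to for %x */
	int i;

	/* prints 0123456789abcdef: digits unchanged, letters lowercased */
	for (i = 0; i < 16; i++)
		putchar(digits[i] | locase);
	putchar('\n');
	return 0;
}
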
+ 0 - 1
arch/x86/configs/i386_defconfig

@@ -1421,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set

+ 0 - 1
arch/x86/configs/x86_64_defconfig

@@ -1346,7 +1346,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set

+ 0 - 2
arch/x86/kernel/Makefile

@@ -84,8 +84,6 @@ ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
         obj-$(CONFIG_AUDIT)		+= audit_64.o
-        obj-$(CONFIG_PM)		+= suspend_64.o
-        obj-$(CONFIG_HIBERNATION)	+= suspend_asm_64.o
 
         obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
         obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o

+ 1 - 1
arch/x86/kernel/cpu/mcheck/therm_throt.c

@@ -118,7 +118,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
 
 static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
 {
-	return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
 }
 
 /* Mutex protecting device creation against CPU hotplug */

+ 6 - 9
arch/x86/kernel/entry_32.S

@@ -409,7 +409,8 @@ restore_nocheck_notrace:
 	RESTORE_REGS
 	addl $4, %esp			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
-1:	INTERRUPT_RETURN
+ENTRY(irq_return)
+	INTERRUPT_RETURN
 .section .fixup,"ax"
 iret_exc:
 	pushl $0			# no error code
@@ -418,7 +419,7 @@ iret_exc:
 .previous
 .section __ex_table,"a"
 	.align 4
-	.long 1b,iret_exc
+	.long irq_return,iret_exc
 .previous
 
 	CFI_RESTORE_STATE
@@ -865,20 +866,16 @@ nmi_espfix_stack:
 	RESTORE_REGS
 	lss 12+4(%esp), %esp		# back to espfix stack
 	CFI_ADJUST_CFA_OFFSET -24
-1:	INTERRUPT_RETURN
+	jmp irq_return
 	CFI_ENDPROC
-.section __ex_table,"a"
-	.align 4
-	.long 1b,iret_exc
-.previous
 KPROBE_END(nmi)
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
-1:	iret
+	iret
 .section __ex_table,"a"
 	.align 4
-	.long 1b,iret_exc
+	.long native_iret, iret_exc
 .previous
 END(native_iret)
 

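The entry_32.S change above replaces anonymous "1:" labels with a single shared irq_return label, so that one __ex_table record covers every path that reaches the iret. Conceptually, each record in that section is just a pair of addresses; a sketch of the idea in C (the two-word layout matches the 32-bit kernel of this era, but treat it as illustrative):

/* If the instruction at .insn faults, the trap handler looks the
 * address up in __ex_table and resumes at .fixup instead -- here,
 * "iret faulted at irq_return, recover at iret_exc". */
struct exception_table_entry {
	unsigned long insn;	/* address of the faulting instruction */
	unsigned long fixup;	/* address to jump to instead */
};
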
+ 13 - 5
arch/x86/kernel/entry_64.S

@@ -581,16 +581,24 @@ retint_restore_args:	/* return to kernel space */
 	 */
 	TRACE_IRQS_IRETQ
 restore_args:
-	RESTORE_ARGS 0,8,0						
-#ifdef CONFIG_PARAVIRT
+	RESTORE_ARGS 0,8,0
+
+ENTRY(irq_return)
 	INTERRUPT_RETURN
-#endif
+
+	.section __ex_table, "a"
+	.quad irq_return, bad_iret
+	.previous
+
+#ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
 	iretq
 
 	.section __ex_table,"a"
 	.quad native_iret, bad_iret
 	.previous
+#endif
+
 	.section .fixup,"ax"
 bad_iret:
 	/*
@@ -804,7 +812,7 @@ paranoid_swapgs\trace:
 	SWAPGS_UNSAFE_STACK
 paranoid_restore\trace:
 	RESTORE_ALL 8
-	INTERRUPT_RETURN
+	jmp irq_return
 paranoid_userspace\trace:
 	GET_THREAD_INFO(%rcx)
 	movl threadinfo_flags(%rcx),%ebx
@@ -919,7 +927,7 @@ error_kernelspace:
 	   iret run with kernel gs again, so don't set the user space flag.
 	   B stepping K8s sometimes report an truncated RIP for IRET 
 	   exceptions returning to compat mode. Check for these here too. */
-	leaq native_iret(%rip),%rbp
+	leaq irq_return(%rip),%rbp
 	cmpq %rbp,RIP(%rsp) 
 	je   error_swapgs
 	movl %ebp,%ebp	/* zero extend */

+ 1 - 4
arch/x86/kernel/geode_32.c

@@ -163,14 +163,11 @@ EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
 
 static int __init geode_southbridge_init(void)
 {
-	int timers;
-
 	if (!is_geode())
 		return -ENODEV;
 
 	init_lbars();
-	timers = geode_mfgpt_detect();
-	printk(KERN_INFO "geode:  %d MFGPT timers available.\n", timers);
+	(void) mfgpt_timer_setup();
 	return 0;
 }
 

+ 116 - 35
arch/x86/kernel/head_32.S

@@ -19,6 +19,10 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/setup.h>
+#include <asm/processor-flags.h>
+
+/* Physical address */
+#define pa(X) ((X) - __PAGE_OFFSET)
 
 /*
  * References to members of the new_cpu_data structure.
@@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
  */
 .section .text.head,"ax",@progbits
 ENTRY(startup_32)
-	/* check to see if KEEP_SEGMENTS flag is meaningful */
-	cmpw $0x207, BP_version(%esi)
-	jb 1f
-
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
 	testb $(1<<6), BP_loadflags(%esi)
@@ -92,7 +92,7 @@ ENTRY(startup_32)
 /*
  * Set segments to known values.
  */
-1:	lgdt boot_gdt_descr - __PAGE_OFFSET
+	lgdt pa(boot_gdt_descr)
 	movl $(__BOOT_DS),%eax
 	movl %eax,%ds
 	movl %eax,%es
@@ -105,8 +105,8 @@ ENTRY(startup_32)
  */
 	cld
 	xorl %eax,%eax
-	movl $__bss_start - __PAGE_OFFSET,%edi
-	movl $__bss_stop - __PAGE_OFFSET,%ecx
+	movl $pa(__bss_start),%edi
+	movl $pa(__bss_stop),%ecx
 	subl %edi,%ecx
 	shrl $2,%ecx
 	rep ; stosl
@@ -118,31 +118,32 @@ ENTRY(startup_32)
  * (kexec on panic case). Hence copy out the parameters before initializing
  * page tables.
  */
-	movl $(boot_params - __PAGE_OFFSET),%edi
+	movl $pa(boot_params),%edi
 	movl $(PARAM_SIZE/4),%ecx
 	cld
 	rep
 	movsl
-	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+	movl pa(boot_params) + NEW_CL_POINTER,%esi
 	andl %esi,%esi
 	jz 1f			# No comand line
-	movl $(boot_command_line - __PAGE_OFFSET),%edi
+	movl $pa(boot_command_line),%edi
 	movl $(COMMAND_LINE_SIZE/4),%ecx
 	rep
 	movsl
 1:
 
 #ifdef CONFIG_PARAVIRT
-	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
+	/* This is can only trip for a broken bootloader... */
+	cmpw $0x207, pa(boot_params + BP_version)
 	jb default_entry
 
 	/* Paravirt-compatible boot parameters.  Look to see what architecture
 		we're booting under. */
-	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
+	movl pa(boot_params + BP_hardware_subarch), %eax
 	cmpl $num_subarch_entries, %eax
 	jae bad_subarch
 
-	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
+	movl pa(subarch_entries)(,%eax,4), %eax
 	subl $__PAGE_OFFSET, %eax
 	jmp *%eax
 
@@ -170,17 +171,68 @@ num_subarch_entries = (. - subarch_entries) / 4
  * Mappings are created both at virtual address 0 (identity mapping)
  * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
  *
- * Warning: don't use %esi or the stack in this code.  However, %esp
- * can be used as a GPR if you really need it...
+ * Note that the stack is not yet set up!
  */
-page_pde_offset = (__PAGE_OFFSET >> 20);
+#define PTE_ATTR	0x007		/* PRESENT+RW+USER */
+#define PDE_ATTR	0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_ATTR	0x001		/* PRESENT (no other attributes) */
 
 default_entry:
-	movl $(pg0 - __PAGE_OFFSET), %edi
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
+#ifdef CONFIG_X86_PAE
+
+	/*
+	 * In PAE mode swapper_pg_dir is statically defined to contain enough
+	 * entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+	 * entries). The identity mapping is handled by pointing two PGD
+	 * entries to the first kernel PMD.
+	 *
+	 * Note the upper half of each PMD or PTE are always zero at
+	 * this stage.
+	 */
+
+#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */
+
+	xorl %ebx,%ebx				/* %ebx is kept at zero */
+
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_pmd), %edx
+	movl $PTE_ATTR, %eax
+10:
+	leal PDE_ATTR(%edi),%ecx		/* Create PMD entry */
+	movl %ecx,(%edx)			/* Store PMD entry */
+						/* Upper half already zero */
+	addl $8,%edx
+	movl $512,%ecx
+11:
+	stosl
+	xchgl %eax,%ebx
+	stosl
+	xchgl %eax,%ebx
+	addl $0x1000,%eax
+	loop 11b
+
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables.
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+	cmpl %ebp,%eax
+	jb 10b
+1:
+	movl %edi,pa(init_pg_tables_end)
+
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
+#else	/* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+	movl $pa(pg0), %edi
+	movl $pa(swapper_pg_dir), %edx
+	movl $PTE_ATTR, %eax
 10:
-	leal 0x007(%edi),%ecx			/* Create PDE entry */
+	leal PDE_ATTR(%edi),%ecx		/* Create PDE entry */
 	movl %ecx,(%edx)			/* Store identity PDE entry */
 	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
 	addl $4,%edx
@@ -189,19 +241,20 @@ default_entry:
 	stosl
 	addl $0x1000,%eax
 	loop 11b
-	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
-	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
-	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+	/*
+	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
+	 * bytes beyond the end of our own page tables; the +0x007 is
+	 * the attribute bits
+	 */
+	leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
-	/* Do an early initialization of the fixmap area */
-	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
-	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
-	movl %eax, 4092(%edx)
+	movl %edi,pa(init_pg_tables_end)
 
+	/* Do early initialization of the fixmap area */
+	movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+	movl %eax,pa(swapper_pg_dir+0xffc)
+#endif
 	jmp 3f
 /*
  * Non-boot CPU entry point; entered from trampoline.S
@@ -241,7 +294,7 @@ ENTRY(startup_32_smp)
  *	NOTE! We have to correct for the fact that we're
  *	not yet offset PAGE_OFFSET..
  */
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
+#define cr4_bits pa(mmu_cr4_features)
 	movl cr4_bits,%edx
 	andl %edx,%edx
 	jz 6f
@@ -276,10 +329,10 @@ ENTRY(startup_32_smp)
 /*
  * Enable paging
  */
-	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
+	movl $pa(swapper_pg_dir),%eax
 	movl %eax,%cr3		/* set the page table pointer.. */
 	movl %cr0,%eax
-	orl $0x80000000,%eax
+	orl  $X86_CR0_PG,%eax
 	movl %eax,%cr0		/* ..and set paging (PG) bit */
 	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
 1:
@@ -552,16 +605,44 @@ ENTRY(_stext)
  */
 .section ".bss.page_aligned","wa"
 	.align PAGE_SIZE_asm
+#ifdef CONFIG_X86_PAE
+ENTRY(swapper_pg_pmd)
+	.fill 1024*KPMDS,4,0
+#else
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
-ENTRY(swapper_pg_pmd)
+#endif
+ENTRY(swapper_pg_fixmap)
 	.fill 1024,4,0
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
-
 /*
  * This starts the data section.
  */
+#ifdef CONFIG_X86_PAE
+.section ".data.page_aligned","wa"
+	/* Page-aligned for the benefit of paravirt? */
+	.align PAGE_SIZE_asm
+ENTRY(swapper_pg_dir)
+	.long	pa(swapper_pg_pmd+PGD_ATTR),0		/* low identity map */
+# if KPMDS == 3
+	.long	pa(swapper_pg_pmd+PGD_ATTR),0
+	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+	.long	pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
+# elif KPMDS == 2
+	.long	0,0
+	.long	pa(swapper_pg_pmd+PGD_ATTR),0
+	.long	pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+# elif KPMDS == 1
+	.long	0,0
+	.long	0,0
+	.long	pa(swapper_pg_pmd+PGD_ATTR),0
+# else
+#  error "Kernel PMDs should be 1, 2 or 3"
+# endif
+	.align PAGE_SIZE_asm		/* needs to be page-sized too */
+#endif
+
 .data
 ENTRY(stack_start)
 	.long init_thread_union+THREAD_SIZE

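The KPMDS macro in the PAE path above counts how many of the four PAE PGD entries cover kernel space: each entry maps 1 GiB, so the kernel needs one PMD page per GiB above __PAGE_OFFSET. A standalone sketch of that arithmetic (the VMSPLIT offsets are illustrative, not taken from this diff):

#include <stdio.h>

int main(void)
{
	/* Common 32-bit user/kernel split choices */
	unsigned long long offsets[] = { 0xC0000000ULL,   /* 3G/1G */
					 0x80000000ULL,   /* 2G/2G */
					 0x40000000ULL }; /* 1G/3G */
	int i;

	for (i = 0; i < 3; i++) {
		/* KPMDS = (0x100000000 - __PAGE_OFFSET) >> 30 */
		unsigned long long kpmds =
			(0x100000000ULL - offsets[i]) >> 30;
		printf("__PAGE_OFFSET %#llx -> KPMDS = %llu\n",
		       offsets[i], kpmds);	/* prints 1, 2, 3 */
	}
	return 0;
}
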
+ 65 - 58
arch/x86/kernel/mfgpt_32.c

@@ -12,48 +12,37 @@
  */
 
 /*
- * We are using the 32Khz input clock - its the only one that has the
+ * We are using the 32.768kHz input clock - it's the only one that has the
  * ranges we find desirable.  The following table lists the suitable
- * divisors and the associated hz, minimum interval
- * and the maximum interval:
+ * divisors and the associated Hz, minimum interval and the maximum interval:
  *
- *  Divisor   Hz      Min Delta (S) Max Delta (S)
- *   1        32000     .0005          2.048
- *   2        16000      .001          4.096
- *   4         8000      .002          8.192
- *   8         4000      .004         16.384
- *   16        2000      .008         32.768
- *   32        1000      .016         65.536
- *   64         500      .032        131.072
- *  128         250      .064        262.144
- *  256         125      .128        524.288
+ *  Divisor   Hz      Min Delta (s)  Max Delta (s)
+ *   1        32768   .00048828125      2.000
+ *   2        16384   .0009765625       4.000
+ *   4         8192   .001953125        8.000
+ *   8         4096   .00390625        16.000
+ *   16        2048   .0078125         32.000
+ *   32        1024   .015625          64.000
+ *   64         512   .03125          128.000
+ *  128         256   .0625           256.000
+ *  256         128   .125            512.000
  */
 
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
 #include <asm/geode.h>
 
-#define F_AVAIL    0x01
-
 static struct mfgpt_timer_t {
-	int flags;
-	struct module *owner;
+	unsigned int avail:1;
 } mfgpt_timers[MFGPT_MAX_TIMERS];
 
 /* Selected from the table above */
 
 #define MFGPT_DIVISOR 16
 #define MFGPT_SCALE  4     /* divisor = 2^(scale) */
-#define MFGPT_HZ  (32000 / MFGPT_DIVISOR)
+#define MFGPT_HZ  (32768 / MFGPT_DIVISOR)
 #define MFGPT_PERIODIC (MFGPT_HZ / HZ)
 
-#ifdef CONFIG_GEODE_MFGPT_TIMER
-static int __init mfgpt_timer_setup(void);
-#else
-#define mfgpt_timer_setup() (0)
-#endif
-
 /* Allow for disabling of MFGPTs */
 static int disable;
 static int __init mfgpt_disable(char *s)
@@ -85,28 +74,37 @@ __setup("mfgptfix", mfgpt_fix);
  * In other cases (such as with VSAless OpenFirmware), the system firmware
  * leaves timers available for us to use.
  */
-int __init geode_mfgpt_detect(void)
+
+
+static int timers = -1;
+
+static void geode_mfgpt_detect(void)
 {
-	int count = 0, i;
+	int i;
 	u16 val;
 
+	timers = 0;
+
 	if (disable) {
-		printk(KERN_INFO "geode-mfgpt:  Skipping MFGPT setup\n");
-		return 0;
+		printk(KERN_INFO "geode-mfgpt:  MFGPT support is disabled\n");
+		goto done;
+	}
+
+	if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
+		printk(KERN_INFO "geode-mfgpt:  MFGPT LBAR is not set up\n");
+		goto done;
 	}
 
 	for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
 		val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
 		if (!(val & MFGPT_SETUP_SETUP)) {
-			mfgpt_timers[i].flags = F_AVAIL;
-			count++;
+			mfgpt_timers[i].avail = 1;
+			timers++;
 		}
 	}
 
-	/* set up clock event device, if desired */
-	i = mfgpt_timer_setup();
-
-	return count;
+done:
+	printk(KERN_INFO "geode-mfgpt:  %d MFGPT timers available.\n", timers);
 }
 
 int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
@@ -183,36 +181,41 @@ int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
 	return 0;
 }
 
-static int mfgpt_get(int timer, struct module *owner)
+static int mfgpt_get(int timer)
 {
-	mfgpt_timers[timer].flags &= ~F_AVAIL;
-	mfgpt_timers[timer].owner = owner;
+	mfgpt_timers[timer].avail = 0;
 	printk(KERN_INFO "geode-mfgpt:  Registered timer %d\n", timer);
 	return timer;
 }
 
-int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner)
+int geode_mfgpt_alloc_timer(int timer, int domain)
 {
 	int i;
 
-	if (!geode_get_dev_base(GEODE_DEV_MFGPT))
-		return -ENODEV;
+	if (timers == -1) {
+		/* timers haven't been detected yet */
+		geode_mfgpt_detect();
+	}
+
+	if (!timers)
+		return -1;
+
 	if (timer >= MFGPT_MAX_TIMERS)
-		return -EIO;
+		return -1;
 
 	if (timer < 0) {
 		/* Try to find an available timer */
 		for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
-			if (mfgpt_timers[i].flags & F_AVAIL)
-				return mfgpt_get(i, owner);
+			if (mfgpt_timers[i].avail)
+				return mfgpt_get(i);
 
 			if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
 				break;
 		}
 	} else {
 		/* If they requested a specific timer, try to honor that */
-		if (mfgpt_timers[timer].flags & F_AVAIL)
-			return mfgpt_get(timer, owner);
+		if (mfgpt_timers[timer].avail)
+			return mfgpt_get(timer);
 	}
 
 	/* No timers available - too bad */
@@ -244,10 +247,11 @@ static int __init mfgpt_setup(char *str)
 }
 __setup("mfgpt_irq=", mfgpt_setup);
 
-static inline void mfgpt_disable_timer(u16 clock)
+static void mfgpt_disable_timer(u16 clock)
 {
-	u16 val = geode_mfgpt_read(clock, MFGPT_REG_SETUP);
-	geode_mfgpt_write(clock, MFGPT_REG_SETUP, val & ~MFGPT_SETUP_CNTEN);
+	/* avoid races by clearing CMP1 and CMP2 unconditionally */
+	geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
+			MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
 }
 
 static int mfgpt_next_event(unsigned long, struct clock_event_device *);
@@ -263,7 +267,7 @@ static struct clock_event_device mfgpt_clockevent = {
 	.shift = 32
 };
 
-static inline void mfgpt_start_timer(u16 clock, u16 delta)
+static void mfgpt_start_timer(u16 delta)
 {
 	geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
 	geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
@@ -278,21 +282,25 @@ static void mfgpt_set_mode(enum clock_event_mode mode,
 	mfgpt_disable_timer(mfgpt_event_clock);
 
 	if (mode == CLOCK_EVT_MODE_PERIODIC)
-		mfgpt_start_timer(mfgpt_event_clock, MFGPT_PERIODIC);
+		mfgpt_start_timer(MFGPT_PERIODIC);
 
 	mfgpt_tick_mode = mode;
 }
 
 static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
 {
-	mfgpt_start_timer(mfgpt_event_clock, delta);
+	mfgpt_start_timer(delta);
 	return 0;
 }
 
-/* Assume (foolishly?), that this interrupt was due to our tick */
-
 static irqreturn_t mfgpt_tick(int irq, void *dev_id)
 {
+	u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
+
+	/* See if the interrupt was for us */
+	if (!(val & (MFGPT_SETUP_SETUP  | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
+		return IRQ_NONE;
+
 	/* Turn off the clock (and clear the event) */
 	mfgpt_disable_timer(mfgpt_event_clock);
 
@@ -320,13 +328,12 @@ static struct irqaction mfgptirq  = {
 	.name = "mfgpt-timer"
 };
 
-static int __init mfgpt_timer_setup(void)
+int __init mfgpt_timer_setup(void)
 {
 	int timer, ret;
 	u16 val;
 
-	timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING,
-			THIS_MODULE);
+	timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
 	if (timer < 0) {
 		printk(KERN_ERR
 		       "mfgpt-timer:  Could not allocate a MFPGT timer\n");
@@ -363,7 +370,7 @@ static int __init mfgpt_timer_setup(void)
 			&mfgpt_clockevent);
 
 	printk(KERN_INFO
-	       "mfgpt-timer:  registering the MFGT timer as a clock event.\n");
+	       "mfgpt-timer:  registering the MFGPT timer as a clock event.\n");
 	clockevents_register_device(&mfgpt_clockevent);
 
 	return 0;

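The corrected divisor table above follows directly from the 32.768 kHz input clock and the 16-bit MFGPT counter: Hz = 32768/divisor, the maximum interval is 65536 ticks, and (inferring from the table's entries) the minimum programmable delta is 16 ticks. A standalone sketch that reproduces the table, not kernel code:

#include <stdio.h>

int main(void)
{
	int div;

	for (div = 1; div <= 256; div *= 2) {
		double hz = 32768.0 / div;

		/* min = 16 ticks, max = 65536 ticks at this rate */
		printf("%4d  %7.0f Hz  min %.11f s  max %.3f s\n",
		       div, hz, 16.0 / hz, 65536.0 / hz);
	}
	return 0;
}
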
+ 4 - 0
arch/x86/kernel/setup_32.c

@@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 EXPORT_SYMBOL(boot_cpu_data);
 
+#ifndef CONFIG_X86_PAE
 unsigned long mmu_cr4_features;
+#else
+unsigned long mmu_cr4_features = X86_CR4_PAE;
+#endif
 
 /* for MCA, but anyone else can use it if they want */
 unsigned int machine_id;

+ 1 - 1
arch/x86/kernel/topology.c

@@ -53,7 +53,7 @@ EXPORT_SYMBOL(arch_register_cpu);
 
 void arch_unregister_cpu(int num)
 {
-	return unregister_cpu(&per_cpu(cpu_devices, num).cpu);
+	unregister_cpu(&per_cpu(cpu_devices, num).cpu);
 }
 EXPORT_SYMBOL(arch_unregister_cpu);
 #else

+ 29 - 45
arch/x86/mm/init_32.c

@@ -46,6 +46,7 @@
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/paravirt.h>
+#include <asm/setup.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
 
 void __init native_pagetable_setup_start(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	int i;
+	unsigned long pfn, va;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
 	/*
-	 * Init entries of the first-level page table to the
-	 * zero page, if they haven't already been set up.
-	 *
-	 * In a normal native boot, we'll be running on a
-	 * pagetable rooted in swapper_pg_dir, but not in PAE
-	 * mode, so this will end up clobbering the mappings
-	 * for the lower 24Mbytes of the address space,
-	 * without affecting the kernel address space.
+	 * Remove any mappings which extend past the end of physical
+	 * memory from the boot time page table:
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++)
-		set_pgd(&base[i],
-			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-
-	/* Make sure kernel address space is empty so that a pagetable
-	   will be allocated for it. */
-	memset(&base[USER_PTRS_PER_PGD], 0,
-	       KERNEL_PGD_PTRS * sizeof(pgd_t));
-#else
+	for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
+		va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
+		pgd = base + pgd_index(va);
+		if (!pgd_present(*pgd))
+			break;
+
+		pud = pud_offset(pgd, va);
+		pmd = pmd_offset(pud, va);
+		if (!pmd_present(*pmd))
+			break;
+
+		pte = pte_offset_kernel(pmd, va);
+		if (!pte_present(*pte))
+			break;
+
+		pte_clear(NULL, va, pte);
+	}
 	paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
-#endif
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
 {
-#ifdef CONFIG_X86_PAE
-	/*
-	 * Add low memory identity-mappings - SMP needs it when
-	 * starting up on an AP from real-mode. In the non-PAE
-	 * case we already have these mappings through head.S.
-	 * All user-space mappings are explicitly cleared after
-	 * SMP startup.
-	 */
-	set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
-#endif
 }
 
 /*
@@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
  * the boot process.
  *
  * If we're booting on native hardware, this will be a pagetable
- * constructed in arch/i386/kernel/head.S, and not running in PAE mode
- * (even if we'll end up running in PAE).  The root of the pagetable
- * will be swapper_pg_dir.
+ * constructed in arch/x86/kernel/head_32.S.  The root of the
+ * pagetable will be swapper_pg_dir.
  *
  * If we're booting paravirtualized under a hypervisor, then there are
  * more options: we may already be running PAE, and the pagetable may
@@ -537,14 +531,6 @@ void __init paging_init(void)
 
 	load_cr3(swapper_pg_dir);
 
-#ifdef CONFIG_X86_PAE
-	/*
-	 * We will bail out later - printk doesn't work right now so
-	 * the user would just see a hanging kernel.
-	 */
-	if (cpu_has_pae)
-		set_in_cr4(X86_CR4_PAE);
-#endif
 	__flush_tlb_all();
 
 	kmap_init();
@@ -675,13 +661,11 @@ void __init mem_init(void)
 	BUG_ON((unsigned long)high_memory		> VMALLOC_START);
 #endif /* double-sanity-check paranoia */
 
-#ifdef CONFIG_X86_PAE
-	if (!cpu_has_pae)
-		panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
 	if (boot_cpu_data.wp_works_ok < 0)
 		test_wp_bit();
 
+	cpa_init();
+
 	/*
 	 * Subtle. SMP is doing it's boot stuff late (because it has to
 	 * fork idle threads) - but it also needs low mappings for the

+ 5 - 3
arch/x86/mm/init_64.c

@@ -528,13 +528,15 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
+
+	cpa_init();
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
-	unsigned long addr;
+	unsigned long addr = begin;
 
-	if (begin >= end)
+	if (addr >= end)
 		return;
 
 	/*
@@ -549,7 +551,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 #else
 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 
-	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+	for (; addr < end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
 		memset((void *)(addr & ~(PAGE_SIZE-1)),

+ 31 - 24
arch/x86/mm/ioremap.c

@@ -260,41 +260,46 @@ static int __init early_ioremap_debug_setup(char *str)
 early_param("early_ioremap_debug", early_ioremap_debug_setup);
 
 static __initdata int after_paging_init;
-static __initdata unsigned long bm_pte[1024]
+static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
 				__attribute__((aligned(PAGE_SIZE)));
 
-static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
 {
-	return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+	pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)];
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+
+	return pmd;
 }
 
-static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
 {
-	return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+	return &bm_pte[pte_index(addr)];
 }
 
 void __init early_ioremap_init(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk(KERN_INFO "early_ioremap_init()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = __pa(bm_pte) | _PAGE_TABLE;
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	memset(bm_pte, 0, sizeof(bm_pte));
+	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
 	/*
-	 * The boot-ioremap range spans multiple pgds, for which
+	 * The boot-ioremap range spans multiple pmds, for which
 	 * we are not prepared:
 	 */
-	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
 		WARN_ON(1);
-		printk(KERN_WARNING "pgd %p != %p\n",
-		       pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+		printk(KERN_WARNING "pmd %p != %p\n",
+		       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
-		       fix_to_virt(FIX_BTMAP_BEGIN));
+			fix_to_virt(FIX_BTMAP_BEGIN));
 		printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
-		       fix_to_virt(FIX_BTMAP_END));
+			fix_to_virt(FIX_BTMAP_END));
 
 		printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
 		printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
@@ -304,28 +309,29 @@ void __init early_ioremap_init(void)
 
 void __init early_ioremap_clear(void)
 {
-	unsigned long *pgd;
+	pmd_t *pmd;
 
 	if (early_ioremap_debug)
 		printk(KERN_INFO "early_ioremap_clear()\n");
 
-	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
-	*pgd = 0;
-	paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT);
+	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+	pmd_clear(pmd);
+	paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
 	__flush_tlb_all();
 }
 
 void __init early_ioremap_reset(void)
 {
 	enum fixed_addresses idx;
-	unsigned long *pte, phys, addr;
+	unsigned long addr, phys;
+	pte_t *pte;
 
 	after_paging_init = 1;
 	for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
 		addr = fix_to_virt(idx);
 		pte = early_ioremap_pte(addr);
-		if (*pte & _PAGE_PRESENT) {
-			phys = *pte & PAGE_MASK;
+		if (pte_present(*pte)) {
+			phys = pte_val(*pte) & PAGE_MASK;
 			set_fixmap(idx, phys);
 		}
 	}
@@ -334,7 +340,8 @@ void __init early_ioremap_reset(void)
 static void __init __early_set_fixmap(enum fixed_addresses idx,
 				   unsigned long phys, pgprot_t flags)
 {
-	unsigned long *pte, addr = __fix_to_virt(idx);
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
 
 	if (idx >= __end_of_fixed_addresses) {
 		BUG();
@@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
 	}
 	pte = early_ioremap_pte(addr);
 	if (pgprot_val(flags))
-		*pte = (phys & PAGE_MASK) | pgprot_val(flags);
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
-		*pte = 0;
+		pte_clear(NULL, addr, pte);
 	__flush_tlb_one(addr);
 }
 

+ 125 - 15
arch/x86/mm/pageattr.c

@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/interrupt.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -191,7 +192,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
  * or when the present bit is not set. Otherwise we would return a
  * pointer to a nonexisting mapping.
  */
-pte_t *lookup_address(unsigned long address, int *level)
+pte_t *lookup_address(unsigned long address, unsigned int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
@@ -252,10 +253,11 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
-	unsigned long nextpage_addr, numpages, pmask, psize, flags;
+	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot;
-	int level, do_split = 1;
+	int i, do_split = 1;
+	unsigned int level;
 
 	spin_lock_irqsave(&pgd_lock, flags);
 	/*
@@ -301,6 +303,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
 	new_prot = static_protections(new_prot, address);
 
+	/*
+	 * We need to check the full range, whether
+	 * static_protection() requires a different pgprot for one of
+	 * the pages in the range we try to preserve:
+	 */
+	addr = address + PAGE_SIZE;
+	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
+		pgprot_t chk_prot = static_protections(new_prot, addr);
+
+		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+			goto out_unlock;
+	}
+
 	/*
 	 * If there are no changes, return. maxpages has been updated
 	 * above:
@@ -335,23 +350,103 @@ out_unlock:
 	return do_split;
 }
 
+static LIST_HEAD(page_pool);
+static unsigned long pool_size, pool_pages, pool_low;
+static unsigned long pool_used, pool_failed, pool_refill;
+
+static void cpa_fill_pool(void)
+{
+	struct page *p;
+	gfp_t gfp = GFP_KERNEL;
+
+	/* Do not allocate from interrupt context */
+	if (in_irq() || irqs_disabled())
+		return;
+	/*
+	 * Check unlocked. I does not matter when we have one more
+	 * page in the pool. The bit lock avoids recursive pool
+	 * allocations:
+	 */
+	if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+		return;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * We could do:
+	 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
+	 * but this fails on !PREEMPT kernels
+	 */
+	gfp =  GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
+#endif
+
+	while (pool_pages < pool_size) {
+		p = alloc_pages(gfp, 0);
+		if (!p) {
+			pool_failed++;
+			break;
+		}
+		spin_lock_irq(&pgd_lock);
+		list_add(&p->lru, &page_pool);
+		pool_pages++;
+		spin_unlock_irq(&pgd_lock);
+	}
+	clear_bit_unlock(0, &pool_refill);
+}
+
+#define SHIFT_MB		(20 - PAGE_SHIFT)
+#define ROUND_MB_GB		((1 << 10) - 1)
+#define SHIFT_MB_GB		10
+#define POOL_PAGES_PER_GB	16
+
+void __init cpa_init(void)
+{
+	struct sysinfo si;
+	unsigned long gb;
+
+	si_meminfo(&si);
+	/*
+	 * Calculate the number of pool pages:
+	 *
+	 * Convert totalram (nr of pages) to MiB and round to the next
+	 * GiB. Shift MiB to Gib and multiply the result by
+	 * POOL_PAGES_PER_GB:
+	 */
+	gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+	pool_size = POOL_PAGES_PER_GB * gb;
+	pool_low = pool_size;
+
+	cpa_fill_pool();
+	printk(KERN_DEBUG
+	       "CPA: page pool initialized %lu of %lu pages preallocated\n",
+	       pool_pages, pool_size);
+}
+
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	unsigned long flags, pfn, pfninc = 1;
-	gfp_t gfp_flags = GFP_KERNEL;
 	unsigned int i, level;
 	pte_t *pbase, *tmp;
 	pgprot_t ref_prot;
 	struct page *base;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
-#endif
-	base = alloc_pages(gfp_flags, 0);
-	if (!base)
+	/*
+	 * Get a page from the pool. The pool list is protected by the
+	 * pgd_lock, which we have to take anyway for the split
+	 * operation:
+	 */
+	spin_lock_irqsave(&pgd_lock, flags);
+	if (list_empty(&page_pool)) {
+		spin_unlock_irqrestore(&pgd_lock, flags);
 		return -ENOMEM;
+	}
+
+	base = list_first_entry(&page_pool, struct page, lru);
+	list_del(&base->lru);
+	pool_pages--;
+
+	if (pool_pages < pool_low)
+		pool_low = pool_pages;
 
-	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -396,17 +491,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	base = NULL;
 
 out_unlock:
+	/*
+	 * If we dropped out via the lookup_address check under
+	 * pgd_lock then stick the page back into the pool:
+	 */
+	if (base) {
+		list_add(&base->lru, &page_pool);
+		pool_pages++;
+	} else
+		pool_used++;
 	spin_unlock_irqrestore(&pgd_lock, flags);
 
-	if (base)
-		__free_pages(base, 0);
-
 	return 0;
 }
 
 static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
-	int level, do_split, err;
+	int do_split, err;
+	unsigned int level;
 	struct page *kpte_page;
 	pte_t *kpte;
 
@@ -598,7 +700,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	 * Check whether we really changed something:
 	 */
 	if (!cpa.flushtlb)
-		return ret;
+		goto out;
 
 	/*
 	 * No need to flush, when we did not set any of the caching
@@ -617,6 +719,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	else
 		cpa_flush_all(cache);
 
+out:
+	cpa_fill_pool();
 	return ret;
 }
 
@@ -770,6 +874,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	 * but that can deadlock->flush only current cpu:
 	 */
 	__flush_tlb_all();
+
+	/*
+	 * Try to refill the page pool here. We can do this only after
+	 * the tlb flush.
+	 */
+	cpa_fill_pool();
 }
 #endif
 

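The pool sizing in cpa_init() above works out to 16 pages per GiB of RAM, with the MiB total rounded up to the next GiB before scaling. A standalone sketch of the same arithmetic, assuming 4 KiB pages (PAGE_SHIFT == 12) and a hypothetical 512 MiB machine:

#include <stdio.h>

#define PAGE_SHIFT		12
#define SHIFT_MB		(20 - PAGE_SHIFT)	/* pages -> MiB */
#define ROUND_MB_GB		((1 << 10) - 1)		/* round MiB up to GiB */
#define SHIFT_MB_GB		10			/* MiB -> GiB */
#define POOL_PAGES_PER_GB	16

int main(void)
{
	/* 512 MiB expressed as a page count, like si.totalram */
	unsigned long totalram = 512UL << (20 - PAGE_SHIFT);
	unsigned long gb = ((totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
	unsigned long pool_size = POOL_PAGES_PER_GB * gb;

	/* 512 MiB rounds up to 1 GiB, so this prints 16 */
	printf("pool_size = %lu pages\n", pool_size);
	return 0;
}
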
+ 2 - 2
arch/x86/power/Makefile

@@ -1,2 +1,2 @@
-obj-$(CONFIG_PM)		+= cpu.o
-obj-$(CONFIG_HIBERNATION)	+= swsusp.o suspend.o
+obj-$(CONFIG_PM_SLEEP)		+= cpu_$(BITS).o
+obj-$(CONFIG_HIBERNATION)	+= hibernate_$(BITS).o hibernate_asm_$(BITS).o

+ 1 - 1
arch/x86/power/cpu.c → arch/x86/power/cpu_32.c

@@ -40,7 +40,7 @@ static void __save_processor_state(struct saved_context *ctxt)
  	savesegment(ss, ctxt->ss);
 
 	/*
-	 * control registers 
+	 * control registers
 	 */
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();

+ 3 - 157
arch/x86/kernel/suspend_64.c → arch/x86/power/cpu_64.c

@@ -1,8 +1,9 @@
 /*
- * Suspend support specific for i386.
+ * Suspend and hibernation support for x86-64
  *
  * Distribute under GPLv2
  *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
  * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
  * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
  */
@@ -14,9 +15,6 @@
 #include <asm/pgtable.h>
 #include <asm/mtrr.h>
 
-/* References to section boundaries */
-extern const void __nosave_begin, __nosave_end;
-
 static void fix_processor_context(void);
 
 struct saved_context saved_context;
@@ -63,7 +61,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 	mtrr_save_fixed_ranges(NULL);
 
 	/*
-	 * control registers 
+	 * control registers
 	 */
 	rdmsrl(MSR_EFER, ctxt->efer);
 	ctxt->cr0 = read_cr0();
@@ -166,155 +164,3 @@ static void fix_processor_context(void)
                 loaddebug(&current->thread, 7);
 	}
 }
-
-#ifdef CONFIG_HIBERNATION
-/* Defined in arch/x86_64/kernel/suspend_asm.S */
-extern int restore_image(void);
-
-/*
- * Address to jump to in the last phase of restore in order to get to the image
- * kernel's text (this value is passed in the image header).
- */
-unsigned long restore_jump_address;
-
-/*
- * Value of the cr3 register from before the hibernation (this value is passed
- * in the image header).
- */
-unsigned long restore_cr3;
-
-pgd_t *temp_level4_pgt;
-
-void *relocated_restore_code;
-
-static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
-{
-	long i, j;
-
-	i = pud_index(address);
-	pud = pud + i;
-	for (; i < PTRS_PER_PUD; pud++, i++) {
-		unsigned long paddr;
-		pmd_t *pmd;
-
-		paddr = address + i*PUD_SIZE;
-		if (paddr >= end)
-			break;
-
-		pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
-		if (!pmd)
-			return -ENOMEM;
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
-		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
-			unsigned long pe;
-
-			if (paddr >= end)
-				break;
-			pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
-			pe &= __supported_pte_mask;
-			set_pmd(pmd, __pmd(pe));
-		}
-	}
-	return 0;
-}
-
-static int set_up_temporary_mappings(void)
-{
-	unsigned long start, end, next;
-	int error;
-
-	temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
-	if (!temp_level4_pgt)
-		return -ENOMEM;
-
-	/* It is safe to reuse the original kernel mapping */
-	set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
-		init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
-	/* Set up the direct mapping from scratch */
-	start = (unsigned long)pfn_to_kaddr(0);
-	end = (unsigned long)pfn_to_kaddr(end_pfn);
-
-	for (; start < end; start = next) {
-		pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
-		if (!pud)
-			return -ENOMEM;
-		next = start + PGDIR_SIZE;
-		if (next > end)
-			next = end;
-		if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
-			return error;
-		set_pgd(temp_level4_pgt + pgd_index(start),
-			mk_kernel_pgd(__pa(pud)));
-	}
-	return 0;
-}
-
-int swsusp_arch_resume(void)
-{
-	int error;
-
-	/* We have got enough memory and from now on we cannot recover */
-	if ((error = set_up_temporary_mappings()))
-		return error;
-
-	relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
-	if (!relocated_restore_code)
-		return -ENOMEM;
-	memcpy(relocated_restore_code, &core_restore_code,
-	       &restore_registers - &core_restore_code);
-
-	restore_image();
-	return 0;
-}
-
-/*
- *	pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
-	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-
-struct restore_data_record {
-	unsigned long jump_address;
-	unsigned long cr3;
-	unsigned long magic;
-};
-
-#define RESTORE_MAGIC	0x0123456789ABCDEFUL
-
-/**
- *	arch_hibernation_header_save - populate the architecture specific part
- *		of a hibernation image header
- *	@addr: address to save the data at
- */
-int arch_hibernation_header_save(void *addr, unsigned int max_size)
-{
-	struct restore_data_record *rdr = addr;
-
-	if (max_size < sizeof(struct restore_data_record))
-		return -EOVERFLOW;
-	rdr->jump_address = restore_jump_address;
-	rdr->cr3 = restore_cr3;
-	rdr->magic = RESTORE_MAGIC;
-	return 0;
-}
-
-/**
- *	arch_hibernation_header_restore - read the architecture specific data
- *		from the hibernation image header
- *	@addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
-	struct restore_data_record *rdr = addr;
-
-	restore_jump_address = rdr->jump_address;
-	restore_cr3 = rdr->cr3;
-	return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
-}
-#endif /* CONFIG_HIBERNATION */

+ 3 - 3
arch/x86/power/suspend.c → arch/x86/power/hibernate_32.c

@@ -1,5 +1,5 @@
 /*
- * Suspend support specific for i386 - temporary page tables
+ * Hibernation support specific for i386 - temporary page tables
  *
  * Distribute under GPLv2
  *
@@ -13,7 +13,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
-/* Defined in arch/i386/power/swsusp.S */
+/* Defined in hibernate_asm_32.S */
 extern int restore_image(void);
 
 /* References to section boundaries */
@@ -23,7 +23,7 @@ extern const void __nosave_begin, __nosave_end;
 pgd_t *resume_pg_dir;
 
 /* The following three functions are based on the analogous code in
- * arch/i386/mm/init.c
+ * arch/x86/mm/init_32.c
  */
 
 /*

+ 169 - 0
arch/x86/power/hibernate_64.c

@@ -0,0 +1,169 @@
+/*
+ * Hibernation support for x86-64
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+
+#include <linux/smp.h>
+#include <linux/suspend.h>
+#include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mtrr.h>
+
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
+
+/* Defined in hibernate_asm_64.S */
+extern int restore_image(void);
+
+/*
+ * Address to jump to in the last phase of restore in order to get to the image
+ * kernel's text (this value is passed in the image header).
+ */
+unsigned long restore_jump_address;
+
+/*
+ * Value of the cr3 register from before the hibernation (this value is passed
+ * in the image header).
+ */
+unsigned long restore_cr3;
+
+pgd_t *temp_level4_pgt;
+
+void *relocated_restore_code;
+
+static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+	long i, j;
+
+	i = pud_index(address);
+	pud = pud + i;
+	for (; i < PTRS_PER_PUD; pud++, i++) {
+		unsigned long paddr;
+		pmd_t *pmd;
+
+		paddr = address + i*PUD_SIZE;
+		if (paddr >= end)
+			break;
+
+		pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+		if (!pmd)
+			return -ENOMEM;
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+		for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+			unsigned long pe;
+
+			if (paddr >= end)
+				break;
+			pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
+			pe &= __supported_pte_mask;
+			set_pmd(pmd, __pmd(pe));
+		}
+	}
+	return 0;
+}
+
+static int set_up_temporary_mappings(void)
+{
+	unsigned long start, end, next;
+	int error;
+
+	temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
+	if (!temp_level4_pgt)
+		return -ENOMEM;
+
+	/* It is safe to reuse the original kernel mapping */
+	set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+		init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+	/* Set up the direct mapping from scratch */
+	start = (unsigned long)pfn_to_kaddr(0);
+	end = (unsigned long)pfn_to_kaddr(end_pfn);
+
+	for (; start < end; start = next) {
+		pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+		if (!pud)
+			return -ENOMEM;
+		next = start + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+		if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
+			return error;
+		set_pgd(temp_level4_pgt + pgd_index(start),
+			mk_kernel_pgd(__pa(pud)));
+	}
+	return 0;
+}
+
+int swsusp_arch_resume(void)
+{
+	int error;
+
+	/* We have got enough memory and from now on we cannot recover */
+	if ((error = set_up_temporary_mappings()))
+		return error;
+
+	relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
+	if (!relocated_restore_code)
+		return -ENOMEM;
+	memcpy(relocated_restore_code, &core_restore_code,
+	       &restore_registers - &core_restore_code);
+
+	restore_image();
+	return 0;
+}
+
+/*
+ *	pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+struct restore_data_record {
+	unsigned long jump_address;
+	unsigned long cr3;
+	unsigned long magic;
+};
+
+#define RESTORE_MAGIC	0x0123456789ABCDEFUL
+
+/**
+ *	arch_hibernation_header_save - populate the architecture specific part
+ *		of a hibernation image header
+ *	@addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+	struct restore_data_record *rdr = addr;
+
+	if (max_size < sizeof(struct restore_data_record))
+		return -EOVERFLOW;
+	rdr->jump_address = restore_jump_address;
+	rdr->cr3 = restore_cr3;
+	rdr->magic = RESTORE_MAGIC;
+	return 0;
+}
+
+/**
+ *	arch_hibernation_header_restore - read the architecture specific data
+ *		from the hibernation image header
+ *	@addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+	struct restore_data_record *rdr = addr;
+
+	restore_jump_address = rdr->jump_address;
+	restore_cr3 = rdr->cr3;
+	return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}
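
res_phys_pud_init() and set_up_temporary_mappings() above build the temporary
direct mapping out of 2 MiB pages: each PMD entry covers PMD_SIZE (2 MiB), a
page of 512 PMD entries therefore covers one PUD_SIZE slot (1 GiB), and each
PGD slot covers PGDIR_SIZE (512 GiB). A rough standalone check of how few
get_safe_page() allocations that costs — plain C with the x86-64 4-level
constants hard-coded, illustrative rather than kernel code:

	#include <stdio.h>

	int main(void)
	{
		const unsigned long long PMD_SIZE   = 2ULL << 20;      /* 2 MiB mapped per PMD entry */
		const unsigned long long PUD_SIZE   = 512 * PMD_SIZE;  /* 1 GiB per PUD entry / PMD page */
		const unsigned long long PGDIR_SIZE = 512 * PUD_SIZE;  /* 512 GiB per PGD slot / PUD page */
		const unsigned long long ram        = 8ULL << 30;      /* assume 8 GiB of RAM */

		/* One PUD page per PGD slot touched, one PMD page per GiB mapped. */
		unsigned long long pud_pages = (ram + PGDIR_SIZE - 1) / PGDIR_SIZE;
		unsigned long long pmd_pages = (ram + PUD_SIZE - 1) / PUD_SIZE;

		printf("PUD pages: %llu, PMD pages: %llu\n", pud_pages, pmd_pages);
		/* Prints "PUD pages: 1, PMD pages: 8": the whole temporary
		 * mapping for 8 GiB fits in nine 4 KiB pages plus the PGD. */
		return 0;
	}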

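arch_hibernation_header_save()/_restore() above use a simple magic-number
handshake: the writer stamps RESTORE_MAGIC next to the data it saves, and the
reader refuses the image unless the magic matches, catching headers written by
an incompatible kernel. The same pattern in miniature — all names here are
made up for illustration, not kernel APIs:

	#define DEMO_MAGIC 0x0123456789ABCDEFUL

	struct demo_record {
		unsigned long jump_address;
		unsigned long cr3;
		unsigned long magic;
	};

	static int demo_header_save(void *addr, unsigned int max_size,
				    unsigned long jump, unsigned long cr3)
	{
		struct demo_record *rdr = addr;

		if (max_size < sizeof(*rdr))
			return -1;               /* caller's buffer is too small */
		rdr->jump_address = jump;
		rdr->cr3 = cr3;
		rdr->magic = DEMO_MAGIC;         /* stamp last, after the payload */
		return 0;
	}

	static int demo_header_restore(const void *addr,
				       unsigned long *jump, unsigned long *cr3)
	{
		const struct demo_record *rdr = addr;

		if (rdr->magic != DEMO_MAGIC)
			return -1;               /* header from an incompatible kernel */
		*jump = rdr->jump_address;
		*cr3 = rdr->cr3;
		return 0;
	}
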
+ 1 - 2
arch/x86/power/swsusp.S → arch/x86/power/hibernate_asm_32.S

@@ -1,7 +1,6 @@
 .text
 
-/* Originally gcc generated, modified by hand
- *
+/*
  * This may not use any stack, nor any variable that is not "NoSave":
  *
  * Its rewriting one kernel image with another. What is stack in "old"

+ 7 - 2
arch/x86/kernel/suspend_asm_64.S → arch/x86/power/hibernate_asm_64.S

@@ -1,7 +1,12 @@
-/* Copyright 2004,2005 Pavel Machek <pavel@suse.cz>, Andi Kleen <ak@suse.de>, Rafael J. Wysocki <rjw@sisk.pl>
+/*
+ * Hibernation support for x86-64
  *
  * Distribute under GPLv2.
  *
+ * Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright 2005 Andi Kleen <ak@suse.de>
+ * Copyright 2004 Pavel Machek <pavel@suse.cz>
+ *
  * swsusp_arch_resume must not use any stack or any nonlocal variables while
  * copying pages:
  *
@@ -9,7 +14,7 @@
  * image could very well be data page in "new" image, and overwriting
  * your own stack under you is bad idea.
  */
-	
+
 	.text
 #include <linux/linkage.h>
 #include <asm/segment.h>

+ 3 - 3
arch/x86/xen/mmu.c

@@ -58,7 +58,7 @@
 
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
-	int level;
+	unsigned int level;
 	pte_t *pte = lookup_address(address, &level);
 	unsigned offset = address & PAGE_MASK;
 
@@ -71,7 +71,7 @@ void make_lowmem_page_readonly(void *vaddr)
 {
 	pte_t *pte, ptev;
 	unsigned long address = (unsigned long)vaddr;
-	int level;
+	unsigned int level;
 
 	pte = lookup_address(address, &level);
 	BUG_ON(pte == NULL);
@@ -86,7 +86,7 @@ void make_lowmem_page_readwrite(void *vaddr)
 {
 	pte_t *pte, ptev;
 	unsigned long address = (unsigned long)vaddr;
-	int level;
+	unsigned int level;
 
 	pte = lookup_address(address, &level);
 	BUG_ON(pte == NULL);

+ 5 - 5
arch/x86/xen/time.c

@@ -217,17 +217,17 @@ unsigned long long xen_sched_clock(void)
 /* Get the CPU speed from Xen */
 unsigned long xen_cpu_khz(void)
 {
-	u64 cpu_khz = 1000000ULL << 32;
+	u64 xen_khz = 1000000ULL << 32;
 	const struct vcpu_time_info *info =
 		&HYPERVISOR_shared_info->vcpu_info[0].time;
 
-	do_div(cpu_khz, info->tsc_to_system_mul);
+	do_div(xen_khz, info->tsc_to_system_mul);
 	if (info->tsc_shift < 0)
-		cpu_khz <<= -info->tsc_shift;
+		xen_khz <<= -info->tsc_shift;
 	else
-		cpu_khz >>= info->tsc_shift;
+		xen_khz >>= info->tsc_shift;
 
-	return cpu_khz;
+	return xen_khz;
 }
 
 /*

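The xen_cpu_khz() change above is a shadowing fix: the kernel already exports
a global named cpu_khz, and a local u64 of the same name hides it inside the
function — legal C, but confusing and a magnet for shadow warnings. A
contrived sketch of the hazard (illustrative, not kernel code):

	#include <stdio.h>

	unsigned long cpu_khz = 2200000;          /* global, set at boot elsewhere */

	static unsigned long scaled_khz(int shift)
	{
		unsigned long cpu_khz = 1000000;  /* shadows the global of the same name */

		cpu_khz <<= shift;                /* touches only the local copy */
		return cpu_khz;
	}

	int main(void)
	{
		/* Prints "2000000 2200000": the global was never involved. */
		printf("%lu %lu\n", scaled_khz(1), cpu_khz);
		return 0;
	}
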
+ 1 - 6
drivers/acpi/bus.c

@@ -31,6 +31,7 @@
 #include <linux/pm.h>
 #include <linux/device.h>
 #include <linux/proc_fs.h>
+#include <linux/acpi.h>
 #ifdef CONFIG_X86
 #include <asm/mpspec.h>
 #endif
@@ -39,9 +40,6 @@
 
 #define _COMPONENT		ACPI_BUS_COMPONENT
 ACPI_MODULE_NAME("bus");
-#ifdef	CONFIG_X86
-extern void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger);
-#endif
 
 struct acpi_device *acpi_root;
 struct proc_dir_entry *acpi_root_dir;
@@ -653,8 +651,6 @@ void __init acpi_early_init(void)
 
 #ifdef CONFIG_X86
 	if (!acpi_ioapic) {
-		extern u8 acpi_sci_flags;
-
 		/* compatible (0) means level (3) */
 		if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) {
 			acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK;
@@ -664,7 +660,6 @@ void __init acpi_early_init(void)
 		acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt,
 					 (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2);
 	} else {
-		extern int acpi_sci_override_gsi;
 		/*
 		 * now that acpi_gbl_FADT is initialized,
 		 * update it with result from INT_SRC_OVR parsing

+ 2 - 2
drivers/lguest/page_tables.c

@@ -178,8 +178,8 @@ static void release_pte(pte_t pte)
 
 static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
 {
-	if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE))
-	    || pte_pfn(gpte) >= cpu->lg->pfn_limit)
+	if ((pte_flags(gpte) & _PAGE_PSE) ||
+	    pte_pfn(gpte) >= cpu->lg->pfn_limit)
 		kill_guest(cpu, "bad page table entry");
 }
 

+ 4 - 0
include/asm-x86/acpi.h

@@ -89,6 +89,10 @@ extern int acpi_pci_disabled;
 extern int acpi_skip_timer_override;
 extern int acpi_use_timer_override;
 
+extern u8 acpi_sci_flags;
+extern int acpi_sci_override_gsi;
+void acpi_pic_sci_set_trigger(unsigned int, u16);
+
 static inline void disable_acpi(void)
 {
 	acpi_disabled = 1;

+ 2 - 0
include/asm-x86/cacheflush.h

@@ -44,6 +44,8 @@ int set_memory_np(unsigned long addr, int numpages);
 
 void clflush_cache_range(void *addr, unsigned int size);
 
+void cpa_init(void);
+
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 #endif

+ 7 - 2
include/asm-x86/geode.h

@@ -206,12 +206,17 @@ static inline u16 geode_mfgpt_read(int timer, u16 reg)
 	return inw(base + reg + (timer * 8));
 }
 
-extern int __init geode_mfgpt_detect(void);
 extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable);
 extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable);
-extern int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner);
+extern int geode_mfgpt_alloc_timer(int timer, int domain);
 
 #define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1)
 #define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0)
 
+#ifdef CONFIG_GEODE_MFGPT_TIMER
+extern int __init mfgpt_timer_setup(void);
+#else
+static inline int mfgpt_timer_setup(void) { return 0; }
+#endif
+
 #endif

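The new mfgpt_timer_setup() declaration follows the usual kernel header idiom
for optional features: one #ifdef in the header provides either the real
prototype or a static inline no-op, so every caller can stay unconditional.
The shape of the pattern, with a hypothetical CONFIG_FOO option:

	/* In a header: one #ifdef here instead of one at every call site. */
	#ifdef CONFIG_FOO
	extern int foo_setup(void);                      /* real version */
	#else
	static inline int foo_setup(void) { return 0; }  /* no-op when disabled */
	#endif

	/* Callers stay unconditional; the stub costs nothing after inlining. */
	static int board_init(void)
	{
		return foo_setup();
	}
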
+ 0 - 1
include/asm-x86/page_32.h

@@ -48,7 +48,6 @@ typedef unsigned long	pgprotval_t;
 typedef unsigned long	phys_addr_t;
 
 typedef union { pteval_t pte, pte_low; } pte_t;
-typedef pte_t boot_pte_t;
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* CONFIG_X86_PAE */

+ 1 - 1
include/asm-x86/pgtable.h

@@ -255,7 +255,7 @@ enum {
  * NOTE: the return type is pte_t but if the pmd is PSE then we return it
  * as a pte too.
  */
-extern pte_t *lookup_address(unsigned long address, int *level);
+extern pte_t *lookup_address(unsigned long address, unsigned int *level);
 
 /* local pte updates need not use xchg for locking */
 static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)

+ 0 - 4
include/asm-x86/pgtable_32.h

@@ -52,10 +52,6 @@ void paging_init(void);
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
 
-#define TWOLEVEL_PGDIR_SHIFT	22
-#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
-#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that

+ 0 - 9
include/linux/compiler-gcc4.h

@@ -5,15 +5,6 @@
 /* These definitions are for GCC v4.x.  */
 #include <linux/compiler-gcc.h>
 
-#ifdef CONFIG_FORCED_INLINING
-# undef inline
-# undef __inline__
-# undef __inline
-# define inline			inline		__attribute__((always_inline))
-# define __inline__		__inline__	__attribute__((always_inline))
-# define __inline		__inline	__attribute__((always_inline))
-#endif
-
 #define __used			__attribute__((__used__))
 #define __must_check 		__attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)

+ 1 - 1
init/Kconfig

@@ -587,7 +587,7 @@ config COMPAT_BRK
 	  disabled, and can be overriden runtime by setting
 	  /proc/sys/kernel/randomize_va_space to 2.
 
-	  On non-ancient distros (post-2000 ones) Y is usually a safe choice.
+	  On non-ancient distros (post-2000 ones) N is usually a safe choice.
 
 config BASE_FULL
 	default y

+ 1 - 1
init/main.c

@@ -558,7 +558,6 @@ asmlinkage void __init start_kernel(void)
 	preempt_disable();
 	build_all_zonelists();
 	page_alloc_init();
-	enable_debug_pagealloc();
 	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
 	parse_early_param();
 	parse_args("Booting kernel", static_command_line, __start___param,
@@ -614,6 +613,7 @@ asmlinkage void __init start_kernel(void)
 	vfs_caches_init_early();
 	cpuset_init_early();
 	mem_init();
+	enable_debug_pagealloc();
 	cpu_hotplug_init();
 	kmem_cache_init();
 	setup_per_cpu_pageset();

+ 0 - 14
lib/Kconfig.debug

@@ -465,20 +465,6 @@ config FRAME_POINTER
 	  some architectures or if you use external debuggers.
 	  If you don't debug the kernel, you can say N.
 
-config FORCED_INLINING
-	bool "Force gcc to inline functions marked 'inline'"
-	depends on DEBUG_KERNEL
-	default y
-	help
-	  This option determines if the kernel forces gcc to inline the functions
-	  developers have marked 'inline'. Doing so takes away freedom from gcc to
-	  do what it thinks is best, which is desirable for the gcc 3.x series of
-	  compilers. The gcc 4.x series have a rewritten inlining algorithm and
-	  disabling this option will generate a smaller kernel there. Hopefully
-	  this algorithm is so good that allowing gcc4 to make the decision can
-	  become the default in the future, until then this option is there to
-	  test gcc for this.
-
 config BOOT_PRINTK_DELAY
 	bool "Delay each boot printk message by N milliseconds"
 	depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY

+ 27 - 22
lib/vsprintf.c

@@ -26,6 +26,9 @@
 #include <asm/page.h>		/* for PAGE_SIZE */
 #include <asm/div64.h>
 
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
 /**
  * simple_strtoul - convert a string to an unsigned long
  * @cp: The start of the string
@@ -41,17 +44,17 @@ unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
 		if (*cp == '0') {
 			base = 8;
 			cp++;
-			if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
+			if ((TOLOWER(*cp) == 'x') && isxdigit(cp[1])) {
 				cp++;
 				base = 16;
 			}
 		}
 	} else if (base == 16) {
-		if (cp[0] == '0' && toupper(cp[1]) == 'X')
+		if (cp[0] == '0' && TOLOWER(cp[1]) == 'x')
 			cp += 2;
 	}
 	while (isxdigit(*cp) &&
-	       (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
+	       (value = isdigit(*cp) ? *cp-'0' : TOLOWER(*cp)-'a'+10) < base) {
 		result = result*base + value;
 		cp++;
 	}
@@ -92,17 +95,17 @@ unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base)
 		if (*cp == '0') {
 			base = 8;
 			cp++;
-			if ((toupper(*cp) == 'X') && isxdigit(cp[1])) {
+			if ((TOLOWER(*cp) == 'x') && isxdigit(cp[1])) {
 				cp++;
 				base = 16;
 			}
 		}
 	} else if (base == 16) {
-		if (cp[0] == '0' && toupper(cp[1]) == 'X')
+		if (cp[0] == '0' && TOLOWER(cp[1]) == 'x')
 			cp += 2;
 	}
-	while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
-	    ? toupper(*cp) : *cp)-'A'+10) < base) {
+	while (isxdigit(*cp)
+	 && (value = isdigit(*cp) ? *cp-'0' : TOLOWER(*cp)-'a'+10) < base) {
 		result = result*base + value;
 		cp++;
 	}
@@ -360,24 +363,25 @@ static noinline char* put_dec(char *buf, unsigned long long num)
 #define PLUS	4		/* show plus */
 #define SPACE	8		/* space if plus */
 #define LEFT	16		/* left justified */
-#define SPECIAL	32		/* 0x */
-#define LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
+#define SMALL	32		/* Must be 32 == 0x20 */
+#define SPECIAL	64		/* 0x */
 
 static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type)
 {
-	char sign,tmp[66];
-	const char *digits;
-	/* we are called with base 8, 10 or 16, only, thus don't need "g..."  */
-	static const char small_digits[] = "0123456789abcdefx"; /* "ghijklmnopqrstuvwxyz"; */
-	static const char large_digits[] = "0123456789ABCDEFX"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+	/* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
+	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+	char tmp[66];
+	char sign;
+	char locase;
 	int need_pfx = ((type & SPECIAL) && base != 10);
 	int i;
 
-	digits = (type & LARGE) ? large_digits : small_digits;
+	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
+	 * produces same digits or (maybe lowercased) letters */
+	locase = (type & SMALL);
 	if (type & LEFT)
 		type &= ~ZEROPAD;
-	if (base < 2 || base > 36)
-		return NULL;
 	sign = 0;
 	if (type & SIGN) {
 		if ((signed long long) num < 0) {
@@ -404,7 +408,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		tmp[i++] = '0';
 	/* Generic code, for any base:
 	else do {
-		tmp[i++] = digits[do_div(num,base)];
+		tmp[i++] = (digits[do_div(num,base)] | locase);
 	} while (num != 0);
 	*/
 	else if (base != 10) { /* 8 or 16 */
@@ -412,7 +416,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		int shift = 3;
 		if (base == 16) shift = 4;
 		do {
-			tmp[i++] = digits[((unsigned char)num) & mask];
+			tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
 			num >>= shift;
 		} while (num);
 	} else { /* base 10 */
@@ -444,7 +448,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		++buf;
 		if (base == 16) {
 			if (buf < end)
-				*buf = digits[16]; /* for arbitrary base: digits[33]; */
+				*buf = ('X' | locase);
 			++buf;
 		}
 	}
@@ -644,6 +648,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 				continue;
 
 			case 'p':
+				flags |= SMALL;
 				if (field_width == -1) {
 					field_width = 2*sizeof(void *);
 					flags |= ZEROPAD;
@@ -680,9 +685,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 				base = 8;
 				break;
 
-			case 'X':
-				flags |= LARGE;
 			case 'x':
+				flags |= SMALL;
+			case 'X':
 				base = 16;
 				break;